Coverage Report

Created: 2023-06-07 06:31

/src/aom/aom_dsp/simd/v128_intrinsics_x86.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#ifndef AOM_AOM_DSP_SIMD_V128_INTRINSICS_X86_H_
13
#define AOM_AOM_DSP_SIMD_V128_INTRINSICS_X86_H_
14
15
#include <stdint.h>
16
#include "aom_dsp/simd/v64_intrinsics_x86.h"
17
18
typedef __m128i v128;
19
20
9.47k
SIMD_INLINE uint32_t v128_low_u32(v128 a) {
21
9.47k
  return (uint32_t)_mm_cvtsi128_si32(a);
22
9.47k
}
Unexecuted instantiation: cdef_block_sse2.c:v128_low_u32
Unexecuted instantiation: cdef_block_ssse3.c:v128_low_u32
Unexecuted instantiation: cdef_block_sse4.c:v128_low_u32
cdef_block_avx2.c:v128_low_u32
Line
Count
Source
20
9.47k
SIMD_INLINE uint32_t v128_low_u32(v128 a) {
21
9.47k
  return (uint32_t)_mm_cvtsi128_si32(a);
22
9.47k
}
23
24
90.4M
SIMD_INLINE v64 v128_low_v64(v128 a) {
25
90.4M
  return _mm_unpacklo_epi64(a, v64_zero());
26
90.4M
}
Unexecuted instantiation: cdef_block_sse2.c:v128_low_v64
Unexecuted instantiation: cdef_block_ssse3.c:v128_low_v64
Unexecuted instantiation: cdef_block_sse4.c:v128_low_v64
cdef_block_avx2.c:v128_low_v64
Line
Count
Source
24
90.4M
SIMD_INLINE v64 v128_low_v64(v128 a) {
25
90.4M
  return _mm_unpacklo_epi64(a, v64_zero());
26
90.4M
}
27
28
88.7M
SIMD_INLINE v64 v128_high_v64(v128 a) { return _mm_srli_si128(a, 8); }
Unexecuted instantiation: cdef_block_sse2.c:v128_high_v64
Unexecuted instantiation: cdef_block_ssse3.c:v128_high_v64
Unexecuted instantiation: cdef_block_sse4.c:v128_high_v64
cdef_block_avx2.c:v128_high_v64
Line
Count
Source
28
88.7M
SIMD_INLINE v64 v128_high_v64(v128 a) { return _mm_srli_si128(a, 8); }
29
30
324M
SIMD_INLINE v128 v128_from_v64(v64 a, v64 b) {
31
324M
  return _mm_unpacklo_epi64(b, a);
32
324M
}
Unexecuted instantiation: cdef_block_sse2.c:v128_from_v64
Unexecuted instantiation: cdef_block_ssse3.c:v128_from_v64
Unexecuted instantiation: cdef_block_sse4.c:v128_from_v64
cdef_block_avx2.c:v128_from_v64
Line
Count
Source
30
324M
SIMD_INLINE v128 v128_from_v64(v64 a, v64 b) {
31
324M
  return _mm_unpacklo_epi64(b, a);
32
324M
}
33
34
0
/* Builds a 128-bit vector from two 64-bit scalars: a supplies the upper
   64 bits and b the lower 64 bits (the same ordering as v128_from_v64). */
SIMD_INLINE v128 v128_from_64(uint64_t a, uint64_t b) {
  const v64 upper = v64_from_64(a);
  const v64 lower = v64_from_64(b);
  return v128_from_v64(upper, lower);
}
Unexecuted instantiation: cdef_block_sse2.c:v128_from_64
Unexecuted instantiation: cdef_block_ssse3.c:v128_from_64
Unexecuted instantiation: cdef_block_sse4.c:v128_from_64
Unexecuted instantiation: cdef_block_avx2.c:v128_from_64
37
38
170k
SIMD_INLINE v128 v128_from_32(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
39
170k
  return _mm_set_epi32((int)a, (int)b, (int)c, (int)d);
40
170k
}
Unexecuted instantiation: cdef_block_sse2.c:v128_from_32
Unexecuted instantiation: cdef_block_ssse3.c:v128_from_32
Unexecuted instantiation: cdef_block_sse4.c:v128_from_32
cdef_block_avx2.c:v128_from_32
Line
Count
Source
38
170k
SIMD_INLINE v128 v128_from_32(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
39
170k
  return _mm_set_epi32((int)a, (int)b, (int)c, (int)d);
40
170k
}
41
42
141M
/* Loads 128 bits from p using _mm_load_si128, which requires p to be
   16-byte aligned. The pointer is cast to a const-qualified vector pointer
   so the const of the parameter is not discarded. */
SIMD_INLINE v128 v128_load_aligned(const void *p) {
  return _mm_load_si128((const __m128i *)p);
}
Unexecuted instantiation: cdef_block_sse2.c:v128_load_aligned
Unexecuted instantiation: cdef_block_ssse3.c:v128_load_aligned
Unexecuted instantiation: cdef_block_sse4.c:v128_load_aligned
cdef_block_avx2.c:v128_load_aligned
Line
Count
Source
42
141M
/* Loads 128 bits from p using _mm_load_si128, which requires p to be
   16-byte aligned. The pointer is cast to a const-qualified vector pointer
   so the const of the parameter is not discarded. */
SIMD_INLINE v128 v128_load_aligned(const void *p) {
  return _mm_load_si128((const __m128i *)p);
}
45
46
641M
/* Loads 128 bits from p with no alignment requirement. When SSSE3 is
   enabled the lddqu form is used, otherwise the SSE2 loadu form. The
   pointer is cast to a const-qualified vector pointer so the const of the
   parameter is not discarded. */
SIMD_INLINE v128 v128_load_unaligned(const void *p) {
#if defined(__SSSE3__)
  return _mm_lddqu_si128((const __m128i *)p);
#else
  return _mm_loadu_si128((const __m128i *)p);
#endif
}
Unexecuted instantiation: cdef_block_sse2.c:v128_load_unaligned
Unexecuted instantiation: cdef_block_ssse3.c:v128_load_unaligned
Unexecuted instantiation: cdef_block_sse4.c:v128_load_unaligned
cdef_block_avx2.c:v128_load_unaligned
Line
Count
Source
46
641M
/* Loads 128 bits from p with no alignment requirement. When SSSE3 is
   enabled the lddqu form is used, otherwise the SSE2 loadu form. The
   pointer is cast to a const-qualified vector pointer so the const of the
   parameter is not discarded. */
SIMD_INLINE v128 v128_load_unaligned(const void *p) {
#if defined(__SSSE3__)
  return _mm_lddqu_si128((const __m128i *)p);
#else
  return _mm_loadu_si128((const __m128i *)p);
#endif
}
53
54
0
SIMD_INLINE void v128_store_aligned(void *p, v128 a) {
55
0
  _mm_store_si128((__m128i *)p, a);
56
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_store_aligned
Unexecuted instantiation: cdef_block_ssse3.c:v128_store_aligned
Unexecuted instantiation: cdef_block_sse4.c:v128_store_aligned
Unexecuted instantiation: cdef_block_avx2.c:v128_store_aligned
57
58
122M
SIMD_INLINE void v128_store_unaligned(void *p, v128 a) {
59
122M
  _mm_storeu_si128((__m128i *)p, a);
60
122M
}
Unexecuted instantiation: cdef_block_sse2.c:v128_store_unaligned
Unexecuted instantiation: cdef_block_ssse3.c:v128_store_unaligned
Unexecuted instantiation: cdef_block_sse4.c:v128_store_unaligned
cdef_block_avx2.c:v128_store_unaligned
Line
Count
Source
58
122M
SIMD_INLINE void v128_store_unaligned(void *p, v128 a) {
59
122M
  _mm_storeu_si128((__m128i *)p, a);
60
122M
}
61
62
// The following function requires an immediate.
63
// Some compilers will check this during optimisation, others won't.
64
#if defined(__OPTIMIZE__) && __OPTIMIZE__ && !defined(__clang__)
65
#if defined(__SSSE3__)
66
SIMD_INLINE v128 v128_align(v128 a, v128 b, const unsigned int c) {
67
  return c ? _mm_alignr_epi8(a, b, c) : b;
68
}
69
#else
70
#define v128_align(a, b, c) \
71
  ((c) ? _mm_or_si128(_mm_srli_si128(b, c), _mm_slli_si128(a, 16 - (c))) : (b))
72
#endif
73
#else
74
#if defined(__SSSE3__)
75
18.9k
#define v128_align(a, b, c) ((c) ? _mm_alignr_epi8(a, b, (uint8_t)(c)) : (b))
76
#else
77
#define v128_align(a, b, c) \
78
0
  ((c) ? _mm_or_si128(_mm_srli_si128(b, c), _mm_slli_si128(a, 16 - (c))) : (b))
79
#endif
80
#endif
81
82
0
/* Returns a vector with all 128 bits cleared. Declared with (void) so the
   definition is a proper prototype in C rather than an unspecified
   parameter list. */
SIMD_INLINE v128 v128_zero(void) { return _mm_setzero_si128(); }
Unexecuted instantiation: cdef_block_sse2.c:v128_zero
Unexecuted instantiation: cdef_block_ssse3.c:v128_zero
Unexecuted instantiation: cdef_block_sse4.c:v128_zero
Unexecuted instantiation: cdef_block_avx2.c:v128_zero
83
84
0
/* Broadcasts the byte x into all sixteen 8-bit lanes. */
SIMD_INLINE v128 v128_dup_8(uint8_t x) {
  const char lane = (char)x;
  return _mm_set1_epi8(lane);
}
Unexecuted instantiation: cdef_block_sse2.c:v128_dup_8
Unexecuted instantiation: cdef_block_ssse3.c:v128_dup_8
Unexecuted instantiation: cdef_block_sse4.c:v128_dup_8
Unexecuted instantiation: cdef_block_avx2.c:v128_dup_8
85
86
75.6k
SIMD_INLINE v128 v128_dup_16(uint16_t x) { return _mm_set1_epi16((short)x); }
Unexecuted instantiation: cdef_block_sse2.c:v128_dup_16
Unexecuted instantiation: cdef_block_ssse3.c:v128_dup_16
Unexecuted instantiation: cdef_block_sse4.c:v128_dup_16
cdef_block_avx2.c:v128_dup_16
Line
Count
Source
86
75.6k
SIMD_INLINE v128 v128_dup_16(uint16_t x) { return _mm_set1_epi16((short)x); }
87
88
18.9k
SIMD_INLINE v128 v128_dup_32(uint32_t x) { return _mm_set1_epi32((int)x); }
Unexecuted instantiation: cdef_block_sse2.c:v128_dup_32
Unexecuted instantiation: cdef_block_ssse3.c:v128_dup_32
Unexecuted instantiation: cdef_block_sse4.c:v128_dup_32
cdef_block_avx2.c:v128_dup_32
Line
Count
Source
88
18.9k
SIMD_INLINE v128 v128_dup_32(uint32_t x) { return _mm_set1_epi32((int)x); }
89
90
0
/* Broadcasts the 64-bit value x into both 64-bit lanes. */
SIMD_INLINE v128 v128_dup_64(uint64_t x) {
  // _mm_set_pi64x and _mm_cvtsi64x_si64 missing in some compilers, so the
  // value is assembled from its two 32-bit halves instead.
  const int32_t upper = (int32_t)(x >> 32);
  const int32_t lower = (int32_t)x;
  return _mm_set_epi32(upper, lower, upper, lower);
}
Unexecuted instantiation: cdef_block_sse2.c:v128_dup_64
Unexecuted instantiation: cdef_block_ssse3.c:v128_dup_64
Unexecuted instantiation: cdef_block_sse4.c:v128_dup_64
Unexecuted instantiation: cdef_block_avx2.c:v128_dup_64
95
96
0
/* Lane-wise 8-bit addition with wraparound. */
SIMD_INLINE v128 v128_add_8(v128 a, v128 b) {
  const v128 sum = _mm_add_epi8(a, b);
  return sum;
}
Unexecuted instantiation: cdef_block_sse2.c:v128_add_8
Unexecuted instantiation: cdef_block_ssse3.c:v128_add_8
Unexecuted instantiation: cdef_block_sse4.c:v128_add_8
Unexecuted instantiation: cdef_block_avx2.c:v128_add_8
97
98
606k
SIMD_INLINE v128 v128_add_16(v128 a, v128 b) { return _mm_add_epi16(a, b); }
Unexecuted instantiation: cdef_block_sse2.c:v128_add_16
Unexecuted instantiation: cdef_block_ssse3.c:v128_add_16
Unexecuted instantiation: cdef_block_sse4.c:v128_add_16
cdef_block_avx2.c:v128_add_16
Line
Count
Source
98
606k
SIMD_INLINE v128 v128_add_16(v128 a, v128 b) { return _mm_add_epi16(a, b); }
99
100
0
/* Lane-wise unsigned 8-bit addition with saturation. */
SIMD_INLINE v128 v128_sadd_u8(v128 a, v128 b) {
  const v128 sum = _mm_adds_epu8(a, b);
  return sum;
}
Unexecuted instantiation: cdef_block_sse2.c:v128_sadd_u8
Unexecuted instantiation: cdef_block_ssse3.c:v128_sadd_u8
Unexecuted instantiation: cdef_block_sse4.c:v128_sadd_u8
Unexecuted instantiation: cdef_block_avx2.c:v128_sadd_u8
101
102
0
/* Lane-wise signed 8-bit addition with saturation. */
SIMD_INLINE v128 v128_sadd_s8(v128 a, v128 b) {
  const v128 sum = _mm_adds_epi8(a, b);
  return sum;
}
Unexecuted instantiation: cdef_block_sse2.c:v128_sadd_s8
Unexecuted instantiation: cdef_block_ssse3.c:v128_sadd_s8
Unexecuted instantiation: cdef_block_sse4.c:v128_sadd_s8
Unexecuted instantiation: cdef_block_avx2.c:v128_sadd_s8
103
104
0
/* Lane-wise signed 16-bit addition with saturation. */
SIMD_INLINE v128 v128_sadd_s16(v128 a, v128 b) {
  const v128 sum = _mm_adds_epi16(a, b);
  return sum;
}
Unexecuted instantiation: cdef_block_sse2.c:v128_sadd_s16
Unexecuted instantiation: cdef_block_ssse3.c:v128_sadd_s16
Unexecuted instantiation: cdef_block_sse4.c:v128_sadd_s16
Unexecuted instantiation: cdef_block_avx2.c:v128_sadd_s16
105
106
113k
SIMD_INLINE v128 v128_add_32(v128 a, v128 b) { return _mm_add_epi32(a, b); }
Unexecuted instantiation: cdef_block_sse2.c:v128_add_32
Unexecuted instantiation: cdef_block_ssse3.c:v128_add_32
Unexecuted instantiation: cdef_block_sse4.c:v128_add_32
cdef_block_avx2.c:v128_add_32
Line
Count
Source
106
113k
SIMD_INLINE v128 v128_add_32(v128 a, v128 b) { return _mm_add_epi32(a, b); }
107
108
0
/* Lane-wise 64-bit addition with wraparound. */
SIMD_INLINE v128 v128_add_64(v128 a, v128 b) {
  const v128 sum = _mm_add_epi64(a, b);
  return sum;
}
Unexecuted instantiation: cdef_block_sse2.c:v128_add_64
Unexecuted instantiation: cdef_block_ssse3.c:v128_add_64
Unexecuted instantiation: cdef_block_sse4.c:v128_add_64
Unexecuted instantiation: cdef_block_avx2.c:v128_add_64
109
110
0
/* Adds each adjacent pair of signed 16-bit lanes of a into one 32-bit lane,
   done by a multiply-add against a vector of ones. */
SIMD_INLINE v128 v128_padd_s16(v128 a) {
  const v128 ones = _mm_set1_epi16(1);
  return _mm_madd_epi16(a, ones);
}
Unexecuted instantiation: cdef_block_sse2.c:v128_padd_s16
Unexecuted instantiation: cdef_block_ssse3.c:v128_padd_s16
Unexecuted instantiation: cdef_block_sse4.c:v128_padd_s16
Unexecuted instantiation: cdef_block_avx2.c:v128_padd_s16
113
114
0
/* Lane-wise 8-bit subtraction (a - b) with wraparound. */
SIMD_INLINE v128 v128_sub_8(v128 a, v128 b) {
  const v128 diff = _mm_sub_epi8(a, b);
  return diff;
}
Unexecuted instantiation: cdef_block_sse2.c:v128_sub_8
Unexecuted instantiation: cdef_block_ssse3.c:v128_sub_8
Unexecuted instantiation: cdef_block_sse4.c:v128_sub_8
Unexecuted instantiation: cdef_block_avx2.c:v128_sub_8
115
116
0
/* Lane-wise unsigned 8-bit subtraction (a - b) with saturation. */
SIMD_INLINE v128 v128_ssub_u8(v128 a, v128 b) {
  const v128 diff = _mm_subs_epu8(a, b);
  return diff;
}
Unexecuted instantiation: cdef_block_sse2.c:v128_ssub_u8
Unexecuted instantiation: cdef_block_ssse3.c:v128_ssub_u8
Unexecuted instantiation: cdef_block_sse4.c:v128_ssub_u8
Unexecuted instantiation: cdef_block_avx2.c:v128_ssub_u8
117
118
0
/* Lane-wise signed 8-bit subtraction (a - b) with saturation. */
SIMD_INLINE v128 v128_ssub_s8(v128 a, v128 b) {
  const v128 diff = _mm_subs_epi8(a, b);
  return diff;
}
Unexecuted instantiation: cdef_block_sse2.c:v128_ssub_s8
Unexecuted instantiation: cdef_block_ssse3.c:v128_ssub_s8
Unexecuted instantiation: cdef_block_sse4.c:v128_ssub_s8
Unexecuted instantiation: cdef_block_avx2.c:v128_ssub_s8
119
120
75.6k
SIMD_INLINE v128 v128_sub_16(v128 a, v128 b) { return _mm_sub_epi16(a, b); }
Unexecuted instantiation: cdef_block_sse2.c:v128_sub_16
Unexecuted instantiation: cdef_block_ssse3.c:v128_sub_16
Unexecuted instantiation: cdef_block_sse4.c:v128_sub_16
cdef_block_avx2.c:v128_sub_16
Line
Count
Source
120
75.6k
SIMD_INLINE v128 v128_sub_16(v128 a, v128 b) { return _mm_sub_epi16(a, b); }
121
122
0
/* Lane-wise signed 16-bit subtraction (a - b) with saturation. */
SIMD_INLINE v128 v128_ssub_s16(v128 a, v128 b) {
  const v128 diff = _mm_subs_epi16(a, b);
  return diff;
}
Unexecuted instantiation: cdef_block_sse2.c:v128_ssub_s16
Unexecuted instantiation: cdef_block_ssse3.c:v128_ssub_s16
Unexecuted instantiation: cdef_block_sse4.c:v128_ssub_s16
Unexecuted instantiation: cdef_block_avx2.c:v128_ssub_s16
123
124
0
/* Lane-wise unsigned 16-bit subtraction (a - b) with saturation. */
SIMD_INLINE v128 v128_ssub_u16(v128 a, v128 b) {
  const v128 diff = _mm_subs_epu16(a, b);
  return diff;
}
Unexecuted instantiation: cdef_block_sse2.c:v128_ssub_u16
Unexecuted instantiation: cdef_block_ssse3.c:v128_ssub_u16
Unexecuted instantiation: cdef_block_sse4.c:v128_ssub_u16
Unexecuted instantiation: cdef_block_avx2.c:v128_ssub_u16
125
126
0
/* Lane-wise 32-bit subtraction (a - b) with wraparound. */
SIMD_INLINE v128 v128_sub_32(v128 a, v128 b) {
  const v128 diff = _mm_sub_epi32(a, b);
  return diff;
}
Unexecuted instantiation: cdef_block_sse2.c:v128_sub_32
Unexecuted instantiation: cdef_block_ssse3.c:v128_sub_32
Unexecuted instantiation: cdef_block_sse4.c:v128_sub_32
Unexecuted instantiation: cdef_block_avx2.c:v128_sub_32
127
128
0
/* Lane-wise 64-bit subtraction (a - b) with wraparound. */
SIMD_INLINE v128 v128_sub_64(v128 a, v128 b) {
  const v128 diff = _mm_sub_epi64(a, b);
  return diff;
}
Unexecuted instantiation: cdef_block_sse2.c:v128_sub_64
Unexecuted instantiation: cdef_block_ssse3.c:v128_sub_64
Unexecuted instantiation: cdef_block_sse4.c:v128_sub_64
Unexecuted instantiation: cdef_block_avx2.c:v128_sub_64
129
130
0
SIMD_INLINE v128 v128_abs_s16(v128 a) {
131
#if defined(__SSSE3__)
132
  return _mm_abs_epi16(a);
133
#else
134
  return _mm_max_epi16(a, _mm_sub_epi16(_mm_setzero_si128(), a));
135
#endif
136
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_abs_s16
Unexecuted instantiation: cdef_block_ssse3.c:v128_abs_s16
Unexecuted instantiation: cdef_block_sse4.c:v128_abs_s16
Unexecuted instantiation: cdef_block_avx2.c:v128_abs_s16
137
138
0
SIMD_INLINE v128 v128_abs_s8(v128 a) {
139
0
#if defined(__SSSE3__)
140
0
  return _mm_abs_epi8(a);
141
0
#else
142
0
  v128 sign = _mm_cmplt_epi8(a, _mm_setzero_si128());
143
0
  return _mm_xor_si128(sign, _mm_add_epi8(a, sign));
144
0
#endif
145
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_abs_s8
Unexecuted instantiation: cdef_block_ssse3.c:v128_abs_s8
Unexecuted instantiation: cdef_block_sse4.c:v128_abs_s8
Unexecuted instantiation: cdef_block_avx2.c:v128_abs_s8
146
147
0
SIMD_INLINE v128 v128_ziplo_8(v128 a, v128 b) {
148
0
  return _mm_unpacklo_epi8(b, a);
149
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_ziplo_8
Unexecuted instantiation: cdef_block_ssse3.c:v128_ziplo_8
Unexecuted instantiation: cdef_block_sse4.c:v128_ziplo_8
Unexecuted instantiation: cdef_block_avx2.c:v128_ziplo_8
150
151
0
SIMD_INLINE v128 v128_ziphi_8(v128 a, v128 b) {
152
0
  return _mm_unpackhi_epi8(b, a);
153
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_ziphi_8
Unexecuted instantiation: cdef_block_ssse3.c:v128_ziphi_8
Unexecuted instantiation: cdef_block_sse4.c:v128_ziphi_8
Unexecuted instantiation: cdef_block_avx2.c:v128_ziphi_8
154
155
94.6k
SIMD_INLINE v128 v128_ziplo_16(v128 a, v128 b) {
156
94.6k
  return _mm_unpacklo_epi16(b, a);
157
94.6k
}
Unexecuted instantiation: cdef_block_sse2.c:v128_ziplo_16
Unexecuted instantiation: cdef_block_ssse3.c:v128_ziplo_16
Unexecuted instantiation: cdef_block_sse4.c:v128_ziplo_16
cdef_block_avx2.c:v128_ziplo_16
Line
Count
Source
155
94.6k
SIMD_INLINE v128 v128_ziplo_16(v128 a, v128 b) {
156
94.6k
  return _mm_unpacklo_epi16(b, a);
157
94.6k
}
158
159
94.6k
SIMD_INLINE v128 v128_ziphi_16(v128 a, v128 b) {
160
94.6k
  return _mm_unpackhi_epi16(b, a);
161
94.6k
}
Unexecuted instantiation: cdef_block_sse2.c:v128_ziphi_16
Unexecuted instantiation: cdef_block_ssse3.c:v128_ziphi_16
Unexecuted instantiation: cdef_block_sse4.c:v128_ziphi_16
cdef_block_avx2.c:v128_ziphi_16
Line
Count
Source
159
94.6k
SIMD_INLINE v128 v128_ziphi_16(v128 a, v128 b) {
160
94.6k
  return _mm_unpackhi_epi16(b, a);
161
94.6k
}
162
163
75.7k
SIMD_INLINE v128 v128_ziplo_32(v128 a, v128 b) {
164
75.7k
  return _mm_unpacklo_epi32(b, a);
165
75.7k
}
Unexecuted instantiation: cdef_block_sse2.c:v128_ziplo_32
Unexecuted instantiation: cdef_block_ssse3.c:v128_ziplo_32
Unexecuted instantiation: cdef_block_sse4.c:v128_ziplo_32
cdef_block_avx2.c:v128_ziplo_32
Line
Count
Source
163
75.7k
SIMD_INLINE v128 v128_ziplo_32(v128 a, v128 b) {
164
75.7k
  return _mm_unpacklo_epi32(b, a);
165
75.7k
}
166
167
75.7k
SIMD_INLINE v128 v128_ziphi_32(v128 a, v128 b) {
168
75.7k
  return _mm_unpackhi_epi32(b, a);
169
75.7k
}
Unexecuted instantiation: cdef_block_sse2.c:v128_ziphi_32
Unexecuted instantiation: cdef_block_ssse3.c:v128_ziphi_32
Unexecuted instantiation: cdef_block_sse4.c:v128_ziphi_32
cdef_block_avx2.c:v128_ziphi_32
Line
Count
Source
167
75.7k
SIMD_INLINE v128 v128_ziphi_32(v128 a, v128 b) {
168
75.7k
  return _mm_unpackhi_epi32(b, a);
169
75.7k
}
170
171
75.7k
SIMD_INLINE v128 v128_ziplo_64(v128 a, v128 b) {
172
75.7k
  return _mm_unpacklo_epi64(b, a);
173
75.7k
}
Unexecuted instantiation: cdef_block_sse2.c:v128_ziplo_64
Unexecuted instantiation: cdef_block_ssse3.c:v128_ziplo_64
Unexecuted instantiation: cdef_block_sse4.c:v128_ziplo_64
cdef_block_avx2.c:v128_ziplo_64
Line
Count
Source
171
75.7k
SIMD_INLINE v128 v128_ziplo_64(v128 a, v128 b) {
172
75.7k
  return _mm_unpacklo_epi64(b, a);
173
75.7k
}
174
175
75.7k
SIMD_INLINE v128 v128_ziphi_64(v128 a, v128 b) {
176
75.7k
  return _mm_unpackhi_epi64(b, a);
177
75.7k
}
Unexecuted instantiation: cdef_block_sse2.c:v128_ziphi_64
Unexecuted instantiation: cdef_block_ssse3.c:v128_ziphi_64
Unexecuted instantiation: cdef_block_sse4.c:v128_ziphi_64
cdef_block_avx2.c:v128_ziphi_64
Line
Count
Source
175
75.7k
SIMD_INLINE v128 v128_ziphi_64(v128 a, v128 b) {
176
75.7k
  return _mm_unpackhi_epi64(b, a);
177
75.7k
}
178
179
0
/* Interleaves the low eight bytes of b and a: bytes taken from b occupy the
   even byte positions of the result and bytes from a the odd positions. */
SIMD_INLINE v128 v128_zip_8(v64 a, v64 b) {
  const v128 interleaved = _mm_unpacklo_epi8(b, a);
  return interleaved;
}
Unexecuted instantiation: cdef_block_sse2.c:v128_zip_8
Unexecuted instantiation: cdef_block_ssse3.c:v128_zip_8
Unexecuted instantiation: cdef_block_sse4.c:v128_zip_8
Unexecuted instantiation: cdef_block_avx2.c:v128_zip_8
180
181
0
/* Interleaves the low four 16-bit lanes of b and a: lanes taken from b
   occupy the even lane positions of the result and lanes from a the odd
   positions. */
SIMD_INLINE v128 v128_zip_16(v64 a, v64 b) {
  const v128 interleaved = _mm_unpacklo_epi16(b, a);
  return interleaved;
}
Unexecuted instantiation: cdef_block_sse2.c:v128_zip_16
Unexecuted instantiation: cdef_block_ssse3.c:v128_zip_16
Unexecuted instantiation: cdef_block_sse4.c:v128_zip_16
Unexecuted instantiation: cdef_block_avx2.c:v128_zip_16
182
183
0
/* Interleaves the low two 32-bit lanes of b and a: lanes taken from b
   occupy the even lane positions of the result and lanes from a the odd
   positions. */
SIMD_INLINE v128 v128_zip_32(v64 a, v64 b) {
  const v128 interleaved = _mm_unpacklo_epi32(b, a);
  return interleaved;
}
Unexecuted instantiation: cdef_block_sse2.c:v128_zip_32
Unexecuted instantiation: cdef_block_ssse3.c:v128_zip_32
Unexecuted instantiation: cdef_block_sse4.c:v128_zip_32
Unexecuted instantiation: cdef_block_avx2.c:v128_zip_32
184
185
0
SIMD_INLINE v128 v128_unziphi_8(v128 a, v128 b) {
186
0
  return _mm_packs_epi16(_mm_srai_epi16(b, 8), _mm_srai_epi16(a, 8));
187
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_unziphi_8
Unexecuted instantiation: cdef_block_ssse3.c:v128_unziphi_8
Unexecuted instantiation: cdef_block_sse4.c:v128_unziphi_8
Unexecuted instantiation: cdef_block_avx2.c:v128_unziphi_8
188
189
0
SIMD_INLINE v128 v128_unziplo_8(v128 a, v128 b) {
190
0
#if defined(__SSSE3__)
191
0
#ifdef __x86_64__
192
0
  v128 order = _mm_cvtsi64_si128(0x0e0c0a0806040200LL);
193
0
#else
194
0
  v128 order = _mm_set_epi32(0, 0, 0x0e0c0a08, 0x06040200);
195
0
#endif
196
0
  return _mm_unpacklo_epi64(_mm_shuffle_epi8(b, order),
197
0
                            _mm_shuffle_epi8(a, order));
198
0
#else
199
0
  return v128_unziphi_8(_mm_slli_si128(a, 1), _mm_slli_si128(b, 1));
200
0
#endif
201
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_unziplo_8
Unexecuted instantiation: cdef_block_ssse3.c:v128_unziplo_8
Unexecuted instantiation: cdef_block_sse4.c:v128_unziplo_8
Unexecuted instantiation: cdef_block_avx2.c:v128_unziplo_8
202
203
0
SIMD_INLINE v128 v128_unziphi_16(v128 a, v128 b) {
204
0
  return _mm_packs_epi32(_mm_srai_epi32(b, 16), _mm_srai_epi32(a, 16));
205
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_unziphi_16
Unexecuted instantiation: cdef_block_ssse3.c:v128_unziphi_16
Unexecuted instantiation: cdef_block_sse4.c:v128_unziphi_16
Unexecuted instantiation: cdef_block_avx2.c:v128_unziphi_16
206
207
0
SIMD_INLINE v128 v128_unziplo_16(v128 a, v128 b) {
208
0
#if defined(__SSSE3__)
209
0
#ifdef __x86_64__
210
0
  v128 order = _mm_cvtsi64_si128(0x0d0c090805040100LL);
211
0
#else
212
0
  v128 order = _mm_set_epi32(0, 0, 0x0d0c0908, 0x05040100);
213
0
#endif
214
0
  return _mm_unpacklo_epi64(_mm_shuffle_epi8(b, order),
215
0
                            _mm_shuffle_epi8(a, order));
216
0
#else
217
0
  return v128_unziphi_16(_mm_slli_si128(a, 2), _mm_slli_si128(b, 2));
218
0
#endif
219
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_unziplo_16
Unexecuted instantiation: cdef_block_ssse3.c:v128_unziplo_16
Unexecuted instantiation: cdef_block_sse4.c:v128_unziplo_16
Unexecuted instantiation: cdef_block_avx2.c:v128_unziplo_16
220
221
0
SIMD_INLINE v128 v128_unziphi_32(v128 a, v128 b) {
222
0
  return _mm_castps_si128(_mm_shuffle_ps(
223
0
      _mm_castsi128_ps(b), _mm_castsi128_ps(a), _MM_SHUFFLE(3, 1, 3, 1)));
224
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_unziphi_32
Unexecuted instantiation: cdef_block_ssse3.c:v128_unziphi_32
Unexecuted instantiation: cdef_block_sse4.c:v128_unziphi_32
Unexecuted instantiation: cdef_block_avx2.c:v128_unziphi_32
225
226
0
SIMD_INLINE v128 v128_unziplo_32(v128 a, v128 b) {
227
0
  return _mm_castps_si128(_mm_shuffle_ps(
228
0
      _mm_castsi128_ps(b), _mm_castsi128_ps(a), _MM_SHUFFLE(2, 0, 2, 0)));
229
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_unziplo_32
Unexecuted instantiation: cdef_block_ssse3.c:v128_unziplo_32
Unexecuted instantiation: cdef_block_sse4.c:v128_unziplo_32
Unexecuted instantiation: cdef_block_avx2.c:v128_unziplo_32
230
231
0
SIMD_INLINE v128 v128_unpack_u8_s16(v64 a) {
232
0
  return _mm_unpacklo_epi8(a, _mm_setzero_si128());
233
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_unpack_u8_s16
Unexecuted instantiation: cdef_block_ssse3.c:v128_unpack_u8_s16
Unexecuted instantiation: cdef_block_sse4.c:v128_unpack_u8_s16
Unexecuted instantiation: cdef_block_avx2.c:v128_unpack_u8_s16
234
235
0
SIMD_INLINE v128 v128_unpacklo_u8_s16(v128 a) {
236
0
  return _mm_unpacklo_epi8(a, _mm_setzero_si128());
237
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_unpacklo_u8_s16
Unexecuted instantiation: cdef_block_ssse3.c:v128_unpacklo_u8_s16
Unexecuted instantiation: cdef_block_sse4.c:v128_unpacklo_u8_s16
Unexecuted instantiation: cdef_block_avx2.c:v128_unpacklo_u8_s16
238
239
0
SIMD_INLINE v128 v128_unpackhi_u8_s16(v128 a) {
240
0
  return _mm_unpackhi_epi8(a, _mm_setzero_si128());
241
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_unpackhi_u8_s16
Unexecuted instantiation: cdef_block_ssse3.c:v128_unpackhi_u8_s16
Unexecuted instantiation: cdef_block_sse4.c:v128_unpackhi_u8_s16
Unexecuted instantiation: cdef_block_avx2.c:v128_unpackhi_u8_s16
242
243
0
SIMD_INLINE v128 v128_unpack_s8_s16(v64 a) {
244
0
  return _mm_srai_epi16(_mm_unpacklo_epi8(a, a), 8);
245
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_unpack_s8_s16
Unexecuted instantiation: cdef_block_ssse3.c:v128_unpack_s8_s16
Unexecuted instantiation: cdef_block_sse4.c:v128_unpack_s8_s16
Unexecuted instantiation: cdef_block_avx2.c:v128_unpack_s8_s16
246
247
0
SIMD_INLINE v128 v128_unpacklo_s8_s16(v128 a) {
248
0
  return _mm_srai_epi16(_mm_unpacklo_epi8(a, a), 8);
249
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_unpacklo_s8_s16
Unexecuted instantiation: cdef_block_ssse3.c:v128_unpacklo_s8_s16
Unexecuted instantiation: cdef_block_sse4.c:v128_unpacklo_s8_s16
Unexecuted instantiation: cdef_block_avx2.c:v128_unpacklo_s8_s16
250
251
0
SIMD_INLINE v128 v128_unpackhi_s8_s16(v128 a) {
252
0
  return _mm_srai_epi16(_mm_unpackhi_epi8(a, a), 8);
253
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_unpackhi_s8_s16
Unexecuted instantiation: cdef_block_ssse3.c:v128_unpackhi_s8_s16
Unexecuted instantiation: cdef_block_sse4.c:v128_unpackhi_s8_s16
Unexecuted instantiation: cdef_block_avx2.c:v128_unpackhi_s8_s16
254
255
9.46k
SIMD_INLINE v128 v128_pack_s32_s16(v128 a, v128 b) {
256
9.46k
  return _mm_packs_epi32(b, a);
257
9.46k
}
Unexecuted instantiation: cdef_block_sse2.c:v128_pack_s32_s16
Unexecuted instantiation: cdef_block_ssse3.c:v128_pack_s32_s16
Unexecuted instantiation: cdef_block_sse4.c:v128_pack_s32_s16
cdef_block_avx2.c:v128_pack_s32_s16
Line
Count
Source
255
9.46k
SIMD_INLINE v128 v128_pack_s32_s16(v128 a, v128 b) {
256
9.46k
  return _mm_packs_epi32(b, a);
257
9.46k
}
258
259
0
SIMD_INLINE v128 v128_pack_s32_u16(v128 a, v128 b) {
260
0
#if defined(__SSE4_1__)
261
0
  return _mm_packus_epi32(b, a);
262
0
#else
263
0
  return v128_from_v64(v64_pack_s32_u16(v128_high_v64(a), v128_low_v64(a)),
264
0
                       v64_pack_s32_u16(v128_high_v64(b), v128_low_v64(b)));
265
0
#endif
266
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_pack_s32_u16
Unexecuted instantiation: cdef_block_ssse3.c:v128_pack_s32_u16
Unexecuted instantiation: cdef_block_sse4.c:v128_pack_s32_u16
Unexecuted instantiation: cdef_block_avx2.c:v128_pack_s32_u16
267
268
15.2M
SIMD_INLINE v128 v128_pack_s16_u8(v128 a, v128 b) {
269
15.2M
  return _mm_packus_epi16(b, a);
270
15.2M
}
Unexecuted instantiation: cdef_block_sse2.c:v128_pack_s16_u8
Unexecuted instantiation: cdef_block_ssse3.c:v128_pack_s16_u8
Unexecuted instantiation: cdef_block_sse4.c:v128_pack_s16_u8
cdef_block_avx2.c:v128_pack_s16_u8
Line
Count
Source
268
15.2M
SIMD_INLINE v128 v128_pack_s16_u8(v128 a, v128 b) {
269
15.2M
  return _mm_packus_epi16(b, a);
270
15.2M
}
271
272
9.46k
SIMD_INLINE v128 v128_pack_s16_s8(v128 a, v128 b) {
273
9.46k
  return _mm_packs_epi16(b, a);
274
9.46k
}
Unexecuted instantiation: cdef_block_sse2.c:v128_pack_s16_s8
Unexecuted instantiation: cdef_block_ssse3.c:v128_pack_s16_s8
Unexecuted instantiation: cdef_block_sse4.c:v128_pack_s16_s8
cdef_block_avx2.c:v128_pack_s16_s8
Line
Count
Source
272
9.46k
SIMD_INLINE v128 v128_pack_s16_s8(v128 a, v128 b) {
273
9.46k
  return _mm_packs_epi16(b, a);
274
9.46k
}
275
276
0
SIMD_INLINE v128 v128_unpack_u16_s32(v64 a) {
277
0
  return _mm_unpacklo_epi16(a, _mm_setzero_si128());
278
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_unpack_u16_s32
Unexecuted instantiation: cdef_block_ssse3.c:v128_unpack_u16_s32
Unexecuted instantiation: cdef_block_sse4.c:v128_unpack_u16_s32
Unexecuted instantiation: cdef_block_avx2.c:v128_unpack_u16_s32
279
280
0
SIMD_INLINE v128 v128_unpack_s16_s32(v64 a) {
281
0
  return _mm_srai_epi32(_mm_unpacklo_epi16(a, a), 16);
282
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_unpack_s16_s32
Unexecuted instantiation: cdef_block_ssse3.c:v128_unpack_s16_s32
Unexecuted instantiation: cdef_block_sse4.c:v128_unpack_s16_s32
Unexecuted instantiation: cdef_block_avx2.c:v128_unpack_s16_s32
283
284
0
SIMD_INLINE v128 v128_unpacklo_u16_s32(v128 a) {
285
0
  return _mm_unpacklo_epi16(a, _mm_setzero_si128());
286
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_unpacklo_u16_s32
Unexecuted instantiation: cdef_block_ssse3.c:v128_unpacklo_u16_s32
Unexecuted instantiation: cdef_block_sse4.c:v128_unpacklo_u16_s32
Unexecuted instantiation: cdef_block_avx2.c:v128_unpacklo_u16_s32
287
288
0
SIMD_INLINE v128 v128_unpacklo_s16_s32(v128 a) {
289
0
  return _mm_srai_epi32(_mm_unpacklo_epi16(a, a), 16);
290
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_unpacklo_s16_s32
Unexecuted instantiation: cdef_block_ssse3.c:v128_unpacklo_s16_s32
Unexecuted instantiation: cdef_block_sse4.c:v128_unpacklo_s16_s32
Unexecuted instantiation: cdef_block_avx2.c:v128_unpacklo_s16_s32
291
292
0
SIMD_INLINE v128 v128_unpackhi_u16_s32(v128 a) {
293
0
  return _mm_unpackhi_epi16(a, _mm_setzero_si128());
294
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_unpackhi_u16_s32
Unexecuted instantiation: cdef_block_ssse3.c:v128_unpackhi_u16_s32
Unexecuted instantiation: cdef_block_sse4.c:v128_unpackhi_u16_s32
Unexecuted instantiation: cdef_block_avx2.c:v128_unpackhi_u16_s32
295
296
0
SIMD_INLINE v128 v128_unpackhi_s16_s32(v128 a) {
297
0
  return _mm_srai_epi32(_mm_unpackhi_epi16(a, a), 16);
298
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_unpackhi_s16_s32
Unexecuted instantiation: cdef_block_ssse3.c:v128_unpackhi_s16_s32
Unexecuted instantiation: cdef_block_sse4.c:v128_unpackhi_s16_s32
Unexecuted instantiation: cdef_block_avx2.c:v128_unpackhi_s16_s32
299
300
56.8k
SIMD_INLINE v128 v128_shuffle_8(v128 x, v128 pattern) {
301
#if defined(__SSSE3__)
302
  return _mm_shuffle_epi8(x, pattern);
303
#else
304
  v128 output;
305
  unsigned char *input = (unsigned char *)&x;
306
  unsigned char *index = (unsigned char *)&pattern;
307
  unsigned char *selected = (unsigned char *)&output;
308
  int counter;
309
310
0
  for (counter = 0; counter < 16; counter++) {
311
0
    selected[counter] = input[index[counter] & 15];
312
0
  }
313
314
  return output;
315
#endif
316
56.8k
}
Unexecuted instantiation: cdef_block_sse2.c:v128_shuffle_8
Unexecuted instantiation: cdef_block_ssse3.c:v128_shuffle_8
Unexecuted instantiation: cdef_block_sse4.c:v128_shuffle_8
cdef_block_avx2.c:v128_shuffle_8
Line
Count
Source
300
56.8k
SIMD_INLINE v128 v128_shuffle_8(v128 x, v128 pattern) {
301
56.8k
#if defined(__SSSE3__)
302
56.8k
  return _mm_shuffle_epi8(x, pattern);
303
#else
304
  v128 output;
305
  unsigned char *input = (unsigned char *)&x;
306
  unsigned char *index = (unsigned char *)&pattern;
307
  unsigned char *selected = (unsigned char *)&output;
308
  int counter;
309
310
  for (counter = 0; counter < 16; counter++) {
311
    selected[counter] = input[index[counter] & 15];
312
  }
313
314
  return output;
315
#endif
316
56.8k
}
317
318
0
SIMD_INLINE int64_t v128_dotp_su8(v128 a, v128 b) {
319
0
  v128 t1 = _mm_madd_epi16(v128_unpackhi_s8_s16(a), v128_unpackhi_u8_s16(b));
320
0
  v128 t2 = _mm_madd_epi16(v128_unpacklo_s8_s16(a), v128_unpacklo_u8_s16(b));
321
0
  v128 t = v128_add_32(t1, t2);
322
0
  t = v128_add_32(t, _mm_srli_si128(t, 8));
323
0
  t = v128_add_32(t, _mm_srli_si128(t, 4));
324
0
  return (int32_t)v128_low_u32(t);
325
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_dotp_su8
Unexecuted instantiation: cdef_block_ssse3.c:v128_dotp_su8
Unexecuted instantiation: cdef_block_sse4.c:v128_dotp_su8
Unexecuted instantiation: cdef_block_avx2.c:v128_dotp_su8
326
327
0
SIMD_INLINE int64_t v128_dotp_s16(v128 a, v128 b) {
328
0
  v128 r = _mm_madd_epi16(a, b);
329
0
#if defined(__SSE4_1__) && defined(__x86_64__)
330
0
  v128 c = _mm_add_epi64(_mm_cvtepi32_epi64(r),
331
0
                         _mm_cvtepi32_epi64(_mm_srli_si128(r, 8)));
332
0
  return _mm_cvtsi128_si64(_mm_add_epi64(c, _mm_srli_si128(c, 8)));
333
0
#else
334
0
  return (int64_t)_mm_cvtsi128_si32(r) +
335
0
         (int64_t)_mm_cvtsi128_si32(_mm_srli_si128(r, 4)) +
336
0
         (int64_t)_mm_cvtsi128_si32(_mm_srli_si128(r, 8)) +
337
0
         (int64_t)_mm_cvtsi128_si32(_mm_srli_si128(r, 12));
338
0
#endif
339
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_dotp_s16
Unexecuted instantiation: cdef_block_ssse3.c:v128_dotp_s16
Unexecuted instantiation: cdef_block_sse4.c:v128_dotp_s16
Unexecuted instantiation: cdef_block_avx2.c:v128_dotp_s16
340
341
0
SIMD_INLINE uint64_t v128_hadd_u8(v128 a) {
342
0
  v128 t = _mm_sad_epu8(a, _mm_setzero_si128());
343
0
  return v64_low_u32(v128_low_v64(t)) + v64_low_u32(v128_high_v64(t));
344
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_hadd_u8
Unexecuted instantiation: cdef_block_ssse3.c:v128_hadd_u8
Unexecuted instantiation: cdef_block_sse4.c:v128_hadd_u8
Unexecuted instantiation: cdef_block_avx2.c:v128_hadd_u8
345
346
typedef v128 sad128_internal;
347
348
0
SIMD_INLINE sad128_internal v128_sad_u8_init() { return _mm_setzero_si128(); }
Unexecuted instantiation: cdef_block_sse2.c:v128_sad_u8_init
Unexecuted instantiation: cdef_block_ssse3.c:v128_sad_u8_init
Unexecuted instantiation: cdef_block_sse4.c:v128_sad_u8_init
Unexecuted instantiation: cdef_block_avx2.c:v128_sad_u8_init
349
350
/* Implementation dependent return value.  Result must be finalised with
351
   v128_sad_sum().
352
   The result for more than 32 v128_sad_u8() calls is undefined. */
353
0
SIMD_INLINE sad128_internal v128_sad_u8(sad128_internal s, v128 a, v128 b) {
354
0
  return _mm_add_epi64(s, _mm_sad_epu8(a, b));
355
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_sad_u8
Unexecuted instantiation: cdef_block_ssse3.c:v128_sad_u8
Unexecuted instantiation: cdef_block_sse4.c:v128_sad_u8
Unexecuted instantiation: cdef_block_avx2.c:v128_sad_u8
356
357
0
SIMD_INLINE uint32_t v128_sad_u8_sum(sad128_internal s) {
358
0
  return v128_low_u32(_mm_add_epi32(s, _mm_unpackhi_epi64(s, s)));
359
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_sad_u8_sum
Unexecuted instantiation: cdef_block_ssse3.c:v128_sad_u8_sum
Unexecuted instantiation: cdef_block_sse4.c:v128_sad_u8_sum
Unexecuted instantiation: cdef_block_avx2.c:v128_sad_u8_sum
360
361
typedef int32_t ssd128_internal;
362
363
0
SIMD_INLINE ssd128_internal v128_ssd_u8_init() { return 0; }
Unexecuted instantiation: cdef_block_sse2.c:v128_ssd_u8_init
Unexecuted instantiation: cdef_block_ssse3.c:v128_ssd_u8_init
Unexecuted instantiation: cdef_block_sse4.c:v128_ssd_u8_init
Unexecuted instantiation: cdef_block_avx2.c:v128_ssd_u8_init
364
365
/* Implementation dependent return value.  Result must be finalised with
366
 * v128_ssd_sum(). */
367
0
SIMD_INLINE ssd128_internal v128_ssd_u8(ssd128_internal s, v128 a, v128 b) {
368
0
  v128 z = _mm_setzero_si128();
369
0
  v128 l = _mm_sub_epi16(_mm_unpacklo_epi8(a, z), _mm_unpacklo_epi8(b, z));
370
0
  v128 h = _mm_sub_epi16(_mm_unpackhi_epi8(a, z), _mm_unpackhi_epi8(b, z));
371
0
  v128 rl = _mm_madd_epi16(l, l);
372
0
  v128 rh = _mm_madd_epi16(h, h);
373
0
  v128 r = _mm_add_epi32(rl, rh);
374
0
  r = _mm_add_epi32(r, _mm_srli_si128(r, 8));
375
0
  r = _mm_add_epi32(r, _mm_srli_si128(r, 4));
376
0
  return s + _mm_cvtsi128_si32(r);
377
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_ssd_u8
Unexecuted instantiation: cdef_block_ssse3.c:v128_ssd_u8
Unexecuted instantiation: cdef_block_sse4.c:v128_ssd_u8
Unexecuted instantiation: cdef_block_avx2.c:v128_ssd_u8
378
379
0
SIMD_INLINE int32_t v128_ssd_u8_sum(ssd128_internal s) { return s; }
Unexecuted instantiation: cdef_block_sse2.c:v128_ssd_u8_sum
Unexecuted instantiation: cdef_block_ssse3.c:v128_ssd_u8_sum
Unexecuted instantiation: cdef_block_sse4.c:v128_ssd_u8_sum
Unexecuted instantiation: cdef_block_avx2.c:v128_ssd_u8_sum
380
381
0
SIMD_INLINE v128 v128_or(v128 a, v128 b) { return _mm_or_si128(a, b); }
Unexecuted instantiation: cdef_block_sse2.c:v128_or
Unexecuted instantiation: cdef_block_ssse3.c:v128_or
Unexecuted instantiation: cdef_block_sse4.c:v128_or
Unexecuted instantiation: cdef_block_avx2.c:v128_or
382
383
0
SIMD_INLINE v128 v128_xor(v128 a, v128 b) { return _mm_xor_si128(a, b); }
Unexecuted instantiation: cdef_block_sse2.c:v128_xor
Unexecuted instantiation: cdef_block_ssse3.c:v128_xor
Unexecuted instantiation: cdef_block_sse4.c:v128_xor
Unexecuted instantiation: cdef_block_avx2.c:v128_xor
384
385
0
SIMD_INLINE v128 v128_and(v128 a, v128 b) { return _mm_and_si128(a, b); }
Unexecuted instantiation: cdef_block_sse2.c:v128_and
Unexecuted instantiation: cdef_block_ssse3.c:v128_and
Unexecuted instantiation: cdef_block_sse4.c:v128_and
Unexecuted instantiation: cdef_block_avx2.c:v128_and
386
387
0
SIMD_INLINE v128 v128_andn(v128 a, v128 b) { return _mm_andnot_si128(b, a); }
Unexecuted instantiation: cdef_block_sse2.c:v128_andn
Unexecuted instantiation: cdef_block_ssse3.c:v128_andn
Unexecuted instantiation: cdef_block_sse4.c:v128_andn
Unexecuted instantiation: cdef_block_avx2.c:v128_andn
388
389
0
SIMD_INLINE v128 v128_mul_s16(v64 a, v64 b) {
390
0
  v64 lo_bits = v64_mullo_s16(a, b);
391
0
  v64 hi_bits = v64_mulhi_s16(a, b);
392
0
  return v128_from_v64(v64_ziphi_16(hi_bits, lo_bits),
393
0
                       v64_ziplo_16(hi_bits, lo_bits));
394
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_mul_s16
Unexecuted instantiation: cdef_block_ssse3.c:v128_mul_s16
Unexecuted instantiation: cdef_block_sse4.c:v128_mul_s16
Unexecuted instantiation: cdef_block_avx2.c:v128_mul_s16
395
396
0
SIMD_INLINE v128 v128_mullo_s16(v128 a, v128 b) {
397
0
  return _mm_mullo_epi16(a, b);
398
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_mullo_s16
Unexecuted instantiation: cdef_block_ssse3.c:v128_mullo_s16
Unexecuted instantiation: cdef_block_sse4.c:v128_mullo_s16
Unexecuted instantiation: cdef_block_avx2.c:v128_mullo_s16
399
400
0
SIMD_INLINE v128 v128_mulhi_s16(v128 a, v128 b) {
401
0
  return _mm_mulhi_epi16(a, b);
402
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_mulhi_s16
Unexecuted instantiation: cdef_block_ssse3.c:v128_mulhi_s16
Unexecuted instantiation: cdef_block_sse4.c:v128_mulhi_s16
Unexecuted instantiation: cdef_block_avx2.c:v128_mulhi_s16
403
404
132k
SIMD_INLINE v128 v128_mullo_s32(v128 a, v128 b) {
405
#if defined(__SSE4_1__)
406
  return _mm_mullo_epi32(a, b);
407
#else
408
  return _mm_unpacklo_epi32(
409
      _mm_shuffle_epi32(_mm_mul_epu32(a, b), 8),
410
      _mm_shuffle_epi32(
411
          _mm_mul_epu32(_mm_srli_si128(a, 4), _mm_srli_si128(b, 4)), 8));
412
#endif
413
132k
}
Unexecuted instantiation: cdef_block_sse2.c:v128_mullo_s32
Unexecuted instantiation: cdef_block_ssse3.c:v128_mullo_s32
Unexecuted instantiation: cdef_block_sse4.c:v128_mullo_s32
cdef_block_avx2.c:v128_mullo_s32
Line
Count
Source
404
132k
SIMD_INLINE v128 v128_mullo_s32(v128 a, v128 b) {
405
132k
#if defined(__SSE4_1__)
406
132k
  return _mm_mullo_epi32(a, b);
407
#else
408
  return _mm_unpacklo_epi32(
409
      _mm_shuffle_epi32(_mm_mul_epu32(a, b), 8),
410
      _mm_shuffle_epi32(
411
          _mm_mul_epu32(_mm_srli_si128(a, 4), _mm_srli_si128(b, 4)), 8));
412
#endif
413
132k
}
414
415
0
SIMD_INLINE int64_t v128_dotp_s32(v128 a, v128 b) {
416
0
  v128 r = v128_mullo_s32(a, b);
417
0
  return (int64_t)_mm_cvtsi128_si32(r) +
418
0
         (int64_t)_mm_cvtsi128_si32(_mm_srli_si128(r, 4)) +
419
0
         (int64_t)_mm_cvtsi128_si32(_mm_srli_si128(r, 8)) +
420
0
         (int64_t)_mm_cvtsi128_si32(_mm_srli_si128(r, 12));
421
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_dotp_s32
Unexecuted instantiation: cdef_block_ssse3.c:v128_dotp_s32
Unexecuted instantiation: cdef_block_sse4.c:v128_dotp_s32
Unexecuted instantiation: cdef_block_avx2.c:v128_dotp_s32
422
423
132k
SIMD_INLINE v128 v128_madd_s16(v128 a, v128 b) { return _mm_madd_epi16(a, b); }
Unexecuted instantiation: cdef_block_sse2.c:v128_madd_s16
Unexecuted instantiation: cdef_block_ssse3.c:v128_madd_s16
Unexecuted instantiation: cdef_block_sse4.c:v128_madd_s16
cdef_block_avx2.c:v128_madd_s16
Line
Count
Source
423
132k
SIMD_INLINE v128 v128_madd_s16(v128 a, v128 b) { return _mm_madd_epi16(a, b); }
424
425
0
SIMD_INLINE v128 v128_madd_us8(v128 a, v128 b) {
426
0
#if defined(__SSSE3__)
427
0
  return _mm_maddubs_epi16(a, b);
428
0
#else
429
0
  return _mm_packs_epi32(
430
0
      _mm_madd_epi16(_mm_unpacklo_epi8(a, _mm_setzero_si128()),
431
0
                     _mm_srai_epi16(_mm_unpacklo_epi8(b, b), 8)),
432
0
      _mm_madd_epi16(_mm_unpackhi_epi8(a, _mm_setzero_si128()),
433
0
                     _mm_srai_epi16(_mm_unpackhi_epi8(b, b), 8)));
434
0
#endif
435
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_madd_us8
Unexecuted instantiation: cdef_block_ssse3.c:v128_madd_us8
Unexecuted instantiation: cdef_block_sse4.c:v128_madd_us8
Unexecuted instantiation: cdef_block_avx2.c:v128_madd_us8
436
437
0
SIMD_INLINE v128 v128_padd_u8(v128 a) {
438
0
  return v128_madd_us8(a, _mm_set1_epi8(1));
439
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_padd_u8
Unexecuted instantiation: cdef_block_ssse3.c:v128_padd_u8
Unexecuted instantiation: cdef_block_sse4.c:v128_padd_u8
Unexecuted instantiation: cdef_block_avx2.c:v128_padd_u8
440
441
0
SIMD_INLINE v128 v128_avg_u8(v128 a, v128 b) { return _mm_avg_epu8(a, b); }
Unexecuted instantiation: cdef_block_sse2.c:v128_avg_u8
Unexecuted instantiation: cdef_block_ssse3.c:v128_avg_u8
Unexecuted instantiation: cdef_block_sse4.c:v128_avg_u8
Unexecuted instantiation: cdef_block_avx2.c:v128_avg_u8
442
443
0
SIMD_INLINE v128 v128_rdavg_u8(v128 a, v128 b) {
444
0
  return _mm_sub_epi8(_mm_avg_epu8(a, b),
445
0
                      _mm_and_si128(_mm_xor_si128(a, b), v128_dup_8(1)));
446
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_rdavg_u8
Unexecuted instantiation: cdef_block_ssse3.c:v128_rdavg_u8
Unexecuted instantiation: cdef_block_sse4.c:v128_rdavg_u8
Unexecuted instantiation: cdef_block_avx2.c:v128_rdavg_u8
447
448
0
SIMD_INLINE v128 v128_rdavg_u16(v128 a, v128 b) {
449
0
  return _mm_sub_epi16(_mm_avg_epu16(a, b),
450
0
                       _mm_and_si128(_mm_xor_si128(a, b), v128_dup_16(1)));
451
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_rdavg_u16
Unexecuted instantiation: cdef_block_ssse3.c:v128_rdavg_u16
Unexecuted instantiation: cdef_block_sse4.c:v128_rdavg_u16
Unexecuted instantiation: cdef_block_avx2.c:v128_rdavg_u16
452
453
0
SIMD_INLINE v128 v128_avg_u16(v128 a, v128 b) { return _mm_avg_epu16(a, b); }
Unexecuted instantiation: cdef_block_sse2.c:v128_avg_u16
Unexecuted instantiation: cdef_block_ssse3.c:v128_avg_u16
Unexecuted instantiation: cdef_block_sse4.c:v128_avg_u16
Unexecuted instantiation: cdef_block_avx2.c:v128_avg_u16
454
455
0
SIMD_INLINE v128 v128_min_u8(v128 a, v128 b) { return _mm_min_epu8(a, b); }
Unexecuted instantiation: cdef_block_sse2.c:v128_min_u8
Unexecuted instantiation: cdef_block_ssse3.c:v128_min_u8
Unexecuted instantiation: cdef_block_sse4.c:v128_min_u8
Unexecuted instantiation: cdef_block_avx2.c:v128_min_u8
456
457
0
SIMD_INLINE v128 v128_max_u8(v128 a, v128 b) { return _mm_max_epu8(a, b); }
Unexecuted instantiation: cdef_block_sse2.c:v128_max_u8
Unexecuted instantiation: cdef_block_ssse3.c:v128_max_u8
Unexecuted instantiation: cdef_block_sse4.c:v128_max_u8
Unexecuted instantiation: cdef_block_avx2.c:v128_max_u8
458
459
0
SIMD_INLINE v128 v128_min_s8(v128 a, v128 b) {
460
0
#if defined(__SSE4_1__)
461
0
  return _mm_min_epi8(a, b);
462
0
#else
463
0
  v128 mask = _mm_cmplt_epi8(a, b);
464
0
  return _mm_or_si128(_mm_andnot_si128(mask, b), _mm_and_si128(mask, a));
465
0
#endif
466
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_min_s8
Unexecuted instantiation: cdef_block_ssse3.c:v128_min_s8
Unexecuted instantiation: cdef_block_sse4.c:v128_min_s8
Unexecuted instantiation: cdef_block_avx2.c:v128_min_s8
467
468
9.46k
SIMD_INLINE uint32_t v128_movemask_8(v128 a) { return _mm_movemask_epi8(a); }
Unexecuted instantiation: cdef_block_sse2.c:v128_movemask_8
Unexecuted instantiation: cdef_block_ssse3.c:v128_movemask_8
Unexecuted instantiation: cdef_block_sse4.c:v128_movemask_8
cdef_block_avx2.c:v128_movemask_8
Line
Count
Source
468
9.46k
SIMD_INLINE uint32_t v128_movemask_8(v128 a) { return _mm_movemask_epi8(a); }
469
470
0
SIMD_INLINE v128 v128_blend_8(v128 a, v128 b, v128 c) {
471
0
#if defined(__SSE4_1__)
472
0
  return _mm_blendv_epi8(a, b, c);
473
0
#else
474
0
  c = _mm_cmplt_epi8(c, v128_zero());
475
0
  return v128_or(v128_and(b, c), v128_andn(a, c));
476
0
#endif
477
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_blend_8
Unexecuted instantiation: cdef_block_ssse3.c:v128_blend_8
Unexecuted instantiation: cdef_block_sse4.c:v128_blend_8
Unexecuted instantiation: cdef_block_avx2.c:v128_blend_8
478
479
0
SIMD_INLINE v128 v128_max_s8(v128 a, v128 b) {
480
0
#if defined(__SSE4_1__)
481
0
  return _mm_max_epi8(a, b);
482
0
#else
483
0
  v128 mask = _mm_cmplt_epi8(b, a);
484
0
  return _mm_or_si128(_mm_andnot_si128(mask, b), _mm_and_si128(mask, a));
485
0
#endif
486
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_max_s8
Unexecuted instantiation: cdef_block_ssse3.c:v128_max_s8
Unexecuted instantiation: cdef_block_sse4.c:v128_max_s8
Unexecuted instantiation: cdef_block_avx2.c:v128_max_s8
487
488
0
SIMD_INLINE v128 v128_min_s16(v128 a, v128 b) { return _mm_min_epi16(a, b); }
Unexecuted instantiation: cdef_block_sse2.c:v128_min_s16
Unexecuted instantiation: cdef_block_ssse3.c:v128_min_s16
Unexecuted instantiation: cdef_block_sse4.c:v128_min_s16
Unexecuted instantiation: cdef_block_avx2.c:v128_min_s16
489
490
0
SIMD_INLINE v128 v128_max_s16(v128 a, v128 b) { return _mm_max_epi16(a, b); }
Unexecuted instantiation: cdef_block_sse2.c:v128_max_s16
Unexecuted instantiation: cdef_block_ssse3.c:v128_max_s16
Unexecuted instantiation: cdef_block_sse4.c:v128_max_s16
Unexecuted instantiation: cdef_block_avx2.c:v128_max_s16
491
492
0
SIMD_INLINE v128 v128_min_s32(v128 a, v128 b) {
493
0
#if defined(__SSE4_1__)
494
0
  return _mm_min_epi32(a, b);
495
0
#else
496
0
  v128 mask = _mm_cmplt_epi32(a, b);
497
0
  return _mm_or_si128(_mm_andnot_si128(mask, b), _mm_and_si128(mask, a));
498
0
#endif
499
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_min_s32
Unexecuted instantiation: cdef_block_ssse3.c:v128_min_s32
Unexecuted instantiation: cdef_block_sse4.c:v128_min_s32
Unexecuted instantiation: cdef_block_avx2.c:v128_min_s32
500
501
28.4k
SIMD_INLINE v128 v128_max_s32(v128 a, v128 b) {
502
#if defined(__SSE4_1__)
503
  return _mm_max_epi32(a, b);
504
#else
505
  v128 mask = _mm_cmplt_epi32(b, a);
506
  return _mm_or_si128(_mm_andnot_si128(mask, b), _mm_and_si128(mask, a));
507
#endif
508
28.4k
}
Unexecuted instantiation: cdef_block_sse2.c:v128_max_s32
Unexecuted instantiation: cdef_block_ssse3.c:v128_max_s32
Unexecuted instantiation: cdef_block_sse4.c:v128_max_s32
cdef_block_avx2.c:v128_max_s32
Line
Count
Source
501
28.4k
SIMD_INLINE v128 v128_max_s32(v128 a, v128 b) {
502
28.4k
#if defined(__SSE4_1__)
503
28.4k
  return _mm_max_epi32(a, b);
504
#else
505
  v128 mask = _mm_cmplt_epi32(b, a);
506
  return _mm_or_si128(_mm_andnot_si128(mask, b), _mm_and_si128(mask, a));
507
#endif
508
28.4k
}
509
510
0
SIMD_INLINE v128 v128_cmpgt_s8(v128 a, v128 b) { return _mm_cmpgt_epi8(a, b); }
Unexecuted instantiation: cdef_block_sse2.c:v128_cmpgt_s8
Unexecuted instantiation: cdef_block_ssse3.c:v128_cmpgt_s8
Unexecuted instantiation: cdef_block_sse4.c:v128_cmpgt_s8
Unexecuted instantiation: cdef_block_avx2.c:v128_cmpgt_s8
511
512
0
SIMD_INLINE v128 v128_cmplt_s8(v128 a, v128 b) { return _mm_cmplt_epi8(a, b); }
Unexecuted instantiation: cdef_block_sse2.c:v128_cmplt_s8
Unexecuted instantiation: cdef_block_ssse3.c:v128_cmplt_s8
Unexecuted instantiation: cdef_block_sse4.c:v128_cmplt_s8
Unexecuted instantiation: cdef_block_avx2.c:v128_cmplt_s8
513
514
0
SIMD_INLINE v128 v128_cmpeq_8(v128 a, v128 b) { return _mm_cmpeq_epi8(a, b); }
Unexecuted instantiation: cdef_block_sse2.c:v128_cmpeq_8
Unexecuted instantiation: cdef_block_ssse3.c:v128_cmpeq_8
Unexecuted instantiation: cdef_block_sse4.c:v128_cmpeq_8
Unexecuted instantiation: cdef_block_avx2.c:v128_cmpeq_8
515
516
0
SIMD_INLINE v128 v128_cmpgt_s16(v128 a, v128 b) {
517
0
  return _mm_cmpgt_epi16(a, b);
518
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_cmpgt_s16
Unexecuted instantiation: cdef_block_ssse3.c:v128_cmpgt_s16
Unexecuted instantiation: cdef_block_sse4.c:v128_cmpgt_s16
Unexecuted instantiation: cdef_block_avx2.c:v128_cmpgt_s16
519
520
0
SIMD_INLINE v128 v128_cmplt_s16(v128 a, v128 b) {
521
0
  return _mm_cmplt_epi16(a, b);
522
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_cmplt_s16
Unexecuted instantiation: cdef_block_ssse3.c:v128_cmplt_s16
Unexecuted instantiation: cdef_block_sse4.c:v128_cmplt_s16
Unexecuted instantiation: cdef_block_avx2.c:v128_cmplt_s16
523
524
18.9k
SIMD_INLINE v128 v128_cmpeq_32(v128 a, v128 b) { return _mm_cmpeq_epi32(a, b); }
Unexecuted instantiation: cdef_block_sse2.c:v128_cmpeq_32
Unexecuted instantiation: cdef_block_ssse3.c:v128_cmpeq_32
Unexecuted instantiation: cdef_block_sse4.c:v128_cmpeq_32
cdef_block_avx2.c:v128_cmpeq_32
Line
Count
Source
524
18.9k
SIMD_INLINE v128 v128_cmpeq_32(v128 a, v128 b) { return _mm_cmpeq_epi32(a, b); }
525
526
0
SIMD_INLINE v128 v128_cmpgt_s32(v128 a, v128 b) {
527
0
  return _mm_cmpgt_epi32(a, b);
528
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_cmpgt_s32
Unexecuted instantiation: cdef_block_ssse3.c:v128_cmpgt_s32
Unexecuted instantiation: cdef_block_sse4.c:v128_cmpgt_s32
Unexecuted instantiation: cdef_block_avx2.c:v128_cmpgt_s32
529
530
0
SIMD_INLINE v128 v128_cmplt_s32(v128 a, v128 b) {
531
0
  return _mm_cmplt_epi32(a, b);
532
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_cmplt_s32
Unexecuted instantiation: cdef_block_ssse3.c:v128_cmplt_s32
Unexecuted instantiation: cdef_block_sse4.c:v128_cmplt_s32
Unexecuted instantiation: cdef_block_avx2.c:v128_cmplt_s32
533
534
0
SIMD_INLINE v128 v128_cmpeq_16(v128 a, v128 b) { return _mm_cmpeq_epi16(a, b); }
Unexecuted instantiation: cdef_block_sse2.c:v128_cmpeq_16
Unexecuted instantiation: cdef_block_ssse3.c:v128_cmpeq_16
Unexecuted instantiation: cdef_block_sse4.c:v128_cmpeq_16
Unexecuted instantiation: cdef_block_avx2.c:v128_cmpeq_16
535
536
0
SIMD_INLINE v128 v128_shl_8(v128 a, unsigned int c) {
537
0
  return _mm_and_si128(_mm_set1_epi8((char)(0xff << c)),
538
0
                       _mm_sll_epi16(a, _mm_cvtsi32_si128((int)c)));
539
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_shl_8
Unexecuted instantiation: cdef_block_ssse3.c:v128_shl_8
Unexecuted instantiation: cdef_block_sse4.c:v128_shl_8
Unexecuted instantiation: cdef_block_avx2.c:v128_shl_8
540
541
0
SIMD_INLINE v128 v128_shr_u8(v128 a, unsigned int c) {
542
0
  return _mm_and_si128(_mm_set1_epi8((char)(0xff >> c)),
543
0
                       _mm_srl_epi16(a, _mm_cvtsi32_si128((int)c)));
544
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_shr_u8
Unexecuted instantiation: cdef_block_ssse3.c:v128_shr_u8
Unexecuted instantiation: cdef_block_sse4.c:v128_shr_u8
Unexecuted instantiation: cdef_block_avx2.c:v128_shr_u8
545
546
0
SIMD_INLINE v128 v128_shr_s8(v128 a, unsigned int c) {
547
0
  __m128i x = _mm_cvtsi32_si128((int)(c + 8));
548
0
  return _mm_packs_epi16(_mm_sra_epi16(_mm_unpacklo_epi8(a, a), x),
549
0
                         _mm_sra_epi16(_mm_unpackhi_epi8(a, a), x));
550
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_shr_s8
Unexecuted instantiation: cdef_block_ssse3.c:v128_shr_s8
Unexecuted instantiation: cdef_block_sse4.c:v128_shr_s8
Unexecuted instantiation: cdef_block_avx2.c:v128_shr_s8
551
552
0
SIMD_INLINE v128 v128_shl_16(v128 a, unsigned int c) {
553
0
  return _mm_sll_epi16(a, _mm_cvtsi32_si128((int)c));
554
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_shl_16
Unexecuted instantiation: cdef_block_ssse3.c:v128_shl_16
Unexecuted instantiation: cdef_block_sse4.c:v128_shl_16
Unexecuted instantiation: cdef_block_avx2.c:v128_shl_16
555
556
0
SIMD_INLINE v128 v128_shr_u16(v128 a, unsigned int c) {
557
0
  return _mm_srl_epi16(a, _mm_cvtsi32_si128((int)c));
558
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_shr_u16
Unexecuted instantiation: cdef_block_ssse3.c:v128_shr_u16
Unexecuted instantiation: cdef_block_sse4.c:v128_shr_u16
Unexecuted instantiation: cdef_block_avx2.c:v128_shr_u16
559
560
75.7k
SIMD_INLINE v128 v128_shr_s16(v128 a, unsigned int c) {
561
75.7k
  return _mm_sra_epi16(a, _mm_cvtsi32_si128((int)c));
562
75.7k
}
Unexecuted instantiation: cdef_block_sse2.c:v128_shr_s16
Unexecuted instantiation: cdef_block_ssse3.c:v128_shr_s16
Unexecuted instantiation: cdef_block_sse4.c:v128_shr_s16
cdef_block_avx2.c:v128_shr_s16
Line
Count
Source
560
75.7k
SIMD_INLINE v128 v128_shr_s16(v128 a, unsigned int c) {
561
75.7k
  return _mm_sra_epi16(a, _mm_cvtsi32_si128((int)c));
562
75.7k
}
563
564
0
SIMD_INLINE v128 v128_shl_32(v128 a, unsigned int c) {
565
0
  return _mm_sll_epi32(a, _mm_cvtsi32_si128((int)c));
566
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_shl_32
Unexecuted instantiation: cdef_block_ssse3.c:v128_shl_32
Unexecuted instantiation: cdef_block_sse4.c:v128_shl_32
Unexecuted instantiation: cdef_block_avx2.c:v128_shl_32
567
568
0
SIMD_INLINE v128 v128_shr_u32(v128 a, unsigned int c) {
569
0
  return _mm_srl_epi32(a, _mm_cvtsi32_si128((int)c));
570
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_shr_u32
Unexecuted instantiation: cdef_block_ssse3.c:v128_shr_u32
Unexecuted instantiation: cdef_block_sse4.c:v128_shr_u32
Unexecuted instantiation: cdef_block_avx2.c:v128_shr_u32
571
572
0
SIMD_INLINE v128 v128_shr_s32(v128 a, unsigned int c) {
573
0
  return _mm_sra_epi32(a, _mm_cvtsi32_si128((int)c));
574
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_shr_s32
Unexecuted instantiation: cdef_block_ssse3.c:v128_shr_s32
Unexecuted instantiation: cdef_block_sse4.c:v128_shr_s32
Unexecuted instantiation: cdef_block_avx2.c:v128_shr_s32
575
576
0
SIMD_INLINE v128 v128_shl_64(v128 a, unsigned int c) {
577
0
  return _mm_sll_epi64(a, _mm_cvtsi32_si128((int)c));
578
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_shl_64
Unexecuted instantiation: cdef_block_ssse3.c:v128_shl_64
Unexecuted instantiation: cdef_block_sse4.c:v128_shl_64
Unexecuted instantiation: cdef_block_avx2.c:v128_shl_64
579
580
0
SIMD_INLINE v128 v128_shr_u64(v128 a, unsigned int c) {
581
0
  return _mm_srl_epi64(a, _mm_cvtsi32_si128((int)c));
582
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_shr_u64
Unexecuted instantiation: cdef_block_ssse3.c:v128_shr_u64
Unexecuted instantiation: cdef_block_sse4.c:v128_shr_u64
Unexecuted instantiation: cdef_block_avx2.c:v128_shr_u64
583
584
0
SIMD_INLINE v128 v128_shr_s64(v128 a, unsigned int c) {
585
0
  // _mm_sra_epi64 is missing in gcc?
586
0
  return v128_from_64((uint64_t)((int64_t)v64_u64(v128_high_v64(a)) >> c),
587
0
                      (uint64_t)((int64_t)v64_u64(v128_low_v64(a)) >> c));
588
0
  // return _mm_sra_epi64(a, _mm_cvtsi32_si128((int)c));
589
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_shr_s64
Unexecuted instantiation: cdef_block_ssse3.c:v128_shr_s64
Unexecuted instantiation: cdef_block_sse4.c:v128_shr_s64
Unexecuted instantiation: cdef_block_avx2.c:v128_shr_s64
590
591
/* These intrinsics require immediate values, so we must use #defines
592
   to enforce that. */
593
284k
#define v128_shl_n_byte(a, c) _mm_slli_si128(a, (c)&127)
594
284k
#define v128_shr_n_byte(a, c) _mm_srli_si128(a, (c)&127)
595
#define v128_shl_n_8(a, c) \
596
  _mm_and_si128(_mm_set1_epi8((char)(0xff << (c))), _mm_slli_epi16(a, c))
597
#define v128_shr_n_u8(a, c) \
598
  _mm_and_si128(_mm_set1_epi8((char)(0xff >> (c))), _mm_srli_epi16(a, c))
599
#define v128_shr_n_s8(a, c)                                         \
600
  _mm_packs_epi16(_mm_srai_epi16(_mm_unpacklo_epi8(a, a), (c) + 8), \
601
                  _mm_srai_epi16(_mm_unpackhi_epi8(a, a), (c) + 8))
602
#define v128_shl_n_16(a, c) _mm_slli_epi16(a, c)
603
#define v128_shr_n_u16(a, c) _mm_srli_epi16(a, c)
604
0
#define v128_shr_n_s16(a, c) _mm_srai_epi16(a, c)
605
#define v128_shl_n_32(a, c) _mm_slli_epi32(a, c)
606
#define v128_shr_n_u32(a, c) _mm_srli_epi32(a, c)
607
#define v128_shr_n_s32(a, c) _mm_srai_epi32(a, c)
608
#define v128_shl_n_64(a, c) _mm_slli_epi64(a, c)
609
#define v128_shr_n_u64(a, c) _mm_srli_epi64(a, c)
610
#define v128_shr_n_s64(a, c) \
611
  v128_shr_s64(a, c)  // _mm_srai_epi64 missing in gcc?
612
613
typedef v128 sad128_internal_u16;
614
615
0
SIMD_INLINE sad128_internal_u16 v128_sad_u16_init() { return v128_zero(); }
Unexecuted instantiation: cdef_block_sse2.c:v128_sad_u16_init
Unexecuted instantiation: cdef_block_ssse3.c:v128_sad_u16_init
Unexecuted instantiation: cdef_block_sse4.c:v128_sad_u16_init
Unexecuted instantiation: cdef_block_avx2.c:v128_sad_u16_init
616
617
/* Implementation dependent return value.  Result must be finalised with
618
 * v128_sad_u16_sum(). */
619
SIMD_INLINE sad128_internal_u16 v128_sad_u16(sad128_internal_u16 s, v128 a,
620
0
                                             v128 b) {
621
0
#if defined(__SSE4_1__)
622
0
  v128 t = v128_sub_16(_mm_max_epu16(a, b), _mm_min_epu16(a, b));
623
0
#else
624
0
  v128 t = v128_cmplt_s16(v128_xor(a, v128_dup_16(32768)),
625
0
                          v128_xor(b, v128_dup_16(32768)));
626
0
  t = v128_sub_16(v128_or(v128_and(b, t), v128_andn(a, t)),
627
0
                  v128_or(v128_and(a, t), v128_andn(b, t)));
628
0
#endif
629
0
  return v128_add_32(
630
0
      s, v128_add_32(v128_unpackhi_u16_s32(t), v128_unpacklo_u16_s32(t)));
631
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_sad_u16
Unexecuted instantiation: cdef_block_ssse3.c:v128_sad_u16
Unexecuted instantiation: cdef_block_sse4.c:v128_sad_u16
Unexecuted instantiation: cdef_block_avx2.c:v128_sad_u16
632
633
0
SIMD_INLINE uint32_t v128_sad_u16_sum(sad128_internal_u16 s) {
634
0
  return v128_low_u32(s) + v128_low_u32(v128_shr_n_byte(s, 4)) +
635
0
         v128_low_u32(v128_shr_n_byte(s, 8)) +
636
0
         v128_low_u32(v128_shr_n_byte(s, 12));
637
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_sad_u16_sum
Unexecuted instantiation: cdef_block_ssse3.c:v128_sad_u16_sum
Unexecuted instantiation: cdef_block_sse4.c:v128_sad_u16_sum
Unexecuted instantiation: cdef_block_avx2.c:v128_sad_u16_sum
638
639
typedef v128 ssd128_internal_s16;
640
641
0
SIMD_INLINE ssd128_internal_s16 v128_ssd_s16_init() { return v128_zero(); }
Unexecuted instantiation: cdef_block_sse2.c:v128_ssd_s16_init
Unexecuted instantiation: cdef_block_ssse3.c:v128_ssd_s16_init
Unexecuted instantiation: cdef_block_sse4.c:v128_ssd_s16_init
Unexecuted instantiation: cdef_block_avx2.c:v128_ssd_s16_init
642
643
/* Implementation dependent return value.  Result must be finalised with
644
 * v128_ssd_s16_sum(). */
645
SIMD_INLINE ssd128_internal_s16 v128_ssd_s16(ssd128_internal_s16 s, v128 a,
646
0
                                             v128 b) {
647
0
  v128 d = v128_sub_16(a, b);
648
0
  d = v128_madd_s16(d, d);
649
0
  return v128_add_64(s, v128_add_64(_mm_unpackhi_epi32(d, v128_zero()),
650
0
                                    _mm_unpacklo_epi32(d, v128_zero())));
651
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_ssd_s16
Unexecuted instantiation: cdef_block_ssse3.c:v128_ssd_s16
Unexecuted instantiation: cdef_block_sse4.c:v128_ssd_s16
Unexecuted instantiation: cdef_block_avx2.c:v128_ssd_s16
652
653
0
SIMD_INLINE uint64_t v128_ssd_s16_sum(ssd128_internal_s16 s) {
654
0
  return v64_u64(v128_low_v64(s)) + v64_u64(v128_high_v64(s));
655
0
}
Unexecuted instantiation: cdef_block_sse2.c:v128_ssd_s16_sum
Unexecuted instantiation: cdef_block_ssse3.c:v128_ssd_s16_sum
Unexecuted instantiation: cdef_block_sse4.c:v128_ssd_s16_sum
Unexecuted instantiation: cdef_block_avx2.c:v128_ssd_s16_sum
656
657
#endif  // AOM_AOM_DSP_SIMD_V128_INTRINSICS_X86_H_