Coverage Report

Created: 2025-10-13 06:28

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libavif/ext/libyuv/source/row_any.cc
Line
Count
Source
1
/*
2
 *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5
 *  that can be found in the LICENSE file in the root of the source
6
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS. All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
9
 */
10
11
#include "libyuv/row.h"
12
13
#include <string.h>  // For memset.
14
15
#include "libyuv/basic_types.h"
16
17
#ifdef __cplusplus
18
namespace libyuv {
19
extern "C" {
20
#endif
21
22
// memset for vin is meant to clear the source buffer so that
23
// SIMD that reads full multiple of 16 bytes will not trigger msan errors.
24
// memset is not needed for production, as the garbage values are processed but
25
// not used, although there may be edge cases for subsampling.
26
// The size of the buffer is based on the largest read, which can be inferred
27
// by the source type (e.g. ARGB) and the mask (last parameter), or by examining
28
// the source code for how much the source pointers are advanced.
29
30
// Subsampled source needs to be increased by 1 if not even.
31
29.6M
#define SS(width, shift) (((width) + ((1 << (shift)) - 1)) >> (shift))  // width / 2^shift, rounded up
32
33
// Any 4 planes to 1
34
// ANY41 defines NAMEANY, a wrapper around the row kernel ANY_SIMD that takes
// four 8-bit source planes (y, u, v, a) and one destination.
//   UVSHIFT  - right shift applied to pixel offsets/counts for u_buf and v_buf.
//   DUVSHIFT - right shift applied to pixel offsets/counts for dst_ptr.
//   BPP      - multiplier applied to the shifted destination offset/count.
//   MASK     - the first (width & ~MASK) pixels are passed to ANY_SIMD directly;
//              the remaining (width & MASK) pixels are staged through
//              temporaries.
// The remainder of each plane is copied into its own 64-byte slot of the
// zeroed buffer vin, ANY_SIMD is run on MASK + 1 pixels into vout, and only
// the bytes that correspond to the real remainder are copied to dst_ptr.
// The staged call is made unconditionally, even when the remainder is 0.
#define ANY41(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK)               \
35
  void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf,                   \
36
               const uint8_t* v_buf, const uint8_t* a_buf, uint8_t* dst_ptr, \
37
0
               int width) {                                                  \
38
0
    SIMD_ALIGNED(uint8_t vin[64 * 4]);                                       \
39
0
    SIMD_ALIGNED(uint8_t vout[64]);                                          \
40
0
    memset(vin, 0, sizeof(vin)); /* for msan */                              \
41
0
    int r = width & MASK;                                                    \
42
0
    int n = width & ~MASK;                                                   \
43
0
    if (n > 0) {                                                             \
44
0
      ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, n);                      \
45
0
    }                                                                        \
46
0
    memcpy(vin, y_buf + n, r);                                               \
47
0
    memcpy(vin + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT));                \
48
0
    memcpy(vin + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT));               \
49
0
    memcpy(vin + 192, a_buf + n, r);                                         \
50
0
    ANY_SIMD(vin, vin + 64, vin + 128, vin + 192, vout, MASK + 1);           \
51
0
    memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP);    \
52
0
  }
Unexecuted instantiation: MergeARGBRow_Any_SSE2
Unexecuted instantiation: MergeARGBRow_Any_AVX2
53
54
#ifdef HAS_MERGEARGBROW_SSE2
55
ANY41(MergeARGBRow_Any_SSE2, MergeARGBRow_SSE2, 0, 0, 4, 7)
56
#endif
57
#ifdef HAS_MERGEARGBROW_AVX2
58
ANY41(MergeARGBRow_Any_AVX2, MergeARGBRow_AVX2, 0, 0, 4, 15)
59
#endif
60
#ifdef HAS_MERGEARGBROW_NEON
61
ANY41(MergeARGBRow_Any_NEON, MergeARGBRow_NEON, 0, 0, 4, 15)
62
#endif
63
64
// Note that odd width replication includes 444 due to implementation
65
// on arm that subsamples 444 to 422 internally.
66
// Any 4 planes to 1 with yuvconstants
67
// ANY41C defines NAMEANY, a wrapper around the row kernel ANY_SIMD that takes
// four 8-bit source planes (y, u, v, a), one destination and a yuvconstants
// pointer that is forwarded unchanged to every ANY_SIMD call.
//   UVSHIFT  - right shift applied to pixel offsets/counts for u_buf and v_buf.
//   DUVSHIFT - right shift applied to pixel offsets/counts for dst_ptr.
//   BPP      - multiplier applied to the shifted destination offset/count.
//   MASK     - the first (width & ~MASK) pixels are passed to ANY_SIMD directly;
//              the remaining (width & MASK) pixels are staged through
//              temporaries.
// The remainder of each plane is copied into its own 64-byte slot of the
// zeroed buffer vin. When width is odd, the last copied u and v byte is
// duplicated into the next slot position before ANY_SIMD is run on MASK + 1
// pixels into vout. Only the bytes for the real remainder are then copied to
// dst_ptr. The staged call is made unconditionally, even when the remainder
// is 0.
#define ANY41C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK)              \
68
  void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf,                   \
69
               const uint8_t* v_buf, const uint8_t* a_buf, uint8_t* dst_ptr, \
70
9.85k
               const struct YuvConstants* yuvconstants, int width) {         \
71
9.85k
    SIMD_ALIGNED(uint8_t vin[64 * 4]);                                       \
72
9.85k
    SIMD_ALIGNED(uint8_t vout[64]);                                          \
73
9.85k
    memset(vin, 0, sizeof(vin)); /* for msan */                              \
74
9.85k
    int r = width & MASK;                                                    \
75
9.85k
    int n = width & ~MASK;                                                   \
76
9.85k
    if (n > 0) {                                                             \
77
1.38k
      ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, yuvconstants, n);        \
78
1.38k
    }                                                                        \
79
9.85k
    memcpy(vin, y_buf + n, r);                                               \
80
9.85k
    memcpy(vin + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT));                \
81
9.85k
    memcpy(vin + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT));               \
82
9.85k
    memcpy(vin + 192, a_buf + n, r);                                         \
83
9.85k
    if (width & 1) {                                                         \
84
7.84k
      vin[64 + SS(r, UVSHIFT)] = vin[64 + SS(r, UVSHIFT) - 1];               \
85
7.84k
      vin[128 + SS(r, UVSHIFT)] = vin[128 + SS(r, UVSHIFT) - 1];             \
86
7.84k
    }                                                                        \
87
9.85k
    ANY_SIMD(vin, vin + 64, vin + 128, vin + 192, vout, yuvconstants,        \
88
9.85k
             MASK + 1);                                                      \
89
9.85k
    memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP);    \
90
9.85k
  }
Unexecuted instantiation: I444AlphaToARGBRow_Any_SSSE3
I444AlphaToARGBRow_Any_AVX2
Line
Count
Source
70
6.11k
               const struct YuvConstants* yuvconstants, int width) {         \
71
6.11k
    SIMD_ALIGNED(uint8_t vin[64 * 4]);                                       \
72
6.11k
    SIMD_ALIGNED(uint8_t vout[64]);                                          \
73
6.11k
    memset(vin, 0, sizeof(vin)); /* for msan */                              \
74
6.11k
    int r = width & MASK;                                                    \
75
6.11k
    int n = width & ~MASK;                                                   \
76
6.11k
    if (n > 0) {                                                             \
77
1.05k
      ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, yuvconstants, n);        \
78
1.05k
    }                                                                        \
79
6.11k
    memcpy(vin, y_buf + n, r);                                               \
80
6.11k
    memcpy(vin + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT));                \
81
6.11k
    memcpy(vin + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT));               \
82
6.11k
    memcpy(vin + 192, a_buf + n, r);                                         \
83
6.11k
    if (width & 1) {                                                         \
84
4.62k
      vin[64 + SS(r, UVSHIFT)] = vin[64 + SS(r, UVSHIFT) - 1];               \
85
4.62k
      vin[128 + SS(r, UVSHIFT)] = vin[128 + SS(r, UVSHIFT) - 1];             \
86
4.62k
    }                                                                        \
87
6.11k
    ANY_SIMD(vin, vin + 64, vin + 128, vin + 192, vout, yuvconstants,        \
88
6.11k
             MASK + 1);                                                      \
89
6.11k
    memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP);    \
90
6.11k
  }
Unexecuted instantiation: I422AlphaToARGBRow_Any_SSSE3
I422AlphaToARGBRow_Any_AVX2
Line
Count
Source
70
3.73k
               const struct YuvConstants* yuvconstants, int width) {         \
71
3.73k
    SIMD_ALIGNED(uint8_t vin[64 * 4]);                                       \
72
3.73k
    SIMD_ALIGNED(uint8_t vout[64]);                                          \
73
3.73k
    memset(vin, 0, sizeof(vin)); /* for msan */                              \
74
3.73k
    int r = width & MASK;                                                    \
75
3.73k
    int n = width & ~MASK;                                                   \
76
3.73k
    if (n > 0) {                                                             \
77
326
      ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, yuvconstants, n);        \
78
326
    }                                                                        \
79
3.73k
    memcpy(vin, y_buf + n, r);                                               \
80
3.73k
    memcpy(vin + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT));                \
81
3.73k
    memcpy(vin + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT));               \
82
3.73k
    memcpy(vin + 192, a_buf + n, r);                                         \
83
3.73k
    if (width & 1) {                                                         \
84
3.22k
      vin[64 + SS(r, UVSHIFT)] = vin[64 + SS(r, UVSHIFT) - 1];               \
85
3.22k
      vin[128 + SS(r, UVSHIFT)] = vin[128 + SS(r, UVSHIFT) - 1];             \
86
3.22k
    }                                                                        \
87
3.73k
    ANY_SIMD(vin, vin + 64, vin + 128, vin + 192, vout, yuvconstants,        \
88
3.73k
             MASK + 1);                                                      \
89
3.73k
    memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP);    \
90
3.73k
  }
91
92
#ifdef HAS_I444ALPHATOARGBROW_SSSE3
93
ANY41C(I444AlphaToARGBRow_Any_SSSE3, I444AlphaToARGBRow_SSSE3, 0, 0, 4, 7)
94
#endif
95
#ifdef HAS_I444ALPHATOARGBROW_AVX2
96
ANY41C(I444AlphaToARGBRow_Any_AVX2, I444AlphaToARGBRow_AVX2, 0, 0, 4, 15)
97
#endif
98
#ifdef HAS_I422ALPHATOARGBROW_SSSE3
99
ANY41C(I422AlphaToARGBRow_Any_SSSE3, I422AlphaToARGBRow_SSSE3, 1, 0, 4, 7)
100
#endif
101
#ifdef HAS_I422ALPHATOARGBROW_AVX2
102
ANY41C(I422AlphaToARGBRow_Any_AVX2, I422AlphaToARGBRow_AVX2, 1, 0, 4, 15)
103
#endif
104
#ifdef HAS_I444ALPHATOARGBROW_NEON
105
ANY41C(I444AlphaToARGBRow_Any_NEON, I444AlphaToARGBRow_NEON, 0, 0, 4, 7)
106
#endif
107
#ifdef HAS_I422ALPHATOARGBROW_NEON
108
ANY41C(I422AlphaToARGBRow_Any_NEON, I422AlphaToARGBRow_NEON, 1, 0, 4, 7)
109
#endif
110
#ifdef HAS_I444ALPHATOARGBROW_MSA
111
ANY41C(I444AlphaToARGBRow_Any_MSA, I444AlphaToARGBRow_MSA, 0, 0, 4, 7)
112
#endif
113
#ifdef HAS_I422ALPHATOARGBROW_MSA
114
ANY41C(I422AlphaToARGBRow_Any_MSA, I422AlphaToARGBRow_MSA, 1, 0, 4, 7)
115
#endif
116
#ifdef HAS_I422ALPHATOARGBROW_LSX
117
ANY41C(I422AlphaToARGBRow_Any_LSX, I422AlphaToARGBRow_LSX, 1, 0, 4, 15)
118
#endif
119
#ifdef HAS_I422ALPHATOARGBROW_LASX
120
ANY41C(I422AlphaToARGBRow_Any_LASX, I422AlphaToARGBRow_LASX, 1, 0, 4, 15)
121
#endif
122
#undef ANY41C
123
124
// Any 4 planes to 1 plane of 8 bit with yuvconstants
125
// ANY41CT defines NAMEANY, a wrapper around the row kernel ANY_SIMD that takes
// four source planes of element type T (y, u, v, a), a uint8_t destination and
// a yuvconstants pointer that is forwarded unchanged to every ANY_SIMD call.
//   UVSHIFT  - right shift applied to pixel offsets/counts for u_buf and v_buf.
//   DUVSHIFT - right shift applied to pixel offsets/counts for dst_ptr.
//   T        - element type of the source planes and of the staging buffer.
//   SBPP     - multiplier that converts a source element count into the byte
//              count given to memcpy.
//   BPP      - multiplier applied to the shifted destination offset/count.
//   MASK     - the first (width & ~MASK) pixels are passed to ANY_SIMD directly;
//              the remaining (width & MASK) pixels are staged through
//              temporaries.
// The remainder of each plane is copied into its own 16-element slot of the
// zeroed buffer vin, ANY_SIMD is run on MASK + 1 pixels into the 64-byte vout,
// and only the bytes for the real remainder are copied to dst_ptr. The staged
// call is made unconditionally, even when the remainder is 0.
#define ANY41CT(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, T, SBPP, BPP, MASK)      \
126
  void NAMEANY(const T* y_buf, const T* u_buf, const T* v_buf, const T* a_buf, \
127
               uint8_t* dst_ptr, const struct YuvConstants* yuvconstants,      \
128
9.07k
               int width) {                                                    \
129
9.07k
    SIMD_ALIGNED(T vin[16 * 4]);                                               \
130
9.07k
    SIMD_ALIGNED(uint8_t vout[64]);                                            \
131
9.07k
    memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */                       \
132
9.07k
    int r = width & MASK;                                                      \
133
9.07k
    int n = width & ~MASK;                                                     \
134
9.07k
    if (n > 0) {                                                               \
135
406
      ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, yuvconstants, n);          \
136
406
    }                                                                          \
137
9.07k
    memcpy(vin, y_buf + n, r * SBPP);                                          \
138
9.07k
    memcpy(vin + 16, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP);           \
139
9.07k
    memcpy(vin + 32, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP);           \
140
9.07k
    memcpy(vin + 48, a_buf + n, r * SBPP);                                     \
141
9.07k
    ANY_SIMD(vin, vin + 16, vin + 32, vin + 48, vout, yuvconstants, MASK + 1); \
142
9.07k
    memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP);      \
143
9.07k
  }
Unexecuted instantiation: I210AlphaToARGBRow_Any_SSSE3
I210AlphaToARGBRow_Any_AVX2
Line
Count
Source
128
4.15k
               int width) {                                                    \
129
4.15k
    SIMD_ALIGNED(T vin[16 * 4]);                                               \
130
4.15k
    SIMD_ALIGNED(uint8_t vout[64]);                                            \
131
4.15k
    memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */                       \
132
4.15k
    int r = width & MASK;                                                      \
133
4.15k
    int n = width & ~MASK;                                                     \
134
4.15k
    if (n > 0) {                                                               \
135
194
      ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, yuvconstants, n);          \
136
194
    }                                                                          \
137
4.15k
    memcpy(vin, y_buf + n, r * SBPP);                                          \
138
4.15k
    memcpy(vin + 16, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP);           \
139
4.15k
    memcpy(vin + 32, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP);           \
140
4.15k
    memcpy(vin + 48, a_buf + n, r * SBPP);                                     \
141
4.15k
    ANY_SIMD(vin, vin + 16, vin + 32, vin + 48, vout, yuvconstants, MASK + 1); \
142
4.15k
    memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP);      \
143
4.15k
  }
Unexecuted instantiation: I410AlphaToARGBRow_Any_SSSE3
I410AlphaToARGBRow_Any_AVX2
Line
Count
Source
128
4.91k
               int width) {                                                    \
129
4.91k
    SIMD_ALIGNED(T vin[16 * 4]);                                               \
130
4.91k
    SIMD_ALIGNED(uint8_t vout[64]);                                            \
131
4.91k
    memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */                       \
132
4.91k
    int r = width & MASK;                                                      \
133
4.91k
    int n = width & ~MASK;                                                     \
134
4.91k
    if (n > 0) {                                                               \
135
212
      ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, yuvconstants, n);          \
136
212
    }                                                                          \
137
4.91k
    memcpy(vin, y_buf + n, r * SBPP);                                          \
138
4.91k
    memcpy(vin + 16, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP);           \
139
4.91k
    memcpy(vin + 32, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP);           \
140
4.91k
    memcpy(vin + 48, a_buf + n, r * SBPP);                                     \
141
4.91k
    ANY_SIMD(vin, vin + 16, vin + 32, vin + 48, vout, yuvconstants, MASK + 1); \
142
4.91k
    memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP);      \
143
4.91k
  }
144
145
#ifdef HAS_I210ALPHATOARGBROW_NEON
146
ANY41CT(I210AlphaToARGBRow_Any_NEON,
147
        I210AlphaToARGBRow_NEON,
148
        1,
149
        0,
150
        uint16_t,
151
        2,
152
        4,
153
        7)
154
#endif
155
#ifdef HAS_I410ALPHATOARGBROW_NEON
156
ANY41CT(I410AlphaToARGBRow_Any_NEON,
157
        I410AlphaToARGBRow_NEON,
158
        0,
159
        0,
160
        uint16_t,
161
        2,
162
        4,
163
        7)
164
#endif
165
166
#ifdef HAS_I210ALPHATOARGBROW_SSSE3
167
ANY41CT(I210AlphaToARGBRow_Any_SSSE3,
168
        I210AlphaToARGBRow_SSSE3,
169
        1,
170
        0,
171
        uint16_t,
172
        2,
173
        4,
174
        7)
175
#endif
176
177
#ifdef HAS_I210ALPHATOARGBROW_AVX2
178
ANY41CT(I210AlphaToARGBRow_Any_AVX2,
179
        I210AlphaToARGBRow_AVX2,
180
        1,
181
        0,
182
        uint16_t,
183
        2,
184
        4,
185
        15)
186
#endif
187
188
#ifdef HAS_I410ALPHATOARGBROW_SSSE3
189
ANY41CT(I410AlphaToARGBRow_Any_SSSE3,
190
        I410AlphaToARGBRow_SSSE3,
191
        0,
192
        0,
193
        uint16_t,
194
        2,
195
        4,
196
        7)
197
#endif
198
199
#ifdef HAS_I410ALPHATOARGBROW_AVX2
200
ANY41CT(I410AlphaToARGBRow_Any_AVX2,
201
        I410AlphaToARGBRow_AVX2,
202
        0,
203
        0,
204
        uint16_t,
205
        2,
206
        4,
207
        15)
208
#endif
209
210
#undef ANY41CT
211
212
// Any 4 planes to 1 plane with parameter
213
// ANY41PT defines NAMEANY, a wrapper around the row kernel ANY_SIMD that takes
// four source planes of type STYPE (r, g, b, a), a DTYPE destination and an
// integer depth that is forwarded unchanged to every ANY_SIMD call.
//   STYPE - element type of the source planes and of the staging buffer.
//   SBPP  - multiplier that converts a source element count into the byte
//           count given to memcpy.
//   DTYPE - element type of dst_ptr and of the output staging buffer.
//   BPP   - bytes per pixel used for the byte offset and byte count of the
//           final copy into dst_ptr (dst_ptr is addressed as uint8_t* there).
//   MASK  - the first (width & ~MASK) pixels are passed to ANY_SIMD directly;
//           the remaining (width & MASK) pixels are staged through
//           temporaries.
// No subsampling shift is applied: all four planes are advanced by the same
// pixel offset. The remainder of each plane is copied into its own 16-element
// slot of the zeroed buffer vin, ANY_SIMD is run on MASK + 1 pixels into vout,
// and only the bytes for the real remainder are copied to dst_ptr. The staged
// call is made unconditionally, even when the remainder is 0.
#define ANY41PT(NAMEANY, ANY_SIMD, STYPE, SBPP, DTYPE, BPP, MASK)          \
214
  void NAMEANY(const STYPE* r_buf, const STYPE* g_buf, const STYPE* b_buf, \
215
0
               const STYPE* a_buf, DTYPE* dst_ptr, int depth, int width) { \
216
0
    SIMD_ALIGNED(STYPE vin[16 * 4]);                                       \
217
0
    SIMD_ALIGNED(DTYPE vout[64]);                                          \
218
0
    memset(vin, 0, sizeof(vin)); /* for msan */                            \
219
0
    int r = width & MASK;                                                  \
220
0
    int n = width & ~MASK;                                                 \
221
0
    if (n > 0) {                                                           \
222
0
      ANY_SIMD(r_buf, g_buf, b_buf, a_buf, dst_ptr, depth, n);             \
223
0
    }                                                                      \
224
0
    memcpy(vin, r_buf + n, r * SBPP);                                      \
225
0
    memcpy(vin + 16, g_buf + n, r * SBPP);                                 \
226
0
    memcpy(vin + 32, b_buf + n, r * SBPP);                                 \
227
0
    memcpy(vin + 48, a_buf + n, r * SBPP);                                 \
228
0
    ANY_SIMD(vin, vin + 16, vin + 32, vin + 48, vout, depth, MASK + 1);    \
229
0
    memcpy((uint8_t*)dst_ptr + n * BPP, vout, r * BPP);                    \
230
0
  }
Unexecuted instantiation: MergeAR64Row_Any_AVX2
Unexecuted instantiation: MergeARGB16To8Row_Any_AVX2
231
232
#ifdef HAS_MERGEAR64ROW_AVX2
233
ANY41PT(MergeAR64Row_Any_AVX2, MergeAR64Row_AVX2, uint16_t, 2, uint16_t, 8, 15)
234
#endif
235
236
#ifdef HAS_MERGEAR64ROW_NEON
237
ANY41PT(MergeAR64Row_Any_NEON, MergeAR64Row_NEON, uint16_t, 2, uint16_t, 8, 7)
238
#endif
239
240
#ifdef HAS_MERGEARGB16TO8ROW_AVX2
241
ANY41PT(MergeARGB16To8Row_Any_AVX2,
242
        MergeARGB16To8Row_AVX2,
243
        uint16_t,
244
        2,
245
        uint8_t,
246
        4,
247
        15)
248
#endif
249
250
#ifdef HAS_MERGEARGB16TO8ROW_NEON
251
ANY41PT(MergeARGB16To8Row_Any_NEON,
252
        MergeARGB16To8Row_NEON,
253
        uint16_t,
254
        2,
255
        uint8_t,
256
        4,
257
        7)
258
#endif
259
260
#undef ANY41PT
261
262
// Any 3 planes to 1.
263
// ANY31 defines NAMEANY, a wrapper around the row kernel ANY_SIMD that takes
// three 8-bit source planes (y, u, v) and one destination.
//   UVSHIFT  - right shift applied to pixel offsets/counts for u_buf and v_buf.
//   DUVSHIFT - right shift applied to pixel offsets/counts for dst_ptr.
//   BPP      - multiplier applied to the shifted destination offset/count.
//   MASK     - the first (width & ~MASK) pixels are passed to ANY_SIMD directly;
//              the remaining (width & MASK) pixels are staged through
//              temporaries.
// The remainder of each plane is copied into its own 64-byte slot of the
// zeroed buffer vin, ANY_SIMD is run on MASK + 1 pixels into the 64-byte vout,
// and only the bytes for the real remainder are copied to dst_ptr. The staged
// call is made unconditionally, even when the remainder is 0.
#define ANY31(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK)            \
264
  void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf,                \
265
0
               const uint8_t* v_buf, uint8_t* dst_ptr, int width) {       \
266
0
    SIMD_ALIGNED(uint8_t vin[64 * 3]);                                    \
267
0
    SIMD_ALIGNED(uint8_t vout[64]);                                       \
268
0
    memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */                  \
269
0
    int r = width & MASK;                                                 \
270
0
    int n = width & ~MASK;                                                \
271
0
    if (n > 0) {                                                          \
272
0
      ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, n);                          \
273
0
    }                                                                     \
274
0
    memcpy(vin, y_buf + n, r);                                            \
275
0
    memcpy(vin + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT));             \
276
0
    memcpy(vin + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT));            \
277
0
    ANY_SIMD(vin, vin + 64, vin + 128, vout, MASK + 1);                   \
278
0
    memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \
279
0
  }
Unexecuted instantiation: MergeRGBRow_Any_SSSE3
Unexecuted instantiation: MergeXRGBRow_Any_SSE2
Unexecuted instantiation: MergeXRGBRow_Any_AVX2
Unexecuted instantiation: I422ToYUY2Row_Any_SSE2
Unexecuted instantiation: I422ToUYVYRow_Any_SSE2
Unexecuted instantiation: I422ToYUY2Row_Any_AVX2
Unexecuted instantiation: I422ToUYVYRow_Any_AVX2
Unexecuted instantiation: BlendPlaneRow_Any_AVX2
Unexecuted instantiation: BlendPlaneRow_Any_SSSE3
280
281
// Merge functions.
282
#ifdef HAS_MERGERGBROW_SSSE3
283
ANY31(MergeRGBRow_Any_SSSE3, MergeRGBRow_SSSE3, 0, 0, 3, 15)
284
#endif
285
#ifdef HAS_MERGERGBROW_NEON
286
ANY31(MergeRGBRow_Any_NEON, MergeRGBRow_NEON, 0, 0, 3, 15)
287
#endif
288
#ifdef HAS_MERGEXRGBROW_SSE2
289
ANY31(MergeXRGBRow_Any_SSE2, MergeXRGBRow_SSE2, 0, 0, 4, 7)
290
#endif
291
#ifdef HAS_MERGEXRGBROW_AVX2
292
ANY31(MergeXRGBRow_Any_AVX2, MergeXRGBRow_AVX2, 0, 0, 4, 15)
293
#endif
294
#ifdef HAS_MERGEXRGBROW_NEON
295
ANY31(MergeXRGBRow_Any_NEON, MergeXRGBRow_NEON, 0, 0, 4, 15)
296
#endif
297
#ifdef HAS_I422TOYUY2ROW_SSE2
298
ANY31(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, 1, 1, 4, 15)
299
ANY31(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, 1, 1, 4, 15)
300
#endif
301
#ifdef HAS_I422TOYUY2ROW_AVX2
302
ANY31(I422ToYUY2Row_Any_AVX2, I422ToYUY2Row_AVX2, 1, 1, 4, 31)
303
ANY31(I422ToUYVYRow_Any_AVX2, I422ToUYVYRow_AVX2, 1, 1, 4, 31)
304
#endif
305
#ifdef HAS_I422TOYUY2ROW_NEON
306
ANY31(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, 1, 1, 4, 15)
307
#endif
308
#ifdef HAS_I422TOYUY2ROW_MSA
309
ANY31(I422ToYUY2Row_Any_MSA, I422ToYUY2Row_MSA, 1, 1, 4, 31)
310
#endif
311
#ifdef HAS_I422TOYUY2ROW_LSX
312
ANY31(I422ToYUY2Row_Any_LSX, I422ToYUY2Row_LSX, 1, 1, 4, 15)
313
#endif
314
#ifdef HAS_I422TOYUY2ROW_LASX
315
ANY31(I422ToYUY2Row_Any_LASX, I422ToYUY2Row_LASX, 1, 1, 4, 31)
316
#endif
317
#ifdef HAS_I422TOUYVYROW_NEON
318
ANY31(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, 1, 1, 4, 15)
319
#endif
320
#ifdef HAS_I422TOUYVYROW_MSA
321
ANY31(I422ToUYVYRow_Any_MSA, I422ToUYVYRow_MSA, 1, 1, 4, 31)
322
#endif
323
#ifdef HAS_I422TOUYVYROW_LSX
324
ANY31(I422ToUYVYRow_Any_LSX, I422ToUYVYRow_LSX, 1, 1, 4, 15)
325
#endif
326
#ifdef HAS_I422TOUYVYROW_LASX
327
ANY31(I422ToUYVYRow_Any_LASX, I422ToUYVYRow_LASX, 1, 1, 4, 31)
328
#endif
329
#ifdef HAS_BLENDPLANEROW_AVX2
330
ANY31(BlendPlaneRow_Any_AVX2, BlendPlaneRow_AVX2, 0, 0, 1, 31)
331
#endif
332
#ifdef HAS_BLENDPLANEROW_SSSE3
333
ANY31(BlendPlaneRow_Any_SSSE3, BlendPlaneRow_SSSE3, 0, 0, 1, 7)
334
#endif
335
#undef ANY31
336
337
// Note that odd width replication includes 444 due to implementation
338
// on arm that subsamples 444 to 422 internally.
339
// Any 3 planes to 1 with yuvconstants
340
// ANY31C defines NAMEANY, a wrapper around the row kernel ANY_SIMD that takes
// three 8-bit source planes (y, u, v), one destination and a yuvconstants
// pointer that is forwarded unchanged to every ANY_SIMD call.
//   UVSHIFT  - right shift applied to pixel offsets/counts for u_buf and v_buf.
//   DUVSHIFT - right shift applied to pixel offsets/counts for dst_ptr.
//   BPP      - multiplier applied to the shifted destination offset/count.
//   MASK     - the first (width & ~MASK) pixels are passed to ANY_SIMD directly;
//              the remaining (width & MASK) pixels are staged through
//              temporaries.
// The remainder of each plane is copied into its own 128-byte slot of the
// zeroed buffer vin. When width is odd, the last copied u and v byte is
// duplicated into the next slot position before ANY_SIMD is run on MASK + 1
// pixels into the 128-byte vout. Only the bytes for the real remainder are
// then copied to dst_ptr. The staged call is made unconditionally, even when
// the remainder is 0.
#define ANY31C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK)           \
341
  void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf,                \
342
               const uint8_t* v_buf, uint8_t* dst_ptr,                    \
343
36.1k
               const struct YuvConstants* yuvconstants, int width) {      \
344
36.1k
    SIMD_ALIGNED(uint8_t vin[128 * 3]);                                   \
345
36.1k
    SIMD_ALIGNED(uint8_t vout[128]);                                      \
346
36.1k
    memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */                  \
347
36.1k
    int r = width & MASK;                                                 \
348
36.1k
    int n = width & ~MASK;                                                \
349
36.1k
    if (n > 0) {                                                          \
350
5.37k
      ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n);            \
351
5.37k
    }                                                                     \
352
36.1k
    memcpy(vin, y_buf + n, r);                                            \
353
36.1k
    memcpy(vin + 128, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT));            \
354
36.1k
    memcpy(vin + 256, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT));            \
355
36.1k
    if (width & 1) {                                                      \
356
33.1k
      vin[128 + SS(r, UVSHIFT)] = vin[128 + SS(r, UVSHIFT) - 1];          \
357
33.1k
      vin[256 + SS(r, UVSHIFT)] = vin[256 + SS(r, UVSHIFT) - 1];          \
358
33.1k
    }                                                                     \
359
36.1k
    ANY_SIMD(vin, vin + 128, vin + 256, vout, yuvconstants, MASK + 1);    \
360
36.1k
    memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \
361
36.1k
  }
Unexecuted instantiation: I422ToARGBRow_Any_SSSE3
Unexecuted instantiation: I422ToRGBARow_Any_SSSE3
Unexecuted instantiation: I422ToARGB4444Row_Any_SSSE3
Unexecuted instantiation: I422ToARGB1555Row_Any_SSSE3
Unexecuted instantiation: I422ToRGB565Row_Any_SSSE3
Unexecuted instantiation: I422ToRGB24Row_Any_SSSE3
Unexecuted instantiation: I422ToAR30Row_Any_SSSE3
Unexecuted instantiation: I422ToAR30Row_Any_AVX2
Unexecuted instantiation: I444ToARGBRow_Any_SSSE3
Unexecuted instantiation: I444ToRGB24Row_Any_SSSE3
I422ToRGB24Row_Any_AVX2
Line
Count
Source
343
4.65k
               const struct YuvConstants* yuvconstants, int width) {      \
344
4.65k
    SIMD_ALIGNED(uint8_t vin[128 * 3]);                                   \
345
4.65k
    SIMD_ALIGNED(uint8_t vout[128]);                                      \
346
4.65k
    memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */                  \
347
4.65k
    int r = width & MASK;                                                 \
348
4.65k
    int n = width & ~MASK;                                                \
349
4.65k
    if (n > 0) {                                                          \
350
375
      ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n);            \
351
375
    }                                                                     \
352
4.65k
    memcpy(vin, y_buf + n, r);                                            \
353
4.65k
    memcpy(vin + 128, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT));            \
354
4.65k
    memcpy(vin + 256, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT));            \
355
4.65k
    if (width & 1) {                                                      \
356
3.67k
      vin[128 + SS(r, UVSHIFT)] = vin[128 + SS(r, UVSHIFT) - 1];          \
357
3.67k
      vin[256 + SS(r, UVSHIFT)] = vin[256 + SS(r, UVSHIFT) - 1];          \
358
3.67k
    }                                                                     \
359
4.65k
    ANY_SIMD(vin, vin + 128, vin + 256, vout, yuvconstants, MASK + 1);    \
360
4.65k
    memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \
361
4.65k
  }
I422ToARGBRow_Any_AVX2
Line
Count
Source
343
4.53k
               const struct YuvConstants* yuvconstants, int width) {      \
344
4.53k
    SIMD_ALIGNED(uint8_t vin[128 * 3]);                                   \
345
4.53k
    SIMD_ALIGNED(uint8_t vout[128]);                                      \
346
4.53k
    memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */                  \
347
4.53k
    int r = width & MASK;                                                 \
348
4.53k
    int n = width & ~MASK;                                                \
349
4.53k
    if (n > 0) {                                                          \
350
684
      ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n);            \
351
684
    }                                                                     \
352
4.53k
    memcpy(vin, y_buf + n, r);                                            \
353
4.53k
    memcpy(vin + 128, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT));            \
354
4.53k
    memcpy(vin + 256, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT));            \
355
4.53k
    if (width & 1) {                                                      \
356
4.24k
      vin[128 + SS(r, UVSHIFT)] = vin[128 + SS(r, UVSHIFT) - 1];          \
357
4.24k
      vin[256 + SS(r, UVSHIFT)] = vin[256 + SS(r, UVSHIFT) - 1];          \
358
4.24k
    }                                                                     \
359
4.53k
    ANY_SIMD(vin, vin + 128, vin + 256, vout, yuvconstants, MASK + 1);    \
360
4.53k
    memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \
361
4.53k
  }
Unexecuted instantiation: I422ToARGBRow_Any_AVX512BW
I422ToRGBARow_Any_AVX2
Line
Count
Source
343
4.69k
               const struct YuvConstants* yuvconstants, int width) {      \
344
4.69k
    SIMD_ALIGNED(uint8_t vin[128 * 3]);                                   \
345
4.69k
    SIMD_ALIGNED(uint8_t vout[128]);                                      \
346
4.69k
    memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */                  \
347
4.69k
    int r = width & MASK;                                                 \
348
4.69k
    int n = width & ~MASK;                                                \
349
4.69k
    if (n > 0) {                                                          \
350
234
      ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n);            \
351
234
    }                                                                     \
352
4.69k
    memcpy(vin, y_buf + n, r);                                            \
353
4.69k
    memcpy(vin + 128, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT));            \
354
4.69k
    memcpy(vin + 256, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT));            \
355
4.69k
    if (width & 1) {                                                      \
356
4.39k
      vin[128 + SS(r, UVSHIFT)] = vin[128 + SS(r, UVSHIFT) - 1];          \
357
4.39k
      vin[256 + SS(r, UVSHIFT)] = vin[256 + SS(r, UVSHIFT) - 1];          \
358
4.39k
    }                                                                     \
359
4.69k
    ANY_SIMD(vin, vin + 128, vin + 256, vout, yuvconstants, MASK + 1);    \
360
4.69k
    memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \
361
4.69k
  }
I444ToARGBRow_Any_AVX2
Line
Count
Source
343
12.6k
               const struct YuvConstants* yuvconstants, int width) {      \
344
12.6k
    SIMD_ALIGNED(uint8_t vin[128 * 3]);                                   \
345
12.6k
    SIMD_ALIGNED(uint8_t vout[128]);                                      \
346
12.6k
    memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */                  \
347
12.6k
    int r = width & MASK;                                                 \
348
12.6k
    int n = width & ~MASK;                                                \
349
12.6k
    if (n > 0) {                                                          \
350
2.23k
      ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n);            \
351
2.23k
    }                                                                     \
352
12.6k
    memcpy(vin, y_buf + n, r);                                            \
353
12.6k
    memcpy(vin + 128, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT));            \
354
12.6k
    memcpy(vin + 256, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT));            \
355
12.6k
    if (width & 1) {                                                      \
356
11.7k
      vin[128 + SS(r, UVSHIFT)] = vin[128 + SS(r, UVSHIFT) - 1];          \
357
11.7k
      vin[256 + SS(r, UVSHIFT)] = vin[256 + SS(r, UVSHIFT) - 1];          \
358
11.7k
    }                                                                     \
359
12.6k
    ANY_SIMD(vin, vin + 128, vin + 256, vout, yuvconstants, MASK + 1);    \
360
12.6k
    memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \
361
12.6k
  }
I444ToRGB24Row_Any_AVX2
Line
Count
Source
343
6.02k
               const struct YuvConstants* yuvconstants, int width) {      \
344
6.02k
    SIMD_ALIGNED(uint8_t vin[128 * 3]);                                   \
345
6.02k
    SIMD_ALIGNED(uint8_t vout[128]);                                      \
346
6.02k
    memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */                  \
347
6.02k
    int r = width & MASK;                                                 \
348
6.02k
    int n = width & ~MASK;                                                \
349
6.02k
    if (n > 0) {                                                          \
350
1.43k
      ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n);            \
351
1.43k
    }                                                                     \
352
6.02k
    memcpy(vin, y_buf + n, r);                                            \
353
6.02k
    memcpy(vin + 128, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT));            \
354
6.02k
    memcpy(vin + 256, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT));            \
355
6.02k
    if (width & 1) {                                                      \
356
5.66k
      vin[128 + SS(r, UVSHIFT)] = vin[128 + SS(r, UVSHIFT) - 1];          \
357
5.66k
      vin[256 + SS(r, UVSHIFT)] = vin[256 + SS(r, UVSHIFT) - 1];          \
358
5.66k
    }                                                                     \
359
6.02k
    ANY_SIMD(vin, vin + 128, vin + 256, vout, yuvconstants, MASK + 1);    \
360
6.02k
    memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \
361
6.02k
  }
Unexecuted instantiation: I422ToARGB4444Row_Any_AVX2
Unexecuted instantiation: I422ToARGB1555Row_Any_AVX2
I422ToRGB565Row_Any_AVX2
Line
Count
Source
343
3.68k
               const struct YuvConstants* yuvconstants, int width) {      \
344
3.68k
    SIMD_ALIGNED(uint8_t vin[128 * 3]);                                   \
345
3.68k
    SIMD_ALIGNED(uint8_t vout[128]);                                      \
346
3.68k
    memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */                  \
347
3.68k
    int r = width & MASK;                                                 \
348
3.68k
    int n = width & ~MASK;                                                \
349
3.68k
    if (n > 0) {                                                          \
350
417
      ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n);            \
351
417
    }                                                                     \
352
3.68k
    memcpy(vin, y_buf + n, r);                                            \
353
3.68k
    memcpy(vin + 128, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT));            \
354
3.68k
    memcpy(vin + 256, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT));            \
355
3.68k
    if (width & 1) {                                                      \
356
3.39k
      vin[128 + SS(r, UVSHIFT)] = vin[128 + SS(r, UVSHIFT) - 1];          \
357
3.39k
      vin[256 + SS(r, UVSHIFT)] = vin[256 + SS(r, UVSHIFT) - 1];          \
358
3.39k
    }                                                                     \
359
3.68k
    ANY_SIMD(vin, vin + 128, vin + 256, vout, yuvconstants, MASK + 1);    \
360
3.68k
    memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \
361
3.68k
  }
362
363
#ifdef HAS_I422TOARGBROW_SSSE3
364
ANY31C(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_SSSE3, 1, 0, 4, 7)
365
#endif
366
#ifdef HAS_I422TORGBAROW_SSSE3
367
ANY31C(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_SSSE3, 1, 0, 4, 7)
368
#endif
369
#ifdef HAS_I422TOARGB4444ROW_SSSE3
370
ANY31C(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, 1, 0, 2, 7)
371
#endif
372
#ifdef HAS_I422TOARGB1555ROW_SSSE3
373
ANY31C(I422ToARGB1555Row_Any_SSSE3, I422ToARGB1555Row_SSSE3, 1, 0, 2, 7)
374
#endif
375
#ifdef HAS_I422TORGB565ROW_SSSE3
376
ANY31C(I422ToRGB565Row_Any_SSSE3, I422ToRGB565Row_SSSE3, 1, 0, 2, 7)
377
#endif
378
#ifdef HAS_I422TORGB24ROW_SSSE3
379
ANY31C(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, 1, 0, 3, 15)
380
#endif
381
#ifdef HAS_I422TOAR30ROW_SSSE3
382
ANY31C(I422ToAR30Row_Any_SSSE3, I422ToAR30Row_SSSE3, 1, 0, 4, 7)
383
#endif
384
#ifdef HAS_I422TOAR30ROW_AVX2
385
ANY31C(I422ToAR30Row_Any_AVX2, I422ToAR30Row_AVX2, 1, 0, 4, 15)
386
#endif
387
#ifdef HAS_I444TOARGBROW_SSSE3
388
ANY31C(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, 0, 0, 4, 7)
389
#endif
390
#ifdef HAS_I444TORGB24ROW_SSSE3
391
ANY31C(I444ToRGB24Row_Any_SSSE3, I444ToRGB24Row_SSSE3, 0, 0, 3, 15)
392
#endif
393
#ifdef HAS_I422TORGB24ROW_AVX2
394
ANY31C(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 31)
395
#endif
396
#ifdef HAS_I422TOARGBROW_AVX2
397
ANY31C(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, 1, 0, 4, 15)
398
#endif
399
#ifdef HAS_I422TOARGBROW_AVX512BW
400
ANY31C(I422ToARGBRow_Any_AVX512BW, I422ToARGBRow_AVX512BW, 1, 0, 4, 31)
401
#endif
402
#ifdef HAS_I422TORGBAROW_AVX2
403
ANY31C(I422ToRGBARow_Any_AVX2, I422ToRGBARow_AVX2, 1, 0, 4, 15)
404
#endif
405
#ifdef HAS_I444TOARGBROW_AVX2
406
ANY31C(I444ToARGBRow_Any_AVX2, I444ToARGBRow_AVX2, 0, 0, 4, 15)
407
#endif
408
#ifdef HAS_I444TORGB24ROW_AVX2
409
ANY31C(I444ToRGB24Row_Any_AVX2, I444ToRGB24Row_AVX2, 0, 0, 3, 31)
410
#endif
411
#ifdef HAS_I422TOARGB4444ROW_AVX2
412
ANY31C(I422ToARGB4444Row_Any_AVX2, I422ToARGB4444Row_AVX2, 1, 0, 2, 15)
413
#endif
414
#ifdef HAS_I422TOARGB1555ROW_AVX2
415
ANY31C(I422ToARGB1555Row_Any_AVX2, I422ToARGB1555Row_AVX2, 1, 0, 2, 15)
416
#endif
417
#ifdef HAS_I422TORGB565ROW_AVX2
418
ANY31C(I422ToRGB565Row_Any_AVX2, I422ToRGB565Row_AVX2, 1, 0, 2, 15)
419
#endif
420
#ifdef HAS_I444TORGB24ROW_NEON
421
ANY31C(I444ToRGB24Row_Any_NEON, I444ToRGB24Row_NEON, 0, 0, 3, 7)
422
#endif
423
#ifdef HAS_I422TOAR30ROW_NEON
424
ANY31C(I422ToAR30Row_Any_NEON, I422ToAR30Row_NEON, 1, 0, 4, 7)
425
#endif
426
#ifdef HAS_I422TOARGBROW_NEON
427
ANY31C(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, 0, 0, 4, 7)
428
ANY31C(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, 1, 0, 4, 7)
429
ANY31C(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, 1, 0, 4, 7)
430
ANY31C(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, 1, 0, 3, 7)
431
ANY31C(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7)
432
ANY31C(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, 1, 0, 2, 7)
433
ANY31C(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7)
434
#endif
435
#ifdef HAS_I422TOARGBROW_MSA
436
ANY31C(I444ToARGBRow_Any_MSA, I444ToARGBRow_MSA, 0, 0, 4, 7)
437
ANY31C(I422ToARGBRow_Any_MSA, I422ToARGBRow_MSA, 1, 0, 4, 7)
438
ANY31C(I422ToRGBARow_Any_MSA, I422ToRGBARow_MSA, 1, 0, 4, 7)
439
ANY31C(I422ToRGB24Row_Any_MSA, I422ToRGB24Row_MSA, 1, 0, 3, 15)
440
ANY31C(I422ToARGB4444Row_Any_MSA, I422ToARGB4444Row_MSA, 1, 0, 2, 7)
441
ANY31C(I422ToARGB1555Row_Any_MSA, I422ToARGB1555Row_MSA, 1, 0, 2, 7)
442
ANY31C(I422ToRGB565Row_Any_MSA, I422ToRGB565Row_MSA, 1, 0, 2, 7)
443
#endif
444
#ifdef HAS_I422TOARGBROW_LSX
445
ANY31C(I422ToARGBRow_Any_LSX, I422ToARGBRow_LSX, 1, 0, 4, 15)
446
ANY31C(I422ToRGBARow_Any_LSX, I422ToRGBARow_LSX, 1, 0, 4, 15)
447
ANY31C(I422ToRGB24Row_Any_LSX, I422ToRGB24Row_LSX, 1, 0, 3, 15)
448
ANY31C(I422ToRGB565Row_Any_LSX, I422ToRGB565Row_LSX, 1, 0, 2, 15)
449
ANY31C(I422ToARGB4444Row_Any_LSX, I422ToARGB4444Row_LSX, 1, 0, 2, 15)
450
ANY31C(I422ToARGB1555Row_Any_LSX, I422ToARGB1555Row_LSX, 1, 0, 2, 15)
451
#endif
452
#ifdef HAS_I422TOARGBROW_LASX
453
ANY31C(I422ToARGBRow_Any_LASX, I422ToARGBRow_LASX, 1, 0, 4, 31)
454
ANY31C(I422ToRGBARow_Any_LASX, I422ToRGBARow_LASX, 1, 0, 4, 31)
455
ANY31C(I422ToRGB24Row_Any_LASX, I422ToRGB24Row_LASX, 1, 0, 3, 31)
456
ANY31C(I422ToRGB565Row_Any_LASX, I422ToRGB565Row_LASX, 1, 0, 2, 31)
457
ANY31C(I422ToARGB4444Row_Any_LASX, I422ToARGB4444Row_LASX, 1, 0, 2, 31)
458
ANY31C(I422ToARGB1555Row_Any_LASX, I422ToARGB1555Row_LASX, 1, 0, 2, 31)
459
#endif
460
#ifdef HAS_I444TOARGBROW_LSX
461
ANY31C(I444ToARGBRow_Any_LSX, I444ToARGBRow_LSX, 0, 0, 4, 15)
462
#endif
463
#undef ANY31C
464
465
// Any 3 planes of 16 bit to 1 with yuvconstants
// Expands to NAMEANY(), a wrapper that lets the kernel ANY_SIMD convert a row
// of any width. The first (width & ~MASK) pixels are passed straight to
// ANY_SIMD. The remaining (width & MASK) pixels are staged in the zeroed
// scratch buffer vin, converted as one block of MASK + 1 pixels into vout,
// and only the bytes belonging to real pixels are copied to dst_ptr.
//   UVSHIFT  - right shift that turns a pixel count into a u/v sample count;
//              SS() rounds the remainder up.
//   DUVSHIFT - same kind of shift, applied to the destination offset/length.
//   T        - element type of the three source planes.
//   SBPP     - bytes per source sample, used for the staging copy lengths.
//   BPP      - bytes per destination pixel.
//   MASK     - kernel step minus 1. Each plane owns 16 elements of vin (at
//              vin, vin + 16 and vin + 32), so MASK must not exceed 15.
// NOTE(review): vout is 64 bytes; presumably ANY_SIMD writes no more than
// (MASK + 1) * BPP bytes for MASK + 1 pixels - confirm when adding kernels.
466
// TODO(fbarchard): consider sharing this code with ANY31C
467
#define ANY31CT(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, T, SBPP, BPP, MASK) \
468
  void NAMEANY(const T* y_buf, const T* u_buf, const T* v_buf,            \
469
               uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, \
470
14.4k
               int width) {                                               \
471
14.4k
    SIMD_ALIGNED(T vin[16 * 3]);                                          \
472
14.4k
    SIMD_ALIGNED(uint8_t vout[64]);                                       \
473
14.4k
    memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */                  \
474
14.4k
    int r = width & MASK;                                                 \
475
14.4k
    int n = width & ~MASK;                                                \
476
14.4k
    if (n > 0) {                                                          \
477
3.45k
      ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n);            \
478
3.45k
    }                                                                     \
479
14.4k
    memcpy(vin, y_buf + n, r * SBPP);                                     \
480
14.4k
    memcpy(vin + 16, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP);      \
481
14.4k
    memcpy(vin + 32, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP);      \
482
14.4k
    ANY_SIMD(vin, vin + 16, vin + 32, vout, yuvconstants, MASK + 1);      \
483
14.4k
    memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \
484
14.4k
  }
Unexecuted instantiation: I210ToAR30Row_Any_SSSE3
Unexecuted instantiation: I210ToARGBRow_Any_SSSE3
I210ToARGBRow_Any_AVX2
Line
Count
Source
470
3.67k
               int width) {                                               \
471
3.67k
    SIMD_ALIGNED(T vin[16 * 3]);                                          \
472
3.67k
    SIMD_ALIGNED(uint8_t vout[64]);                                       \
473
3.67k
    memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */                  \
474
3.67k
    int r = width & MASK;                                                 \
475
3.67k
    int n = width & ~MASK;                                                \
476
3.67k
    if (n > 0) {                                                          \
477
651
      ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n);            \
478
651
    }                                                                     \
479
3.67k
    memcpy(vin, y_buf + n, r * SBPP);                                     \
480
3.67k
    memcpy(vin + 16, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP);      \
481
3.67k
    memcpy(vin + 32, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP);      \
482
3.67k
    ANY_SIMD(vin, vin + 16, vin + 32, vout, yuvconstants, MASK + 1);      \
483
3.67k
    memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \
484
3.67k
  }
Unexecuted instantiation: I210ToAR30Row_Any_AVX2
Unexecuted instantiation: I410ToAR30Row_Any_SSSE3
Unexecuted instantiation: I410ToARGBRow_Any_SSSE3
I410ToARGBRow_Any_AVX2
Line
Count
Source
470
7.01k
               int width) {                                               \
471
7.01k
    SIMD_ALIGNED(T vin[16 * 3]);                                          \
472
7.01k
    SIMD_ALIGNED(uint8_t vout[64]);                                       \
473
7.01k
    memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */                  \
474
7.01k
    int r = width & MASK;                                                 \
475
7.01k
    int n = width & ~MASK;                                                \
476
7.01k
    if (n > 0) {                                                          \
477
2.45k
      ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n);            \
478
2.45k
    }                                                                     \
479
7.01k
    memcpy(vin, y_buf + n, r * SBPP);                                     \
480
7.01k
    memcpy(vin + 16, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP);      \
481
7.01k
    memcpy(vin + 32, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP);      \
482
7.01k
    ANY_SIMD(vin, vin + 16, vin + 32, vout, yuvconstants, MASK + 1);      \
483
7.01k
    memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \
484
7.01k
  }
Unexecuted instantiation: I410ToAR30Row_Any_AVX2
Unexecuted instantiation: I212ToAR30Row_Any_SSSE3
Unexecuted instantiation: I212ToARGBRow_Any_SSSE3
I212ToARGBRow_Any_AVX2
Line
Count
Source
470
3.78k
               int width) {                                               \
471
3.78k
    SIMD_ALIGNED(T vin[16 * 3]);                                          \
472
3.78k
    SIMD_ALIGNED(uint8_t vout[64]);                                       \
473
3.78k
    memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */                  \
474
3.78k
    int r = width & MASK;                                                 \
475
3.78k
    int n = width & ~MASK;                                                \
476
3.78k
    if (n > 0) {                                                          \
477
345
      ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n);            \
478
345
    }                                                                     \
479
3.78k
    memcpy(vin, y_buf + n, r * SBPP);                                     \
480
3.78k
    memcpy(vin + 16, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP);      \
481
3.78k
    memcpy(vin + 32, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP);      \
482
3.78k
    ANY_SIMD(vin, vin + 16, vin + 32, vout, yuvconstants, MASK + 1);      \
483
3.78k
    memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \
484
3.78k
  }
Unexecuted instantiation: I212ToAR30Row_Any_AVX2
485
486
#ifdef HAS_I210TOAR30ROW_SSSE3
487
ANY31CT(I210ToAR30Row_Any_SSSE3, I210ToAR30Row_SSSE3, 1, 0, uint16_t, 2, 4, 7)
488
#endif
489
#ifdef HAS_I210TOARGBROW_SSSE3
490
ANY31CT(I210ToARGBRow_Any_SSSE3, I210ToARGBRow_SSSE3, 1, 0, uint16_t, 2, 4, 7)
491
#endif
492
#ifdef HAS_I210TOARGBROW_AVX2
493
ANY31CT(I210ToARGBRow_Any_AVX2, I210ToARGBRow_AVX2, 1, 0, uint16_t, 2, 4, 15)
494
#endif
495
#ifdef HAS_I210TOAR30ROW_AVX2
496
ANY31CT(I210ToAR30Row_Any_AVX2, I210ToAR30Row_AVX2, 1, 0, uint16_t, 2, 4, 15)
497
#endif
498
#ifdef HAS_I410TOAR30ROW_SSSE3
499
ANY31CT(I410ToAR30Row_Any_SSSE3, I410ToAR30Row_SSSE3, 0, 0, uint16_t, 2, 4, 7)
500
#endif
501
#ifdef HAS_I410TOARGBROW_SSSE3
502
ANY31CT(I410ToARGBRow_Any_SSSE3, I410ToARGBRow_SSSE3, 0, 0, uint16_t, 2, 4, 7)
503
#endif
504
#ifdef HAS_I410TOARGBROW_AVX2
505
ANY31CT(I410ToARGBRow_Any_AVX2, I410ToARGBRow_AVX2, 0, 0, uint16_t, 2, 4, 15)
506
#endif
507
#ifdef HAS_I410TOAR30ROW_AVX2
508
ANY31CT(I410ToAR30Row_Any_AVX2, I410ToAR30Row_AVX2, 0, 0, uint16_t, 2, 4, 15)
509
#endif
510
#ifdef HAS_I212TOAR30ROW_SSSE3
511
ANY31CT(I212ToAR30Row_Any_SSSE3, I212ToAR30Row_SSSE3, 1, 0, uint16_t, 2, 4, 7)
512
#endif
513
#ifdef HAS_I212TOARGBROW_SSSE3
514
ANY31CT(I212ToARGBRow_Any_SSSE3, I212ToARGBRow_SSSE3, 1, 0, uint16_t, 2, 4, 7)
515
#endif
516
#ifdef HAS_I212TOARGBROW_AVX2
517
ANY31CT(I212ToARGBRow_Any_AVX2, I212ToARGBRow_AVX2, 1, 0, uint16_t, 2, 4, 15)
518
#endif
519
#ifdef HAS_I212TOAR30ROW_AVX2
520
ANY31CT(I212ToAR30Row_Any_AVX2, I212ToAR30Row_AVX2, 1, 0, uint16_t, 2, 4, 15)
521
#endif
522
#ifdef HAS_I210TOARGBROW_NEON
523
ANY31CT(I210ToARGBRow_Any_NEON, I210ToARGBRow_NEON, 1, 0, uint16_t, 2, 4, 7)
524
#endif
525
#ifdef HAS_I410TOARGBROW_NEON
526
ANY31CT(I410ToARGBRow_Any_NEON, I410ToARGBRow_NEON, 0, 0, uint16_t, 2, 4, 7)
527
#endif
528
#ifdef HAS_I210TOAR30ROW_NEON
529
ANY31CT(I210ToAR30Row_Any_NEON, I210ToAR30Row_NEON, 1, 0, uint16_t, 2, 4, 7)
530
#endif
531
#ifdef HAS_I410TOAR30ROW_NEON
532
ANY31CT(I410ToAR30Row_Any_NEON, I410ToAR30Row_NEON, 0, 0, uint16_t, 2, 4, 7)
533
#endif
534
#ifdef HAS_I212TOARGBROW_NEON
535
ANY31CT(I212ToARGBRow_Any_NEON, I212ToARGBRow_NEON, 1, 0, uint16_t, 2, 4, 7)
536
#endif
537
#ifdef HAS_I212TOAR30ROW_NEON
538
ANY31CT(I212ToAR30Row_Any_NEON, I212ToAR30Row_NEON, 1, 0, uint16_t, 2, 4, 7)
539
#endif
540
#undef ANY31CT
541
542
// Any 3 planes to 1 plane with parameter
// Expands to NAMEANY(), a wrapper that lets the kernel ANY_SIMD merge rows of
// any width. The first (width & ~MASK) pixels are passed straight to
// ANY_SIMD. The remaining (width & MASK) pixels of each plane are staged in
// the zeroed scratch buffer vin, processed as one block of MASK + 1 pixels
// into vout, and r * BPP bytes of the result are copied to the destination.
// depth is forwarded to ANY_SIMD unchanged.
//   STYPE - element type of the three source planes.
//   SBPP  - bytes per source sample, used for the staging copy lengths.
//   DTYPE - element type of the destination pointer and of vout.
//   BPP   - bytes per destination pixel; the destination offset is computed
//           in bytes (dst_ptr is cast to uint8_t*), independent of DTYPE.
//   MASK  - kernel step minus 1. Each plane owns 16 elements of vin (at
//           vin, vin + 16 and vin + 32), so MASK must not exceed 15.
543
#define ANY31PT(NAMEANY, ANY_SIMD, STYPE, SBPP, DTYPE, BPP, MASK)          \
544
  void NAMEANY(const STYPE* r_buf, const STYPE* g_buf, const STYPE* b_buf, \
545
0
               DTYPE* dst_ptr, int depth, int width) {                     \
546
0
    SIMD_ALIGNED(STYPE vin[16 * 3]);                                       \
547
0
    SIMD_ALIGNED(DTYPE vout[64]);                                          \
548
0
    memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */                   \
549
0
    int r = width & MASK;                                                  \
550
0
    int n = width & ~MASK;                                                 \
551
0
    if (n > 0) {                                                           \
552
0
      ANY_SIMD(r_buf, g_buf, b_buf, dst_ptr, depth, n);                    \
553
0
    }                                                                      \
554
0
    memcpy(vin, r_buf + n, r * SBPP);                                      \
555
0
    memcpy(vin + 16, g_buf + n, r * SBPP);                                 \
556
0
    memcpy(vin + 32, b_buf + n, r * SBPP);                                 \
557
0
    ANY_SIMD(vin, vin + 16, vin + 32, vout, depth, MASK + 1);              \
558
0
    memcpy((uint8_t*)dst_ptr + n * BPP, vout, r * BPP);                    \
559
0
  }
Unexecuted instantiation: MergeXR30Row_Any_AVX2
Unexecuted instantiation: MergeXR64Row_Any_AVX2
Unexecuted instantiation: MergeXRGB16To8Row_Any_AVX2
560
561
#ifdef HAS_MERGEXR30ROW_AVX2
562
ANY31PT(MergeXR30Row_Any_AVX2, MergeXR30Row_AVX2, uint16_t, 2, uint8_t, 4, 15)
563
#endif
564
565
#ifdef HAS_MERGEXR30ROW_NEON
566
ANY31PT(MergeXR30Row_Any_NEON, MergeXR30Row_NEON, uint16_t, 2, uint8_t, 4, 3)
567
ANY31PT(MergeXR30Row_10_Any_NEON,
568
        MergeXR30Row_10_NEON,
569
        uint16_t,
570
        2,
571
        uint8_t,
572
        4,
573
        7)
574
#endif
575
576
#ifdef HAS_MERGEXR64ROW_AVX2
577
ANY31PT(MergeXR64Row_Any_AVX2, MergeXR64Row_AVX2, uint16_t, 2, uint16_t, 8, 15)
578
#endif
579
580
#ifdef HAS_MERGEXR64ROW_NEON
581
ANY31PT(MergeXR64Row_Any_NEON, MergeXR64Row_NEON, uint16_t, 2, uint16_t, 8, 7)
582
#endif
583
584
#ifdef HAS_MERGEXRGB16TO8ROW_AVX2
585
ANY31PT(MergeXRGB16To8Row_Any_AVX2,
586
        MergeXRGB16To8Row_AVX2,
587
        uint16_t,
588
        2,
589
        uint8_t,
590
        4,
591
        15)
592
#endif
593
594
#ifdef HAS_MERGEXRGB16TO8ROW_NEON
595
ANY31PT(MergeXRGB16To8Row_Any_NEON,
596
        MergeXRGB16To8Row_NEON,
597
        uint16_t,
598
        2,
599
        uint8_t,
600
        4,
601
        7)
602
#endif
603
604
#undef ANY31PT
605
606
// Any 2 planes to 1.
// Expands to NAMEANY(), a wrapper that lets the kernel ANY_SIMD process rows
// of any width. The first (width & ~MASK) pixels are passed straight to
// ANY_SIMD. The remaining (width & MASK) pixels are staged in the zeroed
// scratch buffer vin (first input at vin, second input at vin + 128),
// processed as one block of MASK + 1 pixels into vout, and r * BPP bytes of
// the result are copied to dst_ptr.
//   UVSHIFT - right shift that turns a pixel count into a sample count for
//             the second input; SS() rounds the remainder up.
//   SBPP    - bytes per pixel of the first input.
//   SBPP2   - bytes per sample of the second input.
//   BPP     - bytes per destination pixel.
//   MASK    - kernel step minus 1.
// Each input owns 128 bytes of vin and vout is 128 bytes, so the staged
// remainder of each input and r * BPP must each fit in 128 bytes.
607
#define ANY21(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK)             \
608
  void NAMEANY(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, \
609
0
               int width) {                                                   \
610
0
    SIMD_ALIGNED(uint8_t vin[128 * 2]);                                       \
611
0
    SIMD_ALIGNED(uint8_t vout[128]);                                          \
612
0
    memset(vin, 0, sizeof(vin)); /* for msan */                               \
613
0
    int r = width & MASK;                                                     \
614
0
    int n = width & ~MASK;                                                    \
615
0
    if (n > 0) {                                                              \
616
0
      ANY_SIMD(y_buf, uv_buf, dst_ptr, n);                                    \
617
0
    }                                                                         \
618
0
    memcpy(vin, y_buf + n * SBPP, r * SBPP);                                  \
619
0
    memcpy(vin + 128, uv_buf + (n >> UVSHIFT) * SBPP2,                        \
620
0
           SS(r, UVSHIFT) * SBPP2);                                           \
621
0
    ANY_SIMD(vin, vin + 128, vout, MASK + 1);                                 \
622
0
    memcpy(dst_ptr + n * BPP, vout, r * BPP);                                 \
623
0
  }
Unexecuted instantiation: MergeUVRow_Any_SSE2
Unexecuted instantiation: MergeUVRow_Any_AVX2
Unexecuted instantiation: MergeUVRow_Any_AVX512BW
Unexecuted instantiation: NV21ToYUV24Row_Any_SSSE3
Unexecuted instantiation: NV21ToYUV24Row_Any_AVX2
Unexecuted instantiation: ARGBAddRow_Any_SSE2
Unexecuted instantiation: ARGBSubtractRow_Any_SSE2
Unexecuted instantiation: ARGBAddRow_Any_AVX2
Unexecuted instantiation: ARGBSubtractRow_Any_AVX2
Unexecuted instantiation: SobelRow_Any_SSE2
Unexecuted instantiation: SobelToPlaneRow_Any_SSE2
Unexecuted instantiation: SobelXYRow_Any_SSE2
624
625
// Merge functions.
626
#ifdef HAS_MERGEUVROW_SSE2
627
ANY21(MergeUVRow_Any_SSE2, MergeUVRow_SSE2, 0, 1, 1, 2, 15)
628
#endif
629
#ifdef HAS_MERGEUVROW_AVX2
630
ANY21(MergeUVRow_Any_AVX2, MergeUVRow_AVX2, 0, 1, 1, 2, 15)
631
#endif
632
#ifdef HAS_MERGEUVROW_AVX512BW
633
ANY21(MergeUVRow_Any_AVX512BW, MergeUVRow_AVX512BW, 0, 1, 1, 2, 31)
634
#endif
635
#ifdef HAS_MERGEUVROW_NEON
636
ANY21(MergeUVRow_Any_NEON, MergeUVRow_NEON, 0, 1, 1, 2, 15)
637
#endif
638
#ifdef HAS_MERGEUVROW_MSA
639
ANY21(MergeUVRow_Any_MSA, MergeUVRow_MSA, 0, 1, 1, 2, 15)
640
#endif
641
#ifdef HAS_MERGEUVROW_LSX
642
ANY21(MergeUVRow_Any_LSX, MergeUVRow_LSX, 0, 1, 1, 2, 15)
643
#endif
644
#ifdef HAS_NV21TOYUV24ROW_NEON
645
ANY21(NV21ToYUV24Row_Any_NEON, NV21ToYUV24Row_NEON, 1, 1, 2, 3, 15)
646
#endif
647
#ifdef HAS_NV21TOYUV24ROW_SSSE3
648
ANY21(NV21ToYUV24Row_Any_SSSE3, NV21ToYUV24Row_SSSE3, 1, 1, 2, 3, 15)
649
#endif
650
#ifdef HAS_NV21TOYUV24ROW_AVX2
651
ANY21(NV21ToYUV24Row_Any_AVX2, NV21ToYUV24Row_AVX2, 1, 1, 2, 3, 31)
652
#endif
653
// Math functions.
654
#ifdef HAS_ARGBMULTIPLYROW_SSE2
655
ANY21(ARGBMultiplyRow_Any_SSE2, ARGBMultiplyRow_SSE2, 0, 4, 4, 4, 3)
656
#endif
657
#ifdef HAS_ARGBADDROW_SSE2
658
ANY21(ARGBAddRow_Any_SSE2, ARGBAddRow_SSE2, 0, 4, 4, 4, 3)
659
#endif
660
#ifdef HAS_ARGBSUBTRACTROW_SSE2
661
ANY21(ARGBSubtractRow_Any_SSE2, ARGBSubtractRow_SSE2, 0, 4, 4, 4, 3)
662
#endif
663
#ifdef HAS_ARGBMULTIPLYROW_AVX2
664
ANY21(ARGBMultiplyRow_Any_AVX2, ARGBMultiplyRow_AVX2, 0, 4, 4, 4, 7)
665
#endif
666
#ifdef HAS_ARGBADDROW_AVX2
667
ANY21(ARGBAddRow_Any_AVX2, ARGBAddRow_AVX2, 0, 4, 4, 4, 7)
668
#endif
669
#ifdef HAS_ARGBSUBTRACTROW_AVX2
670
ANY21(ARGBSubtractRow_Any_AVX2, ARGBSubtractRow_AVX2, 0, 4, 4, 4, 7)
671
#endif
672
#ifdef HAS_ARGBMULTIPLYROW_NEON
673
ANY21(ARGBMultiplyRow_Any_NEON, ARGBMultiplyRow_NEON, 0, 4, 4, 4, 7)
674
#endif
675
#ifdef HAS_ARGBADDROW_NEON
676
ANY21(ARGBAddRow_Any_NEON, ARGBAddRow_NEON, 0, 4, 4, 4, 7)
677
#endif
678
#ifdef HAS_ARGBSUBTRACTROW_NEON
679
ANY21(ARGBSubtractRow_Any_NEON, ARGBSubtractRow_NEON, 0, 4, 4, 4, 7)
680
#endif
681
#ifdef HAS_ARGBMULTIPLYROW_MSA
682
ANY21(ARGBMultiplyRow_Any_MSA, ARGBMultiplyRow_MSA, 0, 4, 4, 4, 3)
683
#endif
684
#ifdef HAS_ARGBMULTIPLYROW_LSX
685
ANY21(ARGBMultiplyRow_Any_LSX, ARGBMultiplyRow_LSX, 0, 4, 4, 4, 3)
686
#endif
687
#ifdef HAS_ARGBMULTIPLYROW_LASX
688
ANY21(ARGBMultiplyRow_Any_LASX, ARGBMultiplyRow_LASX, 0, 4, 4, 4, 7)
689
#endif
690
#ifdef HAS_ARGBADDROW_MSA
691
ANY21(ARGBAddRow_Any_MSA, ARGBAddRow_MSA, 0, 4, 4, 4, 7)
692
#endif
693
#ifdef HAS_ARGBADDROW_LSX
694
ANY21(ARGBAddRow_Any_LSX, ARGBAddRow_LSX, 0, 4, 4, 4, 3)
695
#endif
696
#ifdef HAS_ARGBADDROW_LASX
697
ANY21(ARGBAddRow_Any_LASX, ARGBAddRow_LASX, 0, 4, 4, 4, 7)
698
#endif
699
#ifdef HAS_ARGBSUBTRACTROW_MSA
700
ANY21(ARGBSubtractRow_Any_MSA, ARGBSubtractRow_MSA, 0, 4, 4, 4, 7)
701
#endif
702
#ifdef HAS_ARGBSUBTRACTROW_LSX
703
ANY21(ARGBSubtractRow_Any_LSX, ARGBSubtractRow_LSX, 0, 4, 4, 4, 3)
704
#endif
705
#ifdef HAS_ARGBSUBTRACTROW_LASX
706
ANY21(ARGBSubtractRow_Any_LASX, ARGBSubtractRow_LASX, 0, 4, 4, 4, 7)
707
#endif
708
#ifdef HAS_SOBELROW_SSE2
709
ANY21(SobelRow_Any_SSE2, SobelRow_SSE2, 0, 1, 1, 4, 15)
710
#endif
711
#ifdef HAS_SOBELROW_NEON
712
ANY21(SobelRow_Any_NEON, SobelRow_NEON, 0, 1, 1, 4, 7)
713
#endif
714
#ifdef HAS_SOBELROW_MSA
715
ANY21(SobelRow_Any_MSA, SobelRow_MSA, 0, 1, 1, 4, 15)
716
#endif
717
#ifdef HAS_SOBELROW_LSX
718
ANY21(SobelRow_Any_LSX, SobelRow_LSX, 0, 1, 1, 4, 15)
719
#endif
720
#ifdef HAS_SOBELTOPLANEROW_SSE2
721
ANY21(SobelToPlaneRow_Any_SSE2, SobelToPlaneRow_SSE2, 0, 1, 1, 1, 15)
722
#endif
723
#ifdef HAS_SOBELTOPLANEROW_NEON
724
ANY21(SobelToPlaneRow_Any_NEON, SobelToPlaneRow_NEON, 0, 1, 1, 1, 15)
725
#endif
726
#ifdef HAS_SOBELTOPLANEROW_MSA
727
ANY21(SobelToPlaneRow_Any_MSA, SobelToPlaneRow_MSA, 0, 1, 1, 1, 31)
728
#endif
729
#ifdef HAS_SOBELTOPLANEROW_LSX
730
ANY21(SobelToPlaneRow_Any_LSX, SobelToPlaneRow_LSX, 0, 1, 1, 1, 31)
731
#endif
732
#ifdef HAS_SOBELXYROW_SSE2
733
ANY21(SobelXYRow_Any_SSE2, SobelXYRow_SSE2, 0, 1, 1, 4, 15)
734
#endif
735
#ifdef HAS_SOBELXYROW_NEON
736
ANY21(SobelXYRow_Any_NEON, SobelXYRow_NEON, 0, 1, 1, 4, 7)
737
#endif
738
#ifdef HAS_SOBELXYROW_MSA
739
ANY21(SobelXYRow_Any_MSA, SobelXYRow_MSA, 0, 1, 1, 4, 15)
740
#endif
741
#ifdef HAS_SOBELXYROW_LSX
742
ANY21(SobelXYRow_Any_LSX, SobelXYRow_LSX, 0, 1, 1, 4, 15)
743
#endif
744
#undef ANY21
745
746
// Any 2 planes to 1 with stride
747
// width is measured in source pixels. 4 bytes contain 2 pixels
// Expands to NAMEANY(), a wrapper that lets the kernel ANY_SIMD process two
// adjacent rows (src_yuy2 and src_yuy2 + stride_yuy2) of any width.
// The row is handled in units of one pixel pair (awidth = pairs, SBPP bytes
// in, BPP bytes out per pair). The first (awidth & ~MASK) pairs go straight
// to ANY_SIMD; the remaining (awidth & MASK) pairs of both rows are staged
// in the zeroed scratch buffer vin, processed as one block, and r * BPP
// bytes of the result are copied to dst_uv.
//   SBPP - source bytes per pixel pair.
//   BPP  - destination bytes per pixel pair.
//   MASK - kernel step in pixel pairs, minus 1.
// Each staged row is 64 bytes and the staged stride is 64, so the largest
// remainder used here (MASK = 15 pairs * SBPP = 4 bytes = 60 bytes) fits
// without running into the second row or past the end of vin.
// The remainder call passes (MASK + 1) * 2 because ANY_SIMD takes its width
// in pixels, exactly like the n * 2 passed for the bulk of the row.
748
#define ANY21S(NAMEANY, ANY_SIMD, SBPP, BPP, MASK)                        \
749
  void NAMEANY(const uint8_t* src_yuy2, int stride_yuy2, uint8_t* dst_uv, \
750
0
               int width) {                                               \
751
0
    SIMD_ALIGNED(uint8_t vin[64 * 2]);                                    \
752
0
    SIMD_ALIGNED(uint8_t vout[32]);                                       \
753
0
    memset(vin, 0, sizeof(vin)); /* for msan */                           \
754
0
    int awidth = (width + 1) / 2;                                         \
755
0
    int r = awidth & MASK;                                                \
756
0
    int n = awidth & ~MASK;                                               \
757
0
    if (n > 0) {                                                          \
758
0
      ANY_SIMD(src_yuy2, stride_yuy2, dst_uv, n * 2);                     \
759
0
    }                                                                     \
760
0
    memcpy(vin, src_yuy2 + n * SBPP, r * SBPP);                           \
761
0
    memcpy(vin + 64, src_yuy2 + stride_yuy2 + n * SBPP, r * SBPP);        \
762
0
    ANY_SIMD(vin, 64, vout, (MASK + 1) * 2);                              \
763
0
    memcpy(dst_uv + n * BPP, vout, r * BPP);                              \
764
0
  }
Unexecuted instantiation: YUY2ToNVUVRow_Any_SSE2
Unexecuted instantiation: YUY2ToNVUVRow_Any_AVX2
765
766
#ifdef HAS_YUY2TONVUVROW_NEON
767
ANY21S(YUY2ToNVUVRow_Any_NEON, YUY2ToNVUVRow_NEON, 4, 2, 7)
768
#endif
769
#ifdef HAS_YUY2TONVUVROW_SSE2
770
ANY21S(YUY2ToNVUVRow_Any_SSE2, YUY2ToNVUVRow_SSE2, 4, 2, 7)
771
#endif
772
#ifdef HAS_YUY2TONVUVROW_AVX2
773
ANY21S(YUY2ToNVUVRow_Any_AVX2, YUY2ToNVUVRow_AVX2, 4, 2, 15)
774
#endif
775
776
// Any 2 planes to 1 with yuvconstants
// Expands to NAMEANY(), a wrapper that lets the kernel ANY_SIMD convert rows
// of any width. The first (width & ~MASK) pixels are passed straight to
// ANY_SIMD. The remaining (width & MASK) pixels are staged in the zeroed
// scratch buffer vin (y at vin, uv at vin + 128), converted as one block of
// MASK + 1 pixels into vout, and r * BPP bytes of the result are copied to
// dst_ptr. yuvconstants is forwarded to ANY_SIMD unchanged.
//   UVSHIFT - right shift that turns a pixel count into a uv sample count;
//             SS() rounds the remainder up.
//   SBPP    - bytes per pixel of the y input.
//   SBPP2   - bytes per sample of the uv input.
//   BPP     - bytes per destination pixel.
//   MASK    - kernel step minus 1.
// Each input owns 128 bytes of vin and vout is 128 bytes, so the staged
// remainder of each input and r * BPP must each fit in 128 bytes.
777
#define ANY21C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK)            \
778
  void NAMEANY(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, \
779
0
               const struct YuvConstants* yuvconstants, int width) {          \
780
0
    SIMD_ALIGNED(uint8_t vin[128 * 2]);                                       \
781
0
    SIMD_ALIGNED(uint8_t vout[128]);                                          \
782
0
    memset(vin, 0, sizeof(vin)); /* for msan */                               \
783
0
    int r = width & MASK;                                                     \
784
0
    int n = width & ~MASK;                                                    \
785
0
    if (n > 0) {                                                              \
786
0
      ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, n);                      \
787
0
    }                                                                         \
788
0
    memcpy(vin, y_buf + n * SBPP, r * SBPP);                                  \
789
0
    memcpy(vin + 128, uv_buf + (n >> UVSHIFT) * SBPP2,                        \
790
0
           SS(r, UVSHIFT) * SBPP2);                                           \
791
0
    ANY_SIMD(vin, vin + 128, vout, yuvconstants, MASK + 1);                   \
792
0
    memcpy(dst_ptr + n * BPP, vout, r * BPP);                                 \
793
0
  }
Unexecuted instantiation: NV12ToARGBRow_Any_SSSE3
Unexecuted instantiation: NV12ToARGBRow_Any_AVX2
Unexecuted instantiation: NV21ToARGBRow_Any_SSSE3
Unexecuted instantiation: NV21ToARGBRow_Any_AVX2
Unexecuted instantiation: NV12ToRGB24Row_Any_SSSE3
Unexecuted instantiation: NV21ToRGB24Row_Any_SSSE3
Unexecuted instantiation: NV12ToRGB24Row_Any_AVX2
Unexecuted instantiation: NV21ToRGB24Row_Any_AVX2
Unexecuted instantiation: NV12ToRGB565Row_Any_SSSE3
Unexecuted instantiation: NV12ToRGB565Row_Any_AVX2
794
795
// Biplanar to RGB.
796
#ifdef HAS_NV12TOARGBROW_SSSE3
797
ANY21C(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
798
#endif
799
#ifdef HAS_NV12TOARGBROW_AVX2
800
ANY21C(NV12ToARGBRow_Any_AVX2, NV12ToARGBRow_AVX2, 1, 1, 2, 4, 15)
801
#endif
802
#ifdef HAS_NV12TOARGBROW_NEON
803
ANY21C(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, 1, 1, 2, 4, 7)
804
#endif
805
#ifdef HAS_NV12TOARGBROW_MSA
806
ANY21C(NV12ToARGBRow_Any_MSA, NV12ToARGBRow_MSA, 1, 1, 2, 4, 7)
807
#endif
808
#ifdef HAS_NV12TOARGBROW_LSX
809
ANY21C(NV12ToARGBRow_Any_LSX, NV12ToARGBRow_LSX, 1, 1, 2, 4, 7)
810
#endif
811
#ifdef HAS_NV12TOARGBROW_LASX
812
ANY21C(NV12ToARGBRow_Any_LASX, NV12ToARGBRow_LASX, 1, 1, 2, 4, 15)
813
#endif
814
#ifdef HAS_NV21TOARGBROW_SSSE3
815
ANY21C(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
816
#endif
817
#ifdef HAS_NV21TOARGBROW_AVX2
818
ANY21C(NV21ToARGBRow_Any_AVX2, NV21ToARGBRow_AVX2, 1, 1, 2, 4, 15)
819
#endif
820
#ifdef HAS_NV21TOARGBROW_NEON
821
ANY21C(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, 1, 1, 2, 4, 7)
822
#endif
823
#ifdef HAS_NV21TOARGBROW_MSA
824
ANY21C(NV21ToARGBRow_Any_MSA, NV21ToARGBRow_MSA, 1, 1, 2, 4, 7)
825
#endif
826
#ifdef HAS_NV21TOARGBROW_LSX
827
ANY21C(NV21ToARGBRow_Any_LSX, NV21ToARGBRow_LSX, 1, 1, 2, 4, 7)
828
#endif
829
#ifdef HAS_NV21TOARGBROW_LASX
830
ANY21C(NV21ToARGBRow_Any_LASX, NV21ToARGBRow_LASX, 1, 1, 2, 4, 15)
831
#endif
832
#ifdef HAS_NV12TORGB24ROW_NEON
833
ANY21C(NV12ToRGB24Row_Any_NEON, NV12ToRGB24Row_NEON, 1, 1, 2, 3, 7)
834
#endif
835
#ifdef HAS_NV21TORGB24ROW_NEON
836
ANY21C(NV21ToRGB24Row_Any_NEON, NV21ToRGB24Row_NEON, 1, 1, 2, 3, 7)
837
#endif
838
#ifdef HAS_NV12TORGB24ROW_SSSE3
839
ANY21C(NV12ToRGB24Row_Any_SSSE3, NV12ToRGB24Row_SSSE3, 1, 1, 2, 3, 15)
840
#endif
841
#ifdef HAS_NV21TORGB24ROW_SSSE3
842
ANY21C(NV21ToRGB24Row_Any_SSSE3, NV21ToRGB24Row_SSSE3, 1, 1, 2, 3, 15)
843
#endif
844
#ifdef HAS_NV12TORGB24ROW_AVX2
845
ANY21C(NV12ToRGB24Row_Any_AVX2, NV12ToRGB24Row_AVX2, 1, 1, 2, 3, 31)
846
#endif
847
#ifdef HAS_NV21TORGB24ROW_AVX2
848
ANY21C(NV21ToRGB24Row_Any_AVX2, NV21ToRGB24Row_AVX2, 1, 1, 2, 3, 31)
849
#endif
850
#ifdef HAS_NV12TORGB565ROW_SSSE3
851
ANY21C(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, 1, 1, 2, 2, 7)
852
#endif
853
#ifdef HAS_NV12TORGB565ROW_AVX2
854
ANY21C(NV12ToRGB565Row_Any_AVX2, NV12ToRGB565Row_AVX2, 1, 1, 2, 2, 15)
855
#endif
856
#ifdef HAS_NV12TORGB565ROW_NEON
857
ANY21C(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, 1, 1, 2, 2, 7)
858
#endif
859
#ifdef HAS_NV12TORGB565ROW_MSA
860
ANY21C(NV12ToRGB565Row_Any_MSA, NV12ToRGB565Row_MSA, 1, 1, 2, 2, 7)
861
#endif
862
#ifdef HAS_NV12TORGB565ROW_LSX
863
ANY21C(NV12ToRGB565Row_Any_LSX, NV12ToRGB565Row_LSX, 1, 1, 2, 2, 7)
864
#endif
865
#ifdef HAS_NV12TORGB565ROW_LASX
866
ANY21C(NV12ToRGB565Row_Any_LASX, NV12ToRGB565Row_LASX, 1, 1, 2, 2, 15)
867
#endif
868
#undef ANY21C
869
870
// Any 2 planes of 16 bit to 1 with yuvconstants
// Expands to NAMEANY(), a wrapper that lets the kernel ANY_SIMD convert rows
// of any width. The first (width & ~MASK) pixels are passed straight to
// ANY_SIMD. The remaining (width & MASK) pixels are staged in the zeroed
// scratch buffer vin, converted as one block of MASK + 1 pixels into vout,
// and only the bytes belonging to real pixels are copied to dst_ptr.
//   UVSHIFT  - right shift that turns a pixel count into a uv pair count;
//              SS() rounds the remainder up.
//   DUVSHIFT - same kind of shift, applied to the destination offset/length.
//   T        - element type of both source planes.
//   SBPP     - bytes per source sample.
//   BPP      - bytes per destination pixel.
//   MASK     - kernel step minus 1; must not exceed 15.
// vin layout: y occupies vin[0..16), interleaved uv occupies vin[16..48).
// The uv plane carries two samples per pair, so with UVSHIFT == 0 a block of
// 16 pixels needs 32 elements; that is why vin has 16 * 3 elements.
871
#define ANY21CT(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, T, SBPP, BPP, MASK)     \
872
  void NAMEANY(const T* y_buf, const T* uv_buf, uint8_t* dst_ptr,             \
873
0
               const struct YuvConstants* yuvconstants, int width) {          \
874
0
    SIMD_ALIGNED(T vin[16 * 3]);                                              \
875
0
    SIMD_ALIGNED(uint8_t vout[64]);                                           \
876
0
    memset(vin, 0, sizeof(vin)); /* for msan */                               \
877
0
    int r = width & MASK;                                                     \
878
0
    int n = width & ~MASK;                                                    \
879
0
    if (n > 0) {                                                              \
880
0
      ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, n);                      \
881
0
    }                                                                         \
882
0
    memcpy(vin, y_buf + n, r * SBPP);                                         \
883
0
    memcpy(vin + 16, uv_buf + 2 * (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP * 2); \
884
0
    ANY_SIMD(vin, vin + 16, vout, yuvconstants, MASK + 1);                    \
885
0
    memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP);     \
886
0
  }
Unexecuted instantiation: P210ToAR30Row_Any_SSSE3
Unexecuted instantiation: P210ToARGBRow_Any_SSSE3
Unexecuted instantiation: P210ToARGBRow_Any_AVX2
Unexecuted instantiation: P210ToAR30Row_Any_AVX2
Unexecuted instantiation: P410ToAR30Row_Any_SSSE3
Unexecuted instantiation: P410ToARGBRow_Any_SSSE3
Unexecuted instantiation: P410ToARGBRow_Any_AVX2
Unexecuted instantiation: P410ToAR30Row_Any_AVX2
887
888
#ifdef HAS_P210TOAR30ROW_SSSE3
889
ANY21CT(P210ToAR30Row_Any_SSSE3, P210ToAR30Row_SSSE3, 1, 0, uint16_t, 2, 4, 7)
890
#endif
891
#ifdef HAS_P210TOARGBROW_SSSE3
892
ANY21CT(P210ToARGBRow_Any_SSSE3, P210ToARGBRow_SSSE3, 1, 0, uint16_t, 2, 4, 7)
893
#endif
894
#ifdef HAS_P210TOARGBROW_AVX2
895
ANY21CT(P210ToARGBRow_Any_AVX2, P210ToARGBRow_AVX2, 1, 0, uint16_t, 2, 4, 15)
896
#endif
897
#ifdef HAS_P210TOAR30ROW_AVX2
898
ANY21CT(P210ToAR30Row_Any_AVX2, P210ToAR30Row_AVX2, 1, 0, uint16_t, 2, 4, 15)
899
#endif
900
#ifdef HAS_P210TOAR30ROW_NEON
901
ANY21CT(P210ToAR30Row_Any_NEON, P210ToAR30Row_NEON, 1, 0, uint16_t, 2, 4, 7)
902
#endif
903
#ifdef HAS_P210TOARGBROW_NEON
904
ANY21CT(P210ToARGBRow_Any_NEON, P210ToARGBRow_NEON, 1, 0, uint16_t, 2, 4, 7)
905
#endif
906
#ifdef HAS_P410TOAR30ROW_SSSE3
907
ANY21CT(P410ToAR30Row_Any_SSSE3, P410ToAR30Row_SSSE3, 0, 0, uint16_t, 2, 4, 7)
908
#endif
909
#ifdef HAS_P410TOARGBROW_SSSE3
910
ANY21CT(P410ToARGBRow_Any_SSSE3, P410ToARGBRow_SSSE3, 0, 0, uint16_t, 2, 4, 7)
911
#endif
912
#ifdef HAS_P410TOARGBROW_AVX2
913
ANY21CT(P410ToARGBRow_Any_AVX2, P410ToARGBRow_AVX2, 0, 0, uint16_t, 2, 4, 15)
914
#endif
915
#ifdef HAS_P410TOAR30ROW_AVX2
916
ANY21CT(P410ToAR30Row_Any_AVX2, P410ToAR30Row_AVX2, 0, 0, uint16_t, 2, 4, 15)
917
#endif
918
#ifdef HAS_P410TOAR30ROW_NEON
919
ANY21CT(P410ToAR30Row_Any_NEON, P410ToAR30Row_NEON, 0, 0, uint16_t, 2, 4, 7)
920
#endif
921
#ifdef HAS_P410TOARGBROW_NEON
922
ANY21CT(P410ToARGBRow_Any_NEON, P410ToARGBRow_NEON, 0, 0, uint16_t, 2, 4, 7)
923
#endif
924
925
#undef ANY21CT
926
927
// Any 2 16 bit planes with parameter to 1
// Expands to NAMEANY(), a wrapper that lets the kernel ANY_SIMD merge rows of
// any width. The first (width & ~MASK) samples are passed straight to
// ANY_SIMD. The remaining (width & MASK) samples of each plane are staged in
// the zeroed scratch buffer vin (u at vin, v at vin + 16), processed as one
// block of MASK + 1 samples into vout, and r * BPP * 2 bytes of the result
// are copied to dst_uv + n * 2 (two output elements per input sample).
// depth is forwarded to ANY_SIMD unchanged.
//   T    - element type of both sources and of the destination.
//   BPP  - bytes per source sample.
//   MASK - kernel step minus 1.
// NOTE(review): vout holds 16 elements and two elements are produced per
// sample, so this presumably requires MASK <= 7 with BPP == sizeof(T) -
// confirm before instantiating with a larger MASK.
928
#define ANY21PT(NAMEANY, ANY_SIMD, T, BPP, MASK)                     \
929
  void NAMEANY(const T* src_u, const T* src_v, T* dst_uv, int depth, \
930
0
               int width) {                                          \
931
0
    SIMD_ALIGNED(T vin[16 * 2]);                                     \
932
0
    SIMD_ALIGNED(T vout[16]);                                        \
933
0
    memset(vin, 0, sizeof(vin)); /* for msan */                      \
934
0
    int r = width & MASK;                                            \
935
0
    int n = width & ~MASK;                                           \
936
0
    if (n > 0) {                                                     \
937
0
      ANY_SIMD(src_u, src_v, dst_uv, depth, n);                      \
938
0
    }                                                                \
939
0
    memcpy(vin, src_u + n, r * BPP);                                 \
940
0
    memcpy(vin + 16, src_v + n, r * BPP);                            \
941
0
    ANY_SIMD(vin, vin + 16, vout, depth, MASK + 1);                  \
942
0
    memcpy(dst_uv + n * 2, vout, r * BPP * 2);                       \
943
0
  }
944
945
#ifdef HAS_MERGEUVROW_16_AVX2
946
ANY21PT(MergeUVRow_16_Any_AVX2, MergeUVRow_16_AVX2, uint16_t, 2, 7)
947
#endif
948
#ifdef HAS_MERGEUVROW_16_NEON
949
ANY21PT(MergeUVRow_16_Any_NEON, MergeUVRow_16_NEON, uint16_t, 2, 7)
950
#endif
951
952
#undef ANY21CT
953
954
// Any 1 to 1.
//
// ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) defines
//   void NAMEANY(src_ptr, dst_ptr, width)
// The leading multiple of MASK + 1 pixels goes straight through ANY_SIMD.
// The remaining (width & MASK) pixels are staged in a zeroed 128 byte
// buffer, converted as one MASK + 1 pixel batch, and only the remainder's
// BPP-sized outputs are copied to the destination. The staged batch is
// processed even when the remainder is zero (the copies are then empty).
// SBPP and BPP scale the source and destination byte offsets; UVSHIFT shifts
// the source offset and rounds the staged source size up through SS().
#define ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK)               \
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) {    \
    SIMD_ALIGNED(uint8_t vin[128]);                                      \
    SIMD_ALIGNED(uint8_t vout[128]);                                     \
    const int rem = width & MASK;                                        \
    const int full = width - rem;                                        \
    const uint8_t* src_rem = src_ptr + (full >> UVSHIFT) * SBPP;         \
    uint8_t* dst_rem = dst_ptr + full * BPP;                             \
    memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */                 \
    if (full > 0) {                                                      \
      ANY_SIMD(src_ptr, dst_ptr, full);                                  \
    }                                                                    \
    memcpy(vin, src_rem, SS(rem, UVSHIFT) * SBPP);                       \
    ANY_SIMD(vin, vout, MASK + 1);                                       \
    memcpy(dst_rem, vout, rem * BPP);                                    \
  }
Unexecuted instantiation: CopyRow_Any_AVX512BW
Unexecuted instantiation: CopyRow_Any_AVX
Unexecuted instantiation: CopyRow_Any_SSE2
Unexecuted instantiation: ARGBToRGB24Row_Any_SSSE3
Unexecuted instantiation: ARGBToRAWRow_Any_SSSE3
Unexecuted instantiation: ARGBToRGB565Row_Any_SSE2
Unexecuted instantiation: ARGBToARGB1555Row_Any_SSE2
Unexecuted instantiation: ARGBToARGB4444Row_Any_SSE2
Unexecuted instantiation: ARGBToRGB24Row_Any_AVX2
Unexecuted instantiation: ARGBToRGB24Row_Any_AVX512VBMI
Unexecuted instantiation: ARGBToRAWRow_Any_AVX2
Unexecuted instantiation: ABGRToAR30Row_Any_SSSE3
Unexecuted instantiation: ARGBToAR30Row_Any_SSSE3
Unexecuted instantiation: ABGRToAR30Row_Any_AVX2
Unexecuted instantiation: ARGBToAR30Row_Any_AVX2
Unexecuted instantiation: J400ToARGBRow_Any_SSE2
Unexecuted instantiation: RGB24ToARGBRow_Any_SSSE3
Unexecuted instantiation: RAWToARGBRow_Any_SSSE3
Unexecuted instantiation: RGB565ToARGBRow_Any_SSE2
Unexecuted instantiation: ARGB1555ToARGBRow_Any_SSE2
Unexecuted instantiation: ARGB4444ToARGBRow_Any_SSE2
RAWToARGBRow_Any_AVX2
Line
Count
Source
956
3.59M
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) {    \
957
3.59M
    SIMD_ALIGNED(uint8_t vin[128]);                                      \
958
3.59M
    SIMD_ALIGNED(uint8_t vout[128]);                                     \
959
3.59M
    memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */                 \
960
3.59M
    int r = width & MASK;                                                \
961
3.59M
    int n = width & ~MASK;                                               \
962
3.59M
    if (n > 0) {                                                         \
963
1.60M
      ANY_SIMD(src_ptr, dst_ptr, n);                                     \
964
1.60M
    }                                                                    \
965
3.59M
    memcpy(vin, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
966
3.59M
    ANY_SIMD(vin, vout, MASK + 1);                                       \
967
3.59M
    memcpy(dst_ptr + n * BPP, vout, r * BPP);                            \
968
3.59M
  }
Unexecuted instantiation: RAWToRGBARow_Any_SSSE3
Unexecuted instantiation: RAWToRGB24Row_Any_SSSE3
Unexecuted instantiation: ARGBToYRow_Any_AVX2
Unexecuted instantiation: ABGRToYRow_Any_AVX2
ARGBToYJRow_Any_AVX2
Line
Count
Source
956
4.77M
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) {    \
957
4.77M
    SIMD_ALIGNED(uint8_t vin[128]);                                      \
958
4.77M
    SIMD_ALIGNED(uint8_t vout[128]);                                     \
959
4.77M
    memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */                 \
960
4.77M
    int r = width & MASK;                                                \
961
4.77M
    int n = width & ~MASK;                                               \
962
4.77M
    if (n > 0) {                                                         \
963
2.27M
      ANY_SIMD(src_ptr, dst_ptr, n);                                     \
964
2.27M
    }                                                                    \
965
4.77M
    memcpy(vin, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
966
4.77M
    ANY_SIMD(vin, vout, MASK + 1);                                       \
967
4.77M
    memcpy(dst_ptr + n * BPP, vout, r * BPP);                            \
968
4.77M
  }
ABGRToYJRow_Any_AVX2
Line
Count
Source
956
201
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) {    \
957
201
    SIMD_ALIGNED(uint8_t vin[128]);                                      \
958
201
    SIMD_ALIGNED(uint8_t vout[128]);                                     \
959
201
    memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */                 \
960
201
    int r = width & MASK;                                                \
961
201
    int n = width & ~MASK;                                               \
962
201
    if (n > 0) {                                                         \
963
201
      ANY_SIMD(src_ptr, dst_ptr, n);                                     \
964
201
    }                                                                    \
965
201
    memcpy(vin, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
966
201
    ANY_SIMD(vin, vout, MASK + 1);                                       \
967
201
    memcpy(dst_ptr + n * BPP, vout, r * BPP);                            \
968
201
  }
Unexecuted instantiation: RGBAToYJRow_Any_AVX2
Unexecuted instantiation: UYVYToYRow_Any_AVX2
Unexecuted instantiation: YUY2ToYRow_Any_AVX2
Unexecuted instantiation: ARGBToYRow_Any_SSSE3
Unexecuted instantiation: BGRAToYRow_Any_SSSE3
Unexecuted instantiation: ABGRToYRow_Any_SSSE3
Unexecuted instantiation: RGBAToYRow_Any_SSSE3
Unexecuted instantiation: YUY2ToYRow_Any_SSE2
Unexecuted instantiation: UYVYToYRow_Any_SSE2
Unexecuted instantiation: ARGBToYJRow_Any_SSSE3
Unexecuted instantiation: ABGRToYJRow_Any_SSSE3
Unexecuted instantiation: RGBAToYJRow_Any_SSSE3
Unexecuted instantiation: RGB24ToYJRow_Any_AVX2
Unexecuted instantiation: RGB24ToYJRow_Any_SSSE3
RAWToYJRow_Any_AVX2
Line
Count
Source
956
56
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) {    \
957
56
    SIMD_ALIGNED(uint8_t vin[128]);                                      \
958
56
    SIMD_ALIGNED(uint8_t vout[128]);                                     \
959
56
    memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */                 \
960
56
    int r = width & MASK;                                                \
961
56
    int n = width & ~MASK;                                               \
962
56
    if (n > 0) {                                                         \
963
55
      ANY_SIMD(src_ptr, dst_ptr, n);                                     \
964
55
    }                                                                    \
965
56
    memcpy(vin, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
966
56
    ANY_SIMD(vin, vout, MASK + 1);                                       \
967
56
    memcpy(dst_ptr + n * BPP, vout, r * BPP);                            \
968
56
  }
Unexecuted instantiation: RAWToYJRow_Any_SSSE3
Unexecuted instantiation: SwapUVRow_Any_SSSE3
Unexecuted instantiation: SwapUVRow_Any_AVX2
Unexecuted instantiation: ARGBAttenuateRow_Any_SSSE3
Unexecuted instantiation: ARGBUnattenuateRow_Any_SSE2
ARGBAttenuateRow_Any_AVX2
Line
Count
Source
956
355
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) {    \
957
355
    SIMD_ALIGNED(uint8_t vin[128]);                                      \
958
355
    SIMD_ALIGNED(uint8_t vout[128]);                                     \
959
355
    memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */                 \
960
355
    int r = width & MASK;                                                \
961
355
    int n = width & ~MASK;                                               \
962
355
    if (n > 0) {                                                         \
963
117
      ANY_SIMD(src_ptr, dst_ptr, n);                                     \
964
117
    }                                                                    \
965
355
    memcpy(vin, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
966
355
    ANY_SIMD(vin, vout, MASK + 1);                                       \
967
355
    memcpy(dst_ptr + n * BPP, vout, r * BPP);                            \
968
355
  }
Unexecuted instantiation: ARGBUnattenuateRow_Any_AVX2
Unexecuted instantiation: ARGBExtractAlphaRow_Any_SSE2
Unexecuted instantiation: ARGBExtractAlphaRow_Any_AVX2
969
970
#ifdef HAS_COPYROW_AVX512BW
971
ANY11(CopyRow_Any_AVX512BW, CopyRow_AVX512BW, 0, 1, 1, 127)
972
#endif
973
#ifdef HAS_COPYROW_AVX
974
ANY11(CopyRow_Any_AVX, CopyRow_AVX, 0, 1, 1, 63)
975
#endif
976
#ifdef HAS_COPYROW_SSE2
977
ANY11(CopyRow_Any_SSE2, CopyRow_SSE2, 0, 1, 1, 31)
978
#endif
979
#ifdef HAS_COPYROW_NEON
980
ANY11(CopyRow_Any_NEON, CopyRow_NEON, 0, 1, 1, 31)
981
#endif
982
#if defined(HAS_ARGBTORGB24ROW_SSSE3)
983
ANY11(ARGBToRGB24Row_Any_SSSE3, ARGBToRGB24Row_SSSE3, 0, 4, 3, 15)
984
ANY11(ARGBToRAWRow_Any_SSSE3, ARGBToRAWRow_SSSE3, 0, 4, 3, 15)
985
ANY11(ARGBToRGB565Row_Any_SSE2, ARGBToRGB565Row_SSE2, 0, 4, 2, 3)
986
ANY11(ARGBToARGB1555Row_Any_SSE2, ARGBToARGB1555Row_SSE2, 0, 4, 2, 3)
987
ANY11(ARGBToARGB4444Row_Any_SSE2, ARGBToARGB4444Row_SSE2, 0, 4, 2, 3)
988
#endif
989
#if defined(HAS_ARGBTORGB24ROW_AVX2)
990
ANY11(ARGBToRGB24Row_Any_AVX2, ARGBToRGB24Row_AVX2, 0, 4, 3, 31)
991
#endif
992
#if defined(HAS_ARGBTORGB24ROW_AVX512VBMI)
993
ANY11(ARGBToRGB24Row_Any_AVX512VBMI, ARGBToRGB24Row_AVX512VBMI, 0, 4, 3, 31)
994
#endif
995
#if defined(HAS_ARGBTORAWROW_AVX2)
996
ANY11(ARGBToRAWRow_Any_AVX2, ARGBToRAWRow_AVX2, 0, 4, 3, 31)
997
#endif
998
#if defined(HAS_ARGBTORGB565ROW_AVX2)
999
ANY11(ARGBToRGB565Row_Any_AVX2, ARGBToRGB565Row_AVX2, 0, 4, 2, 7)
1000
#endif
1001
#if defined(HAS_ARGBTOARGB4444ROW_AVX2)
1002
ANY11(ARGBToARGB1555Row_Any_AVX2, ARGBToARGB1555Row_AVX2, 0, 4, 2, 7)
1003
ANY11(ARGBToARGB4444Row_Any_AVX2, ARGBToARGB4444Row_AVX2, 0, 4, 2, 7)
1004
#endif
1005
#if defined(HAS_ABGRTOAR30ROW_SSSE3)
1006
ANY11(ABGRToAR30Row_Any_SSSE3, ABGRToAR30Row_SSSE3, 0, 4, 4, 3)
1007
#endif
1008
#if defined(HAS_ABGRTOAR30ROW_NEON)
1009
ANY11(ABGRToAR30Row_Any_NEON, ABGRToAR30Row_NEON, 0, 4, 4, 7)
1010
#endif
1011
#if defined(HAS_ARGBTOAR30ROW_NEON)
1012
ANY11(ARGBToAR30Row_Any_NEON, ARGBToAR30Row_NEON, 0, 4, 4, 7)
1013
#endif
1014
#if defined(HAS_ARGBTOAR30ROW_SSSE3)
1015
ANY11(ARGBToAR30Row_Any_SSSE3, ARGBToAR30Row_SSSE3, 0, 4, 4, 3)
1016
#endif
1017
#if defined(HAS_ABGRTOAR30ROW_AVX2)
1018
ANY11(ABGRToAR30Row_Any_AVX2, ABGRToAR30Row_AVX2, 0, 4, 4, 7)
1019
#endif
1020
#if defined(HAS_ARGBTOAR30ROW_AVX2)
1021
ANY11(ARGBToAR30Row_Any_AVX2, ARGBToAR30Row_AVX2, 0, 4, 4, 7)
1022
#endif
1023
#if defined(HAS_J400TOARGBROW_SSE2)
1024
ANY11(J400ToARGBRow_Any_SSE2, J400ToARGBRow_SSE2, 0, 1, 4, 7)
1025
#endif
1026
#if defined(HAS_J400TOARGBROW_AVX2)
1027
ANY11(J400ToARGBRow_Any_AVX2, J400ToARGBRow_AVX2, 0, 1, 4, 15)
1028
#endif
1029
#if defined(HAS_RGB24TOARGBROW_SSSE3)
1030
ANY11(RGB24ToARGBRow_Any_SSSE3, RGB24ToARGBRow_SSSE3, 0, 3, 4, 15)
1031
ANY11(RAWToARGBRow_Any_SSSE3, RAWToARGBRow_SSSE3, 0, 3, 4, 15)
1032
ANY11(RGB565ToARGBRow_Any_SSE2, RGB565ToARGBRow_SSE2, 0, 2, 4, 7)
1033
ANY11(ARGB1555ToARGBRow_Any_SSE2, ARGB1555ToARGBRow_SSE2, 0, 2, 4, 7)
1034
ANY11(ARGB4444ToARGBRow_Any_SSE2, ARGB4444ToARGBRow_SSE2, 0, 2, 4, 7)
1035
#endif
1036
#if defined(HAS_RAWTOARGBROW_AVX2)
1037
ANY11(RAWToARGBRow_Any_AVX2, RAWToARGBRow_AVX2, 0, 3, 4, 31)
1038
#endif
1039
#if defined(HAS_RAWTORGBAROW_SSSE3)
1040
ANY11(RAWToRGBARow_Any_SSSE3, RAWToRGBARow_SSSE3, 0, 3, 4, 15)
1041
#endif
1042
#if defined(HAS_RAWTORGB24ROW_SSSE3)
1043
ANY11(RAWToRGB24Row_Any_SSSE3, RAWToRGB24Row_SSSE3, 0, 3, 3, 7)
1044
#endif
1045
#if defined(HAS_RGB565TOARGBROW_AVX2)
1046
ANY11(RGB565ToARGBRow_Any_AVX2, RGB565ToARGBRow_AVX2, 0, 2, 4, 15)
1047
#endif
1048
#if defined(HAS_ARGB1555TOARGBROW_AVX2)
1049
ANY11(ARGB1555ToARGBRow_Any_AVX2, ARGB1555ToARGBRow_AVX2, 0, 2, 4, 15)
1050
#endif
1051
#if defined(HAS_ARGB4444TOARGBROW_AVX2)
1052
ANY11(ARGB4444ToARGBRow_Any_AVX2, ARGB4444ToARGBRow_AVX2, 0, 2, 4, 15)
1053
#endif
1054
#if defined(HAS_ARGBTORGB24ROW_NEON)
1055
ANY11(ARGBToRGB24Row_Any_NEON, ARGBToRGB24Row_NEON, 0, 4, 3, 15)
1056
ANY11(ARGBToRAWRow_Any_NEON, ARGBToRAWRow_NEON, 0, 4, 3, 7)
1057
ANY11(ARGBToRGB565Row_Any_NEON, ARGBToRGB565Row_NEON, 0, 4, 2, 7)
1058
ANY11(ARGBToARGB1555Row_Any_NEON, ARGBToARGB1555Row_NEON, 0, 4, 2, 7)
1059
ANY11(ARGBToARGB4444Row_Any_NEON, ARGBToARGB4444Row_NEON, 0, 4, 2, 7)
1060
ANY11(J400ToARGBRow_Any_NEON, J400ToARGBRow_NEON, 0, 1, 4, 7)
1061
#endif
1062
#if defined(HAS_ARGBTORGB24ROW_MSA)
1063
ANY11(ARGBToRGB24Row_Any_MSA, ARGBToRGB24Row_MSA, 0, 4, 3, 15)
1064
ANY11(ARGBToRAWRow_Any_MSA, ARGBToRAWRow_MSA, 0, 4, 3, 15)
1065
ANY11(ARGBToRGB565Row_Any_MSA, ARGBToRGB565Row_MSA, 0, 4, 2, 7)
1066
ANY11(ARGBToARGB1555Row_Any_MSA, ARGBToARGB1555Row_MSA, 0, 4, 2, 7)
1067
ANY11(ARGBToARGB4444Row_Any_MSA, ARGBToARGB4444Row_MSA, 0, 4, 2, 7)
1068
ANY11(J400ToARGBRow_Any_MSA, J400ToARGBRow_MSA, 0, 1, 4, 15)
1069
#endif
1070
#if defined(HAS_ARGBTORGB24ROW_LSX)
1071
ANY11(ARGBToRGB24Row_Any_LSX, ARGBToRGB24Row_LSX, 0, 4, 3, 15)
1072
ANY11(ARGBToRAWRow_Any_LSX, ARGBToRAWRow_LSX, 0, 4, 3, 15)
1073
ANY11(ARGBToRGB565Row_Any_LSX, ARGBToRGB565Row_LSX, 0, 4, 2, 7)
1074
ANY11(ARGBToARGB1555Row_Any_LSX, ARGBToARGB1555Row_LSX, 0, 4, 2, 7)
1075
ANY11(ARGBToARGB4444Row_Any_LSX, ARGBToARGB4444Row_LSX, 0, 4, 2, 7)
1076
#endif
1077
#if defined(HAS_ARGBTORGB24ROW_LASX)
1078
ANY11(ARGBToRGB24Row_Any_LASX, ARGBToRGB24Row_LASX, 0, 4, 3, 31)
1079
ANY11(ARGBToRAWRow_Any_LASX, ARGBToRAWRow_LASX, 0, 4, 3, 31)
1080
ANY11(ARGBToRGB565Row_Any_LASX, ARGBToRGB565Row_LASX, 0, 4, 2, 15)
1081
ANY11(ARGBToARGB1555Row_Any_LASX, ARGBToARGB1555Row_LASX, 0, 4, 2, 15)
1082
ANY11(ARGBToARGB4444Row_Any_LASX, ARGBToARGB4444Row_LASX, 0, 4, 2, 15)
1083
#endif
1084
#if defined(HAS_J400TOARGBROW_LSX)
1085
ANY11(J400ToARGBRow_Any_LSX, J400ToARGBRow_LSX, 0, 1, 4, 15)
1086
#endif
1087
#if defined(HAS_RAWTORGB24ROW_NEON)
1088
ANY11(RAWToRGB24Row_Any_NEON, RAWToRGB24Row_NEON, 0, 3, 3, 7)
1089
#endif
1090
#if defined(HAS_RAWTORGB24ROW_MSA)
1091
ANY11(RAWToRGB24Row_Any_MSA, RAWToRGB24Row_MSA, 0, 3, 3, 15)
1092
#endif
1093
#if defined(HAS_RAWTORGB24ROW_LSX)
1094
ANY11(RAWToRGB24Row_Any_LSX, RAWToRGB24Row_LSX, 0, 3, 3, 15)
1095
#endif
1096
#ifdef HAS_ARGBTOYROW_AVX2
1097
ANY11(ARGBToYRow_Any_AVX2, ARGBToYRow_AVX2, 0, 4, 1, 31)
1098
#endif
1099
#ifdef HAS_ABGRTOYROW_AVX2
1100
ANY11(ABGRToYRow_Any_AVX2, ABGRToYRow_AVX2, 0, 4, 1, 31)
1101
#endif
1102
#ifdef HAS_ARGBTOYJROW_AVX2
1103
ANY11(ARGBToYJRow_Any_AVX2, ARGBToYJRow_AVX2, 0, 4, 1, 31)
1104
#endif
1105
#ifdef HAS_ABGRTOYJROW_AVX2
1106
ANY11(ABGRToYJRow_Any_AVX2, ABGRToYJRow_AVX2, 0, 4, 1, 31)
1107
#endif
1108
#ifdef HAS_RGBATOYJROW_AVX2
1109
ANY11(RGBAToYJRow_Any_AVX2, RGBAToYJRow_AVX2, 0, 4, 1, 31)
1110
#endif
1111
#ifdef HAS_UYVYTOYROW_AVX2
1112
ANY11(UYVYToYRow_Any_AVX2, UYVYToYRow_AVX2, 0, 2, 1, 31)
1113
#endif
1114
#ifdef HAS_YUY2TOYROW_AVX2
1115
ANY11(YUY2ToYRow_Any_AVX2, YUY2ToYRow_AVX2, 1, 4, 1, 31)
1116
#endif
1117
#ifdef HAS_ARGBTOYROW_SSSE3
1118
ANY11(ARGBToYRow_Any_SSSE3, ARGBToYRow_SSSE3, 0, 4, 1, 15)
1119
#endif
1120
#ifdef HAS_BGRATOYROW_SSSE3
1121
ANY11(BGRAToYRow_Any_SSSE3, BGRAToYRow_SSSE3, 0, 4, 1, 15)
1122
ANY11(ABGRToYRow_Any_SSSE3, ABGRToYRow_SSSE3, 0, 4, 1, 15)
1123
ANY11(RGBAToYRow_Any_SSSE3, RGBAToYRow_SSSE3, 0, 4, 1, 15)
1124
#endif
1125
#ifdef HAS_YUY2TOYROW_SSE2
1126
ANY11(YUY2ToYRow_Any_SSE2, YUY2ToYRow_SSE2, 1, 4, 1, 15)
1127
ANY11(UYVYToYRow_Any_SSE2, UYVYToYRow_SSE2, 1, 4, 1, 15)
1128
#endif
1129
#ifdef HAS_ARGBTOYJROW_SSSE3
1130
ANY11(ARGBToYJRow_Any_SSSE3, ARGBToYJRow_SSSE3, 0, 4, 1, 15)
1131
#endif
1132
#ifdef HAS_ABGRTOYJROW_SSSE3
1133
ANY11(ABGRToYJRow_Any_SSSE3, ABGRToYJRow_SSSE3, 0, 4, 1, 15)
1134
#endif
1135
#ifdef HAS_RGBATOYJROW_SSSE3
1136
ANY11(RGBAToYJRow_Any_SSSE3, RGBAToYJRow_SSSE3, 0, 4, 1, 15)
1137
#endif
1138
#ifdef HAS_ARGBTOYROW_NEON
1139
ANY11(ARGBToYRow_Any_NEON, ARGBToYRow_NEON, 0, 4, 1, 15)
1140
#endif
1141
#ifdef HAS_ARGBTOYROW_NEON_DOTPROD
1142
ANY11(ARGBToYRow_Any_NEON_DotProd, ARGBToYRow_NEON_DotProd, 0, 4, 1, 15)
1143
#endif
1144
#ifdef HAS_ARGBTOYROW_MSA
1145
ANY11(ARGBToYRow_Any_MSA, ARGBToYRow_MSA, 0, 4, 1, 15)
1146
#endif
1147
#ifdef HAS_ARGBTOYROW_LSX
1148
ANY11(ARGBToYRow_Any_LSX, ARGBToYRow_LSX, 0, 4, 1, 15)
1149
#endif
1150
#ifdef HAS_ARGBTOYROW_LASX
1151
ANY11(ARGBToYRow_Any_LASX, ARGBToYRow_LASX, 0, 4, 1, 31)
1152
#endif
1153
#ifdef HAS_ARGBTOYJROW_NEON
1154
ANY11(ARGBToYJRow_Any_NEON, ARGBToYJRow_NEON, 0, 4, 1, 15)
1155
#endif
1156
#ifdef HAS_ARGBTOYJROW_NEON_DOTPROD
1157
ANY11(ARGBToYJRow_Any_NEON_DotProd, ARGBToYJRow_NEON_DotProd, 0, 4, 1, 15)
1158
#endif
1159
#ifdef HAS_ABGRTOYJROW_NEON
1160
ANY11(ABGRToYJRow_Any_NEON, ABGRToYJRow_NEON, 0, 4, 1, 15)
1161
#endif
1162
#ifdef HAS_ABGRTOYJROW_NEON_DOTPROD
1163
ANY11(ABGRToYJRow_Any_NEON_DotProd, ABGRToYJRow_NEON_DotProd, 0, 4, 1, 15)
1164
#endif
1165
#ifdef HAS_RGBATOYJROW_NEON
1166
ANY11(RGBAToYJRow_Any_NEON, RGBAToYJRow_NEON, 0, 4, 1, 15)
1167
#endif
1168
#ifdef HAS_RGBATOYJROW_NEON_DOTPROD
1169
ANY11(RGBAToYJRow_Any_NEON_DotProd, RGBAToYJRow_NEON_DotProd, 0, 4, 1, 15)
1170
#endif
1171
#ifdef HAS_ARGBTOYJROW_MSA
1172
ANY11(ARGBToYJRow_Any_MSA, ARGBToYJRow_MSA, 0, 4, 1, 15)
1173
#endif
1174
#ifdef HAS_ARGBTOYJROW_LSX
1175
ANY11(ARGBToYJRow_Any_LSX, ARGBToYJRow_LSX, 0, 4, 1, 15)
1176
#endif
1177
#ifdef HAS_RGBATOYJROW_LSX
1178
ANY11(RGBAToYJRow_Any_LSX, RGBAToYJRow_LSX, 0, 4, 1, 15)
1179
#endif
1180
#ifdef HAS_ABGRTOYJROW_LSX
1181
ANY11(ABGRToYJRow_Any_LSX, ABGRToYJRow_LSX, 0, 4, 1, 15)
1182
#endif
1183
#ifdef HAS_RGBATOYJROW_LASX
1184
ANY11(RGBAToYJRow_Any_LASX, RGBAToYJRow_LASX, 0, 4, 1, 31)
1185
#endif
1186
#ifdef HAS_ARGBTOYJROW_LASX
1187
ANY11(ARGBToYJRow_Any_LASX, ARGBToYJRow_LASX, 0, 4, 1, 31)
1188
#endif
1189
#ifdef HAS_ABGRTOYJROW_LASX
1190
ANY11(ABGRToYJRow_Any_LASX, ABGRToYJRow_LASX, 0, 4, 1, 31)
1191
#endif
1192
#ifdef HAS_BGRATOYROW_NEON
1193
ANY11(BGRAToYRow_Any_NEON, BGRAToYRow_NEON, 0, 4, 1, 15)
1194
#endif
1195
#ifdef HAS_BGRATOYROW_NEON_DOTPROD
1196
ANY11(BGRAToYRow_Any_NEON_DotProd, BGRAToYRow_NEON_DotProd, 0, 4, 1, 15)
1197
#endif
1198
#ifdef HAS_BGRATOYROW_MSA
1199
ANY11(BGRAToYRow_Any_MSA, BGRAToYRow_MSA, 0, 4, 1, 15)
1200
#endif
1201
#ifdef HAS_BGRATOYROW_LSX
1202
ANY11(BGRAToYRow_Any_LSX, BGRAToYRow_LSX, 0, 4, 1, 15)
1203
#endif
1204
#ifdef HAS_BGRATOYROW_LASX
1205
ANY11(BGRAToYRow_Any_LASX, BGRAToYRow_LASX, 0, 4, 1, 31)
1206
#endif
1207
#ifdef HAS_ABGRTOYROW_NEON
1208
ANY11(ABGRToYRow_Any_NEON, ABGRToYRow_NEON, 0, 4, 1, 15)
1209
#endif
1210
#ifdef HAS_ABGRTOYROW_NEON_DOTPROD
1211
ANY11(ABGRToYRow_Any_NEON_DotProd, ABGRToYRow_NEON_DotProd, 0, 4, 1, 15)
1212
#endif
1213
#ifdef HAS_ABGRTOYROW_MSA
1214
ANY11(ABGRToYRow_Any_MSA, ABGRToYRow_MSA, 0, 4, 1, 7)
1215
#endif
1216
#ifdef HAS_ABGRTOYROW_LSX
1217
ANY11(ABGRToYRow_Any_LSX, ABGRToYRow_LSX, 0, 4, 1, 15)
1218
#endif
1219
#ifdef HAS_ABGRTOYROW_LASX
1220
ANY11(ABGRToYRow_Any_LASX, ABGRToYRow_LASX, 0, 4, 1, 31)
1221
#endif
1222
#ifdef HAS_RGBATOYROW_NEON
1223
ANY11(RGBAToYRow_Any_NEON, RGBAToYRow_NEON, 0, 4, 1, 15)
1224
#endif
1225
#ifdef HAS_RGBATOYROW_NEON_DOTPROD
1226
ANY11(RGBAToYRow_Any_NEON_DotProd, RGBAToYRow_NEON_DotProd, 0, 4, 1, 15)
1227
#endif
1228
#ifdef HAS_RGBATOYROW_MSA
1229
ANY11(RGBAToYRow_Any_MSA, RGBAToYRow_MSA, 0, 4, 1, 15)
1230
#endif
1231
#ifdef HAS_RGBATOYROW_LSX
1232
ANY11(RGBAToYRow_Any_LSX, RGBAToYRow_LSX, 0, 4, 1, 15)
1233
#endif
1234
#ifdef HAS_RGBATOYROW_LASX
1235
ANY11(RGBAToYRow_Any_LASX, RGBAToYRow_LASX, 0, 4, 1, 31)
1236
#endif
1237
#ifdef HAS_RGB24TOYROW_NEON
1238
ANY11(RGB24ToYRow_Any_NEON, RGB24ToYRow_NEON, 0, 3, 1, 15)
1239
#endif
1240
#ifdef HAS_RGB24TOYJROW_AVX2
1241
ANY11(RGB24ToYJRow_Any_AVX2, RGB24ToYJRow_AVX2, 0, 3, 1, 31)
1242
#endif
1243
#ifdef HAS_RGB24TOYJROW_SSSE3
1244
ANY11(RGB24ToYJRow_Any_SSSE3, RGB24ToYJRow_SSSE3, 0, 3, 1, 15)
1245
#endif
1246
#ifdef HAS_RGB24TOYJROW_NEON
1247
ANY11(RGB24ToYJRow_Any_NEON, RGB24ToYJRow_NEON, 0, 3, 1, 15)
1248
#endif
1249
#ifdef HAS_RGB24TOYROW_MSA
1250
ANY11(RGB24ToYRow_Any_MSA, RGB24ToYRow_MSA, 0, 3, 1, 15)
1251
#endif
1252
#ifdef HAS_RGB24TOYROW_LSX
1253
ANY11(RGB24ToYRow_Any_LSX, RGB24ToYRow_LSX, 0, 3, 1, 15)
1254
#endif
1255
#ifdef HAS_RGB24TOYJROW_LSX
1256
ANY11(RGB24ToYJRow_Any_LSX, RGB24ToYJRow_LSX, 0, 3, 1, 15)
1257
#endif
1258
#ifdef HAS_RGB24TOYJROW_LASX
1259
ANY11(RGB24ToYJRow_Any_LASX, RGB24ToYJRow_LASX, 0, 3, 1, 31)
1260
#endif
1261
#ifdef HAS_RGB24TOYROW_LASX
1262
ANY11(RGB24ToYRow_Any_LASX, RGB24ToYRow_LASX, 0, 3, 1, 31)
1263
#endif
1264
#ifdef HAS_RAWTOYROW_NEON
1265
ANY11(RAWToYRow_Any_NEON, RAWToYRow_NEON, 0, 3, 1, 15)
1266
#endif
1267
#ifdef HAS_RAWTOYJROW_AVX2
1268
ANY11(RAWToYJRow_Any_AVX2, RAWToYJRow_AVX2, 0, 3, 1, 31)
1269
#endif
1270
#ifdef HAS_RAWTOYJROW_SSSE3
1271
ANY11(RAWToYJRow_Any_SSSE3, RAWToYJRow_SSSE3, 0, 3, 1, 15)
1272
#endif
1273
#ifdef HAS_RAWTOYJROW_NEON
1274
ANY11(RAWToYJRow_Any_NEON, RAWToYJRow_NEON, 0, 3, 1, 15)
1275
#endif
1276
#ifdef HAS_RAWTOYROW_MSA
1277
ANY11(RAWToYRow_Any_MSA, RAWToYRow_MSA, 0, 3, 1, 15)
1278
#endif
1279
#ifdef HAS_RAWTOYROW_LSX
1280
ANY11(RAWToYRow_Any_LSX, RAWToYRow_LSX, 0, 3, 1, 15)
1281
#endif
1282
#ifdef HAS_RAWTOYROW_LASX
1283
ANY11(RAWToYRow_Any_LASX, RAWToYRow_LASX, 0, 3, 1, 31)
1284
#endif
1285
#ifdef HAS_RAWTOYJROW_LSX
1286
ANY11(RAWToYJRow_Any_LSX, RAWToYJRow_LSX, 0, 3, 1, 15)
1287
#endif
1288
#ifdef HAS_RAWTOYJROW_LASX
1289
ANY11(RAWToYJRow_Any_LASX, RAWToYJRow_LASX, 0, 3, 1, 31)
1290
#endif
1291
#ifdef HAS_RGB565TOYROW_NEON
1292
ANY11(RGB565ToYRow_Any_NEON, RGB565ToYRow_NEON, 0, 2, 1, 15)
1293
#endif
1294
#ifdef HAS_RGB565TOYROW_MSA
1295
ANY11(RGB565ToYRow_Any_MSA, RGB565ToYRow_MSA, 0, 2, 1, 15)
1296
#endif
1297
#ifdef HAS_RGB565TOYROW_LSX
1298
ANY11(RGB565ToYRow_Any_LSX, RGB565ToYRow_LSX, 0, 2, 1, 15)
1299
#endif
1300
#ifdef HAS_RGB565TOYROW_LASX
1301
ANY11(RGB565ToYRow_Any_LASX, RGB565ToYRow_LASX, 0, 2, 1, 31)
1302
#endif
1303
#ifdef HAS_ARGB1555TOYROW_NEON
1304
#ifdef __aarch64__
1305
ANY11(ARGB1555ToYRow_Any_NEON, ARGB1555ToYRow_NEON, 0, 2, 1, 15)
1306
#else
1307
ANY11(ARGB1555ToYRow_Any_NEON, ARGB1555ToYRow_NEON, 0, 2, 1, 7)
1308
#endif
1309
#endif
1310
#ifdef HAS_ARGB1555TOYROW_MSA
1311
ANY11(ARGB1555ToYRow_Any_MSA, ARGB1555ToYRow_MSA, 0, 2, 1, 15)
1312
#endif
1313
#ifdef HAS_ARGB1555TOYROW_LSX
1314
ANY11(ARGB1555ToYRow_Any_LSX, ARGB1555ToYRow_LSX, 0, 2, 1, 15)
1315
#endif
1316
#ifdef HAS_ARGB1555TOYROW_LASX
1317
ANY11(ARGB1555ToYRow_Any_LASX, ARGB1555ToYRow_LASX, 0, 2, 1, 31)
1318
#endif
1319
#ifdef HAS_ARGB4444TOYROW_NEON
1320
#ifdef __aarch64__
1321
ANY11(ARGB4444ToYRow_Any_NEON, ARGB4444ToYRow_NEON, 0, 2, 1, 15)
1322
#else
1323
ANY11(ARGB4444ToYRow_Any_NEON, ARGB4444ToYRow_NEON, 0, 2, 1, 7)
1324
#endif
1325
#endif
1326
#ifdef HAS_YUY2TOYROW_NEON
1327
ANY11(YUY2ToYRow_Any_NEON, YUY2ToYRow_NEON, 1, 4, 1, 15)
1328
#endif
1329
#ifdef HAS_UYVYTOYROW_NEON
1330
ANY11(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 1, 4, 1, 15)
1331
#endif
1332
#ifdef HAS_YUY2TOYROW_MSA
1333
ANY11(YUY2ToYRow_Any_MSA, YUY2ToYRow_MSA, 1, 4, 1, 31)
1334
#endif
1335
#ifdef HAS_YUY2TOYROW_LSX
1336
ANY11(YUY2ToYRow_Any_LSX, YUY2ToYRow_LSX, 1, 4, 1, 15)
1337
#endif
1338
#ifdef HAS_YUY2TOYROW_LASX
1339
ANY11(YUY2ToYRow_Any_LASX, YUY2ToYRow_LASX, 1, 4, 1, 31)
1340
#endif
1341
#ifdef HAS_UYVYTOYROW_MSA
1342
ANY11(UYVYToYRow_Any_MSA, UYVYToYRow_MSA, 1, 4, 1, 31)
1343
#endif
1344
#ifdef HAS_UYVYTOYROW_LSX
1345
ANY11(UYVYToYRow_Any_LSX, UYVYToYRow_LSX, 1, 4, 1, 15)
1346
#endif
1347
#ifdef HAS_UYVYTOYROW_LASX
1348
ANY11(UYVYToYRow_Any_LASX, UYVYToYRow_LASX, 1, 4, 1, 31)
1349
#endif
1350
#ifdef HAS_AYUVTOYROW_NEON
1351
ANY11(AYUVToYRow_Any_NEON, AYUVToYRow_NEON, 0, 4, 1, 15)
1352
#endif
1353
#ifdef HAS_SWAPUVROW_SSSE3
1354
ANY11(SwapUVRow_Any_SSSE3, SwapUVRow_SSSE3, 0, 2, 2, 15)
1355
#endif
1356
#ifdef HAS_SWAPUVROW_AVX2
1357
ANY11(SwapUVRow_Any_AVX2, SwapUVRow_AVX2, 0, 2, 2, 31)
1358
#endif
1359
#ifdef HAS_SWAPUVROW_NEON
1360
ANY11(SwapUVRow_Any_NEON, SwapUVRow_NEON, 0, 2, 2, 15)
1361
#endif
1362
#ifdef HAS_RGB24TOARGBROW_NEON
1363
ANY11(RGB24ToARGBRow_Any_NEON, RGB24ToARGBRow_NEON, 0, 3, 4, 7)
1364
#endif
1365
#ifdef HAS_RGB24TOARGBROW_MSA
1366
ANY11(RGB24ToARGBRow_Any_MSA, RGB24ToARGBRow_MSA, 0, 3, 4, 15)
1367
#endif
1368
#ifdef HAS_RGB24TOARGBROW_LSX
1369
ANY11(RGB24ToARGBRow_Any_LSX, RGB24ToARGBRow_LSX, 0, 3, 4, 15)
1370
#endif
1371
#ifdef HAS_RGB24TOARGBROW_LASX
1372
ANY11(RGB24ToARGBRow_Any_LASX, RGB24ToARGBRow_LASX, 0, 3, 4, 31)
1373
#endif
1374
#ifdef HAS_RAWTOARGBROW_NEON
1375
ANY11(RAWToARGBRow_Any_NEON, RAWToARGBRow_NEON, 0, 3, 4, 7)
1376
#endif
1377
#ifdef HAS_RAWTORGBAROW_NEON
1378
ANY11(RAWToRGBARow_Any_NEON, RAWToRGBARow_NEON, 0, 3, 4, 7)
1379
#endif
1380
#ifdef HAS_RAWTOARGBROW_MSA
1381
ANY11(RAWToARGBRow_Any_MSA, RAWToARGBRow_MSA, 0, 3, 4, 15)
1382
#endif
1383
#ifdef HAS_RAWTOARGBROW_LSX
1384
ANY11(RAWToARGBRow_Any_LSX, RAWToARGBRow_LSX, 0, 3, 4, 15)
1385
#endif
1386
#ifdef HAS_RAWTOARGBROW_LASX
1387
ANY11(RAWToARGBRow_Any_LASX, RAWToARGBRow_LASX, 0, 3, 4, 31)
1388
#endif
1389
#ifdef HAS_RGB565TOARGBROW_NEON
1390
ANY11(RGB565ToARGBRow_Any_NEON, RGB565ToARGBRow_NEON, 0, 2, 4, 15)
1391
#endif
1392
#ifdef HAS_RGB565TOARGBROW_MSA
1393
ANY11(RGB565ToARGBRow_Any_MSA, RGB565ToARGBRow_MSA, 0, 2, 4, 15)
1394
#endif
1395
#ifdef HAS_RGB565TOARGBROW_LSX
1396
ANY11(RGB565ToARGBRow_Any_LSX, RGB565ToARGBRow_LSX, 0, 2, 4, 15)
1397
#endif
1398
#ifdef HAS_RGB565TOARGBROW_LASX
1399
ANY11(RGB565ToARGBRow_Any_LASX, RGB565ToARGBRow_LASX, 0, 2, 4, 31)
1400
#endif
1401
#ifdef HAS_ARGB1555TOARGBROW_NEON
1402
ANY11(ARGB1555ToARGBRow_Any_NEON, ARGB1555ToARGBRow_NEON, 0, 2, 4, 15)
1403
#endif
1404
#ifdef HAS_ARGB1555TOARGBROW_MSA
1405
ANY11(ARGB1555ToARGBRow_Any_MSA, ARGB1555ToARGBRow_MSA, 0, 2, 4, 15)
1406
#endif
1407
#ifdef HAS_ARGB1555TOARGBROW_LSX
1408
ANY11(ARGB1555ToARGBRow_Any_LSX, ARGB1555ToARGBRow_LSX, 0, 2, 4, 15)
1409
#endif
1410
#ifdef HAS_ARGB1555TOARGBROW_LASX
1411
ANY11(ARGB1555ToARGBRow_Any_LASX, ARGB1555ToARGBRow_LASX, 0, 2, 4, 31)
1412
#endif
1413
#ifdef HAS_ARGB4444TOARGBROW_NEON
1414
ANY11(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 0, 2, 4, 7)
1415
#endif
1416
#ifdef HAS_ARGB4444TOARGBROW_MSA
1417
ANY11(ARGB4444ToARGBRow_Any_MSA, ARGB4444ToARGBRow_MSA, 0, 2, 4, 15)
1418
#endif
1419
#ifdef HAS_ARGB4444TOARGBROW_LSX
1420
ANY11(ARGB4444ToARGBRow_Any_LSX, ARGB4444ToARGBRow_LSX, 0, 2, 4, 15)
1421
#endif
1422
#ifdef HAS_ARGB4444TOARGBROW_LASX
1423
ANY11(ARGB4444ToARGBRow_Any_LASX, ARGB4444ToARGBRow_LASX, 0, 2, 4, 31)
1424
#endif
1425
#ifdef HAS_ARGBATTENUATEROW_SSSE3
1426
ANY11(ARGBAttenuateRow_Any_SSSE3, ARGBAttenuateRow_SSSE3, 0, 4, 4, 3)
1427
#endif
1428
#ifdef HAS_ARGBUNATTENUATEROW_SSE2
1429
ANY11(ARGBUnattenuateRow_Any_SSE2, ARGBUnattenuateRow_SSE2, 0, 4, 4, 3)
1430
#endif
1431
#ifdef HAS_ARGBATTENUATEROW_AVX2
1432
ANY11(ARGBAttenuateRow_Any_AVX2, ARGBAttenuateRow_AVX2, 0, 4, 4, 7)
1433
#endif
1434
#ifdef HAS_ARGBUNATTENUATEROW_AVX2
1435
ANY11(ARGBUnattenuateRow_Any_AVX2, ARGBUnattenuateRow_AVX2, 0, 4, 4, 7)
1436
#endif
1437
#ifdef HAS_ARGBATTENUATEROW_NEON
1438
ANY11(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, 0, 4, 4, 7)
1439
#endif
1440
#ifdef HAS_ARGBATTENUATEROW_MSA
1441
ANY11(ARGBAttenuateRow_Any_MSA, ARGBAttenuateRow_MSA, 0, 4, 4, 7)
1442
#endif
1443
#ifdef HAS_ARGBATTENUATEROW_LSX
1444
ANY11(ARGBAttenuateRow_Any_LSX, ARGBAttenuateRow_LSX, 0, 4, 4, 7)
1445
#endif
1446
#ifdef HAS_ARGBATTENUATEROW_LASX
1447
ANY11(ARGBAttenuateRow_Any_LASX, ARGBAttenuateRow_LASX, 0, 4, 4, 15)
1448
#endif
1449
#ifdef HAS_ARGBEXTRACTALPHAROW_SSE2
1450
ANY11(ARGBExtractAlphaRow_Any_SSE2, ARGBExtractAlphaRow_SSE2, 0, 4, 1, 7)
1451
#endif
1452
#ifdef HAS_ARGBEXTRACTALPHAROW_AVX2
1453
ANY11(ARGBExtractAlphaRow_Any_AVX2, ARGBExtractAlphaRow_AVX2, 0, 4, 1, 31)
1454
#endif
1455
#ifdef HAS_ARGBEXTRACTALPHAROW_NEON
1456
ANY11(ARGBExtractAlphaRow_Any_NEON, ARGBExtractAlphaRow_NEON, 0, 4, 1, 15)
1457
#endif
1458
#ifdef HAS_ARGBEXTRACTALPHAROW_MSA
1459
ANY11(ARGBExtractAlphaRow_Any_MSA, ARGBExtractAlphaRow_MSA, 0, 4, 1, 15)
1460
#endif
1461
#ifdef HAS_ARGBEXTRACTALPHAROW_LSX
1462
ANY11(ARGBExtractAlphaRow_Any_LSX, ARGBExtractAlphaRow_LSX, 0, 4, 1, 15)
1463
#endif
1464
#undef ANY11
1465
1466
// Any 1 to 1 blended.  Destination is read, modify, write.
//
// ANY11B(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) defines
//   void NAMEANY(src_ptr, dst_ptr, width)
// Same splitting scheme as the plain 1 to 1 wrapper, except that the
// remainder's existing destination bytes are loaded into the output scratch
// buffer before ANY_SIMD runs, so the row function can combine source and
// destination. Both 64 byte scratch buffers are zeroed first.
#define ANY11B(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK)              \
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) {    \
    SIMD_ALIGNED(uint8_t vin[64]);                                       \
    SIMD_ALIGNED(uint8_t vout[64]);                                      \
    const int rem = width & MASK;                                        \
    const int full = width - rem;                                        \
    const uint8_t* src_rem = src_ptr + (full >> UVSHIFT) * SBPP;         \
    uint8_t* dst_rem = dst_ptr + full * BPP;                             \
    memset(vin, 0, sizeof(vin));   /* for msan */                        \
    memset(vout, 0, sizeof(vout)); /* for msan */                        \
    if (full > 0) {                                                      \
      ANY_SIMD(src_ptr, dst_ptr, full);                                  \
    }                                                                    \
    memcpy(vin, src_rem, SS(rem, UVSHIFT) * SBPP);                       \
    memcpy(vout, dst_rem, rem * BPP);                                    \
    ANY_SIMD(vin, vout, MASK + 1);                                       \
    memcpy(dst_rem, vout, rem * BPP);                                    \
  }
Unexecuted instantiation: ARGBCopyAlphaRow_Any_AVX2
Unexecuted instantiation: ARGBCopyAlphaRow_Any_SSE2
Unexecuted instantiation: ARGBCopyYToAlphaRow_Any_AVX2
Unexecuted instantiation: ARGBCopyYToAlphaRow_Any_SSE2
1483
1484
#ifdef HAS_ARGBCOPYALPHAROW_AVX2
1485
ANY11B(ARGBCopyAlphaRow_Any_AVX2, ARGBCopyAlphaRow_AVX2, 0, 4, 4, 15)
1486
#endif
1487
#ifdef HAS_ARGBCOPYALPHAROW_SSE2
1488
ANY11B(ARGBCopyAlphaRow_Any_SSE2, ARGBCopyAlphaRow_SSE2, 0, 4, 4, 7)
1489
#endif
1490
#ifdef HAS_ARGBCOPYYTOALPHAROW_AVX2
1491
ANY11B(ARGBCopyYToAlphaRow_Any_AVX2, ARGBCopyYToAlphaRow_AVX2, 0, 1, 4, 15)
1492
#endif
1493
#ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2
1494
ANY11B(ARGBCopyYToAlphaRow_Any_SSE2, ARGBCopyYToAlphaRow_SSE2, 0, 1, 4, 7)
1495
#endif
1496
#undef ANY11B
1497
1498
// Any 1 to 1 with parameter.
1499
// Expands to NAMEANY(src_ptr, dst_ptr, param, width).
//   T        - type of the extra argument, forwarded unchanged to ANY_SIMD.
//   SBPP/BPP - source / destination bytes per pixel.
//   MASK     - kernel step minus one.
// The first n = width & ~MASK pixels are converted directly. The remaining
// r = width & MASK pixels are copied into the zeroed vin buffer, converted as
// one full MASK + 1 pixel step into vout, and only r * BPP bytes are copied
// back to dst_ptr.
// NOTE(review): the staging buffers are a fixed 64 bytes, so instantiations
// presumably need (MASK + 1) * SBPP and (MASK + 1) * BPP to be <= 64 -
// confirm when adding a new one.
#define ANY11P(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK)                          \
1500
278
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, T param, int width) { \
1501
278
    SIMD_ALIGNED(uint8_t vin[64]);                                             \
1502
278
    SIMD_ALIGNED(uint8_t vout[64]);                                            \
1503
278
    memset(vin, 0, sizeof(vin)); /* for msan */                                \
1504
278
    int r = width & MASK;                                                      \
1505
278
    int n = width & ~MASK;                                                     \
1506
278
    if (n > 0) {                                                               \
1507
81
      ANY_SIMD(src_ptr, dst_ptr, param, n);                                    \
1508
81
    }                                                                          \
1509
278
    memcpy(vin, src_ptr + n * SBPP, r * SBPP);                                 \
1510
278
    ANY_SIMD(vin, vout, param, MASK + 1);                                      \
1511
278
    memcpy(dst_ptr + n * BPP, vout, r * BPP);                                  \
1512
278
  }
Unexecuted instantiation: I400ToARGBRow_Any_SSE2
I400ToARGBRow_Any_AVX2
Line
Count
Source
1500
278
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, T param, int width) { \
1501
278
    SIMD_ALIGNED(uint8_t vin[64]);                                             \
1502
278
    SIMD_ALIGNED(uint8_t vout[64]);                                            \
1503
278
    memset(vin, 0, sizeof(vin)); /* for msan */                                \
1504
278
    int r = width & MASK;                                                      \
1505
278
    int n = width & ~MASK;                                                     \
1506
278
    if (n > 0) {                                                               \
1507
81
      ANY_SIMD(src_ptr, dst_ptr, param, n);                                    \
1508
81
    }                                                                          \
1509
278
    memcpy(vin, src_ptr + n * SBPP, r * SBPP);                                 \
1510
278
    ANY_SIMD(vin, vout, param, MASK + 1);                                      \
1511
278
    memcpy(dst_ptr + n * BPP, vout, r * BPP);                                  \
1512
278
  }
Unexecuted instantiation: ARGBToRGB565DitherRow_Any_SSE2
Unexecuted instantiation: ARGBToRGB565DitherRow_Any_AVX2
Unexecuted instantiation: ARGBShuffleRow_Any_SSSE3
Unexecuted instantiation: ARGBShuffleRow_Any_AVX2
1513
1514
#if defined(HAS_I400TOARGBROW_SSE2)
1515
ANY11P(I400ToARGBRow_Any_SSE2,
1516
       I400ToARGBRow_SSE2,
1517
       const struct YuvConstants*,
1518
       1,
1519
       4,
1520
       7)
1521
#endif
1522
#if defined(HAS_I400TOARGBROW_AVX2)
1523
ANY11P(I400ToARGBRow_Any_AVX2,
1524
       I400ToARGBRow_AVX2,
1525
       const struct YuvConstants*,
1526
       1,
1527
       4,
1528
       15)
1529
#endif
1530
#if defined(HAS_I400TOARGBROW_NEON)
1531
ANY11P(I400ToARGBRow_Any_NEON,
1532
       I400ToARGBRow_NEON,
1533
       const struct YuvConstants*,
1534
       1,
1535
       4,
1536
       7)
1537
#endif
1538
#if defined(HAS_I400TOARGBROW_MSA)
1539
ANY11P(I400ToARGBRow_Any_MSA,
1540
       I400ToARGBRow_MSA,
1541
       const struct YuvConstants*,
1542
       1,
1543
       4,
1544
       15)
1545
#endif
1546
#if defined(HAS_I400TOARGBROW_LSX)
1547
ANY11P(I400ToARGBRow_Any_LSX,
1548
       I400ToARGBRow_LSX,
1549
       const struct YuvConstants*,
1550
       1,
1551
       4,
1552
       15)
1553
#endif
1554
1555
#if defined(HAS_ARGBTORGB565DITHERROW_SSE2)
1556
ANY11P(ARGBToRGB565DitherRow_Any_SSE2,
1557
       ARGBToRGB565DitherRow_SSE2,
1558
       const uint32_t,
1559
       4,
1560
       2,
1561
       3)
1562
#endif
1563
#if defined(HAS_ARGBTORGB565DITHERROW_AVX2)
1564
ANY11P(ARGBToRGB565DitherRow_Any_AVX2,
1565
       ARGBToRGB565DitherRow_AVX2,
1566
       const uint32_t,
1567
       4,
1568
       2,
1569
       7)
1570
#endif
1571
#if defined(HAS_ARGBTORGB565DITHERROW_NEON)
1572
ANY11P(ARGBToRGB565DitherRow_Any_NEON,
1573
       ARGBToRGB565DitherRow_NEON,
1574
       const uint32_t,
1575
       4,
1576
       2,
1577
       7)
1578
#endif
1579
#if defined(HAS_ARGBTORGB565DITHERROW_MSA)
1580
ANY11P(ARGBToRGB565DitherRow_Any_MSA,
1581
       ARGBToRGB565DitherRow_MSA,
1582
       const uint32_t,
1583
       4,
1584
       2,
1585
       7)
1586
#endif
1587
#if defined(HAS_ARGBTORGB565DITHERROW_LSX)
1588
ANY11P(ARGBToRGB565DitherRow_Any_LSX,
1589
       ARGBToRGB565DitherRow_LSX,
1590
       const uint32_t,
1591
       4,
1592
       2,
1593
       7)
1594
#endif
1595
#if defined(HAS_ARGBTORGB565DITHERROW_LASX)
1596
ANY11P(ARGBToRGB565DitherRow_Any_LASX,
1597
       ARGBToRGB565DitherRow_LASX,
1598
       const uint32_t,
1599
       4,
1600
       2,
1601
       15)
1602
#endif
1603
#ifdef HAS_ARGBSHUFFLEROW_SSSE3
1604
ANY11P(ARGBShuffleRow_Any_SSSE3, ARGBShuffleRow_SSSE3, const uint8_t*, 4, 4, 7)
1605
#endif
1606
#ifdef HAS_ARGBSHUFFLEROW_AVX2
1607
ANY11P(ARGBShuffleRow_Any_AVX2, ARGBShuffleRow_AVX2, const uint8_t*, 4, 4, 15)
1608
#endif
1609
#ifdef HAS_ARGBSHUFFLEROW_NEON
1610
ANY11P(ARGBShuffleRow_Any_NEON, ARGBShuffleRow_NEON, const uint8_t*, 4, 4, 3)
1611
#endif
1612
#ifdef HAS_ARGBSHUFFLEROW_MSA
1613
ANY11P(ARGBShuffleRow_Any_MSA, ARGBShuffleRow_MSA, const uint8_t*, 4, 4, 7)
1614
#endif
1615
#ifdef HAS_ARGBSHUFFLEROW_LSX
1616
ANY11P(ARGBShuffleRow_Any_LSX, ARGBShuffleRow_LSX, const uint8_t*, 4, 4, 7)
1617
#endif
1618
#ifdef HAS_ARGBSHUFFLEROW_LASX
1619
ANY11P(ARGBShuffleRow_Any_LASX, ARGBShuffleRow_LASX, const uint8_t*, 4, 4, 15)
1620
#endif
1621
// ANY11P is undefined once here; the redundant second #undef was removed.
#undef ANY11P
1622
1623
1624
// Any 1 to 1 with type
1625
// Expands to NAMEANY(src_ptr, dst_ptr, width) for kernels whose source and
// destination element types (STYPE / DTYPE) differ.
//   SBPP/BPP - source / destination bytes per pixel.
//   MASK     - kernel step minus one.
// The byte staging buffers are sized from the macro arguments as
// (MASK + 1) * SBPP and (MASK + 1) * BPP, i.e. exactly one kernel step.
// Offsets into src_ptr / dst_ptr are computed on uint8_t* casts, so they are
// byte offsets regardless of STYPE / DTYPE. The buffers are cast back to
// STYPE* / DTYPE* only for the kernel call.
#define ANY11T(NAMEANY, ANY_SIMD, SBPP, BPP, STYPE, DTYPE, MASK)  \
1626
0
  void NAMEANY(const STYPE* src_ptr, DTYPE* dst_ptr, int width) { \
1627
0
    SIMD_ALIGNED(uint8_t vin[(MASK + 1) * SBPP]);                 \
1628
0
    SIMD_ALIGNED(uint8_t vout[(MASK + 1) * BPP]);                 \
1629
0
    memset(vin, 0, sizeof(vin)); /* for msan */                   \
1630
0
    int r = width & MASK;                                         \
1631
0
    int n = width & ~MASK;                                        \
1632
0
    if (n > 0) {                                                  \
1633
0
      ANY_SIMD(src_ptr, dst_ptr, n);                              \
1634
0
    }                                                             \
1635
0
    memcpy(vin, (uint8_t*)(src_ptr) + n * SBPP, r * SBPP);        \
1636
0
    ANY_SIMD((STYPE*)vin, (DTYPE*)vout, MASK + 1);                \
1637
0
    memcpy((uint8_t*)(dst_ptr) + n * BPP, vout, r * BPP);         \
1638
0
  }
Unexecuted instantiation: ARGBToAR64Row_Any_SSSE3
Unexecuted instantiation: ARGBToAB64Row_Any_SSSE3
Unexecuted instantiation: AR64ToARGBRow_Any_SSSE3
Unexecuted instantiation: AB64ToARGBRow_Any_SSSE3
Unexecuted instantiation: ARGBToAR64Row_Any_AVX2
Unexecuted instantiation: ARGBToAB64Row_Any_AVX2
Unexecuted instantiation: AR64ToARGBRow_Any_AVX2
Unexecuted instantiation: AB64ToARGBRow_Any_AVX2
1639
1640
#ifdef HAS_ARGBTOAR64ROW_SSSE3
1641
ANY11T(ARGBToAR64Row_Any_SSSE3, ARGBToAR64Row_SSSE3, 4, 8, uint8_t, uint16_t, 3)
1642
#endif
1643
1644
#ifdef HAS_ARGBTOAB64ROW_SSSE3
1645
ANY11T(ARGBToAB64Row_Any_SSSE3, ARGBToAB64Row_SSSE3, 4, 8, uint8_t, uint16_t, 3)
1646
#endif
1647
1648
#ifdef HAS_AR64TOARGBROW_SSSE3
1649
ANY11T(AR64ToARGBRow_Any_SSSE3, AR64ToARGBRow_SSSE3, 8, 4, uint16_t, uint8_t, 3)
1650
#endif
1651
1652
// Guarded by the AB64ToARGB feature macro rather than the unrelated
// ARGBToAR64 one, so the wrapper exists exactly when its kernel does.
#ifdef HAS_AB64TOARGBROW_SSSE3
1653
ANY11T(AB64ToARGBRow_Any_SSSE3, AB64ToARGBRow_SSSE3, 8, 4, uint16_t, uint8_t, 3)
1654
#endif
1655
1656
#ifdef HAS_ARGBTOAR64ROW_AVX2
1657
ANY11T(ARGBToAR64Row_Any_AVX2, ARGBToAR64Row_AVX2, 4, 8, uint8_t, uint16_t, 7)
1658
#endif
1659
1660
#ifdef HAS_ARGBTOAB64ROW_AVX2
1661
ANY11T(ARGBToAB64Row_Any_AVX2, ARGBToAB64Row_AVX2, 4, 8, uint8_t, uint16_t, 7)
1662
#endif
1663
1664
#ifdef HAS_AR64TOARGBROW_AVX2
1665
ANY11T(AR64ToARGBRow_Any_AVX2, AR64ToARGBRow_AVX2, 8, 4, uint16_t, uint8_t, 7)
1666
#endif
1667
1668
// Guarded by the AB64ToARGB feature macro rather than the unrelated
// ARGBToAR64 one, so the wrapper exists exactly when its kernel does.
#ifdef HAS_AB64TOARGBROW_AVX2
1669
ANY11T(AB64ToARGBRow_Any_AVX2, AB64ToARGBRow_AVX2, 8, 4, uint16_t, uint8_t, 7)
1670
#endif
1671
1672
#ifdef HAS_ARGBTOAR64ROW_NEON
1673
ANY11T(ARGBToAR64Row_Any_NEON, ARGBToAR64Row_NEON, 4, 8, uint8_t, uint16_t, 7)
1674
#endif
1675
1676
#ifdef HAS_ARGBTOAB64ROW_NEON
1677
ANY11T(ARGBToAB64Row_Any_NEON, ARGBToAB64Row_NEON, 4, 8, uint8_t, uint16_t, 7)
1678
#endif
1679
1680
#ifdef HAS_AR64TOARGBROW_NEON
1681
ANY11T(AR64ToARGBRow_Any_NEON, AR64ToARGBRow_NEON, 8, 4, uint16_t, uint8_t, 7)
1682
#endif
1683
1684
// Guarded by the AB64ToARGB feature macro rather than the unrelated
// ARGBToAR64 one, so the wrapper exists exactly when its kernel does.
#ifdef HAS_AB64TOARGBROW_NEON
1685
ANY11T(AB64ToARGBRow_Any_NEON, AB64ToARGBRow_NEON, 8, 4, uint16_t, uint8_t, 7)
1686
#endif
1687
1688
#undef ANY11T
1689
1690
// Any 1 to 1 with a scale parameter and typed (8 or 16 bit) elements.  Pointers advance in elements; SBPP and BPP are bytes per element.
1691
// Expands to NAMEANY(src_ptr, dst_ptr, scale, width) for kernels that take an
// integer scale and typed source / destination elements.
//   STYPE/DTYPE - element types of the source / destination rows.
//   SBPP/BPP    - bytes per source / destination element, used only as
//                 memcpy sizes.
//   MASK        - kernel step minus one.
// src_ptr + n and dst_ptr + n are typed pointer arithmetic, so n counts
// elements there, while the memcpy lengths r * SBPP and r * BPP are bytes.
// vin and vout hold 64 elements each.
#define ANY11C(NAMEANY, ANY_SIMD, SBPP, BPP, STYPE, DTYPE, MASK)             \
1692
530
  void NAMEANY(const STYPE* src_ptr, DTYPE* dst_ptr, int scale, int width) { \
1693
530
    SIMD_ALIGNED(STYPE vin[64]);                                             \
1694
530
    SIMD_ALIGNED(DTYPE vout[64]);                                            \
1695
530
    memset(vin, 0, sizeof(vin)); /* for msan */                              \
1696
530
    int r = width & MASK;                                                    \
1697
530
    int n = width & ~MASK;                                                   \
1698
530
    if (n > 0) {                                                             \
1699
119
      ANY_SIMD(src_ptr, dst_ptr, scale, n);                                  \
1700
119
    }                                                                        \
1701
530
    memcpy(vin, src_ptr + n, r * SBPP);                                      \
1702
530
    ANY_SIMD(vin, vout, scale, MASK + 1);                                    \
1703
530
    memcpy(dst_ptr + n, vout, r * BPP);                                      \
1704
530
  }
Unexecuted instantiation: Convert16To8Row_Any_SSSE3
Convert16To8Row_Any_AVX2
Line
Count
Source
1692
530
  void NAMEANY(const STYPE* src_ptr, DTYPE* dst_ptr, int scale, int width) { \
1693
530
    SIMD_ALIGNED(STYPE vin[64]);                                             \
1694
530
    SIMD_ALIGNED(DTYPE vout[64]);                                            \
1695
530
    memset(vin, 0, sizeof(vin)); /* for msan */                              \
1696
530
    int r = width & MASK;                                                    \
1697
530
    int n = width & ~MASK;                                                   \
1698
530
    if (n > 0) {                                                             \
1699
119
      ANY_SIMD(src_ptr, dst_ptr, scale, n);                                  \
1700
119
    }                                                                        \
1701
530
    memcpy(vin, src_ptr + n, r * SBPP);                                      \
1702
530
    ANY_SIMD(vin, vout, scale, MASK + 1);                                    \
1703
530
    memcpy(dst_ptr + n, vout, r * BPP);                                      \
1704
530
  }
Unexecuted instantiation: Convert16To8Row_Any_AVX512BW
Unexecuted instantiation: Convert8To16Row_Any_SSE2
Unexecuted instantiation: Convert8To16Row_Any_AVX2
Unexecuted instantiation: MultiplyRow_16_Any_AVX2
Unexecuted instantiation: DivideRow_16_Any_AVX2
1705
1706
#ifdef HAS_CONVERT16TO8ROW_SSSE3
1707
ANY11C(Convert16To8Row_Any_SSSE3,
1708
       Convert16To8Row_SSSE3,
1709
       2,
1710
       1,
1711
       uint16_t,
1712
       uint8_t,
1713
       15)
1714
#endif
1715
#ifdef HAS_CONVERT16TO8ROW_AVX2
1716
ANY11C(Convert16To8Row_Any_AVX2,
1717
       Convert16To8Row_AVX2,
1718
       2,
1719
       1,
1720
       uint16_t,
1721
       uint8_t,
1722
       31)
1723
#endif
1724
#ifdef HAS_CONVERT16TO8ROW_AVX512BW
1725
ANY11C(Convert16To8Row_Any_AVX512BW,
1726
       Convert16To8Row_AVX512BW,
1727
       2,
1728
       1,
1729
       uint16_t,
1730
       uint8_t,
1731
       63)
1732
#endif
1733
#ifdef HAS_CONVERT16TO8ROW_NEON
1734
ANY11C(Convert16To8Row_Any_NEON,
1735
       Convert16To8Row_NEON,
1736
       2,
1737
       1,
1738
       uint16_t,
1739
       uint8_t,
1740
       15)
1741
#endif
1742
#ifdef HAS_CONVERT8TO16ROW_SSE2
1743
ANY11C(Convert8To16Row_Any_SSE2,
1744
       Convert8To16Row_SSE2,
1745
       1,
1746
       2,
1747
       uint8_t,
1748
       uint16_t,
1749
       15)
1750
#endif
1751
#ifdef HAS_CONVERT8TO16ROW_AVX2
1752
ANY11C(Convert8To16Row_Any_AVX2,
1753
       Convert8To16Row_AVX2,
1754
       1,
1755
       2,
1756
       uint8_t,
1757
       uint16_t,
1758
       31)
1759
#endif
1760
#ifdef HAS_MULTIPLYROW_16_AVX2
1761
ANY11C(MultiplyRow_16_Any_AVX2,
1762
       MultiplyRow_16_AVX2,
1763
       2,
1764
       2,
1765
       uint16_t,
1766
       uint16_t,
1767
       31)
1768
#endif
1769
#ifdef HAS_MULTIPLYROW_16_NEON
1770
ANY11C(MultiplyRow_16_Any_NEON,
1771
       MultiplyRow_16_NEON,
1772
       2,
1773
       2,
1774
       uint16_t,
1775
       uint16_t,
1776
       15)
1777
#endif
1778
#ifdef HAS_DIVIDEROW_16_AVX2
1779
ANY11C(DivideRow_16_Any_AVX2, DivideRow_16_AVX2, 2, 2, uint16_t, uint16_t, 31)
1780
#endif
1781
#ifdef HAS_DIVIDEROW_16_NEON
1782
ANY11C(DivideRow_16_Any_NEON, DivideRow_16_NEON, 2, 2, uint16_t, uint16_t, 15)
1783
#endif
1784
#undef ANY11C
1785
1786
// Any 1 to 1 with scale and bias parameters and typed elements.  Pointers advance in elements; SBPP and BPP are bytes per element.
1787
// Expands to NAMEANY(src_ptr, dst_ptr, scale, bias, width). Same staging
// scheme as the scale-only wrapper, with an additional integer bias that is
// forwarded unchanged to ANY_SIMD.
//   STYPE/DTYPE - element types of the source / destination rows.
//   SBPP/BPP    - bytes per source / destination element (memcpy sizes).
//   MASK        - kernel step minus one.
// The first n = width & ~MASK elements are processed in place. The last
// r = width & MASK elements go through the 64-element vin / vout buffers and
// only r * BPP bytes are written back.
#define ANY11SB(NAMEANY, ANY_SIMD, SBPP, BPP, STYPE, DTYPE, MASK)         \
1788
  void NAMEANY(const STYPE* src_ptr, DTYPE* dst_ptr, int scale, int bias, \
1789
0
               int width) {                                               \
1790
0
    SIMD_ALIGNED(STYPE vin[64]);                                          \
1791
0
    SIMD_ALIGNED(DTYPE vout[64]);                                         \
1792
0
    memset(vin, 0, sizeof(vin)); /* for msan */                           \
1793
0
    int r = width & MASK;                                                 \
1794
0
    int n = width & ~MASK;                                                \
1795
0
    if (n > 0) {                                                          \
1796
0
      ANY_SIMD(src_ptr, dst_ptr, scale, bias, n);                         \
1797
0
    }                                                                     \
1798
0
    memcpy(vin, src_ptr + n, r * SBPP);                                   \
1799
0
    ANY_SIMD(vin, vout, scale, bias, MASK + 1);                           \
1800
0
    memcpy(dst_ptr + n, vout, r * BPP);                                   \
1801
0
  }
1802
1803
#ifdef HAS_CONVERT8TO8ROW_NEON
1804
ANY11SB(Convert8To8Row_Any_NEON,
1805
        Convert8To8Row_NEON,
1806
        1,
1807
        1,
1808
        uint8_t,
1809
        uint8_t,
1810
        31)
1811
#endif
1812
#ifdef HAS_CONVERT8TO8ROW_AVX2
1813
ANY11SB(Convert8To8Row_Any_AVX2,
1814
        Convert8To8Row_AVX2,
1815
        1,
1816
        1,
1817
        uint8_t,
1818
        uint8_t,
1819
        31)
1820
#endif
1821
// Undefine the macro this section actually defined (ANY11SB, not ANY11B).
#undef ANY11SB
1822
1823
// Any 1 to 1 with a float parameter and typed elements.  Pointers advance in elements; SBPP and BPP are bytes per element.
1824
// Expands to NAMEANY(src_ptr, dst_ptr, param, width) for kernels taking a
// float parameter.
//   ST/T     - element types of the source / destination rows.
//   SBPP/BPP - bytes per source / destination element, used as memcpy sizes.
//   MASK     - kernel step minus one.
// src_ptr + n and dst_ptr + n advance in elements. The remainder copies move
// r * SBPP bytes in and r * BPP bytes out, so BPP has to equal sizeof(T) for
// all r remainder elements to be written back. vin and vout hold 32 elements
// each.
#define ANY11P16(NAMEANY, ANY_SIMD, ST, T, SBPP, BPP, MASK)             \
1825
204
  void NAMEANY(const ST* src_ptr, T* dst_ptr, float param, int width) { \
1826
204
    SIMD_ALIGNED(ST vin[32]);                                           \
1827
204
    SIMD_ALIGNED(T vout[32]);                                           \
1828
204
    memset(vin, 0, sizeof(vin)); /* for msan */                         \
1829
204
    int r = width & MASK;                                               \
1830
204
    int n = width & ~MASK;                                              \
1831
204
    if (n > 0) {                                                        \
1832
78
      ANY_SIMD(src_ptr, dst_ptr, param, n);                             \
1833
78
    }                                                                   \
1834
204
    memcpy(vin, src_ptr + n, r * SBPP);                                 \
1835
204
    ANY_SIMD(vin, vout, param, MASK + 1);                               \
1836
204
    memcpy(dst_ptr + n, vout, r * BPP);                                 \
1837
204
  }
Unexecuted instantiation: HalfFloatRow_Any_SSE2
HalfFloatRow_Any_AVX2
Line
Count
Source
1825
204
  void NAMEANY(const ST* src_ptr, T* dst_ptr, float param, int width) { \
1826
204
    SIMD_ALIGNED(ST vin[32]);                                           \
1827
204
    SIMD_ALIGNED(T vout[32]);                                           \
1828
204
    memset(vin, 0, sizeof(vin)); /* for msan */                         \
1829
204
    int r = width & MASK;                                               \
1830
204
    int n = width & ~MASK;                                              \
1831
204
    if (n > 0) {                                                        \
1832
78
      ANY_SIMD(src_ptr, dst_ptr, param, n);                             \
1833
78
    }                                                                   \
1834
204
    memcpy(vin, src_ptr + n, r * SBPP);                                 \
1835
204
    ANY_SIMD(vin, vout, param, MASK + 1);                               \
1836
204
    memcpy(dst_ptr + n, vout, r * BPP);                                 \
1837
204
  }
1838
1839
#ifdef HAS_HALFFLOATROW_SSE2
1840
ANY11P16(HalfFloatRow_Any_SSE2, HalfFloatRow_SSE2, uint16_t, uint16_t, 2, 2, 7)
1841
#endif
1842
#ifdef HAS_HALFFLOATROW_AVX2
1843
ANY11P16(HalfFloatRow_Any_AVX2, HalfFloatRow_AVX2, uint16_t, uint16_t, 2, 2, 15)
1844
#endif
1845
#ifdef HAS_HALFFLOATROW_F16C
1846
ANY11P16(HalfFloatRow_Any_F16C, HalfFloatRow_F16C, uint16_t, uint16_t, 2, 2, 15)
1847
ANY11P16(HalfFloat1Row_Any_F16C,
1848
         HalfFloat1Row_F16C,
1849
         uint16_t,
1850
         uint16_t,
1851
         2,
1852
         2,
1853
         15)
1854
#endif
1855
#ifdef HAS_HALFFLOATROW_NEON
1856
ANY11P16(HalfFloatRow_Any_NEON, HalfFloatRow_NEON, uint16_t, uint16_t, 2, 2, 15)
1857
#endif
1858
#ifdef HAS_HALFFLOATROW_MSA
1859
ANY11P16(HalfFloatRow_Any_MSA, HalfFloatRow_MSA, uint16_t, uint16_t, 2, 2, 31)
1860
#endif
1861
// Destination elements are float, so BPP is 4 bytes: the remainder copy-back
// is r * BPP bytes and must cover all r floats.
#ifdef HAS_BYTETOFLOATROW_NEON
1862
ANY11P16(ByteToFloatRow_Any_NEON, ByteToFloatRow_NEON, uint8_t, float, 1, 4, 7)
1863
#endif
1864
#ifdef HAS_HALFFLOATROW_LSX
1865
ANY11P16(HalfFloatRow_Any_LSX, HalfFloatRow_LSX, uint16_t, uint16_t, 2, 2, 31)
1866
#endif
1867
#undef ANY11P16
1868
1869
// Any 1 to 1 with yuvconstants
1870
// Expands to NAMEANY(src_ptr, dst_ptr, yuvconstants, width); the
// yuvconstants pointer is forwarded unchanged to ANY_SIMD.
//   UVSHIFT  - shift applied to the source offset ((n >> UVSHIFT) * SBPP);
//              the remainder size uses SS(r, UVSHIFT), which rounds up so an
//              odd remainder still copies its last partial source unit.
//   SBPP/BPP - bytes per source unit / per destination pixel.
//   MASK     - kernel step minus one.
// Staging buffers are 128 bytes each. vin is zeroed before use.
#define ANY11C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK)              \
1871
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr,                 \
1872
0
               const struct YuvConstants* yuvconstants, int width) {     \
1873
0
    SIMD_ALIGNED(uint8_t vin[128]);                                      \
1874
0
    SIMD_ALIGNED(uint8_t vout[128]);                                     \
1875
0
    memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */                 \
1876
0
    int r = width & MASK;                                                \
1877
0
    int n = width & ~MASK;                                               \
1878
0
    if (n > 0) {                                                         \
1879
0
      ANY_SIMD(src_ptr, dst_ptr, yuvconstants, n);                       \
1880
0
    }                                                                    \
1881
0
    memcpy(vin, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
1882
0
    ANY_SIMD(vin, vout, yuvconstants, MASK + 1);                         \
1883
0
    memcpy(dst_ptr + n * BPP, vout, r * BPP);                            \
1884
0
  }
Unexecuted instantiation: YUY2ToARGBRow_Any_SSSE3
Unexecuted instantiation: UYVYToARGBRow_Any_SSSE3
Unexecuted instantiation: YUY2ToARGBRow_Any_AVX2
Unexecuted instantiation: UYVYToARGBRow_Any_AVX2
1885
1886
#if defined(HAS_YUY2TOARGBROW_SSSE3)
1887
ANY11C(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_SSSE3, 1, 4, 4, 15)
1888
ANY11C(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_SSSE3, 1, 4, 4, 15)
1889
#endif
1890
#if defined(HAS_YUY2TOARGBROW_AVX2)
1891
ANY11C(YUY2ToARGBRow_Any_AVX2, YUY2ToARGBRow_AVX2, 1, 4, 4, 31)
1892
ANY11C(UYVYToARGBRow_Any_AVX2, UYVYToARGBRow_AVX2, 1, 4, 4, 31)
1893
#endif
1894
#if defined(HAS_YUY2TOARGBROW_NEON)
1895
ANY11C(YUY2ToARGBRow_Any_NEON, YUY2ToARGBRow_NEON, 1, 4, 4, 7)
1896
ANY11C(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, 1, 4, 4, 7)
1897
#endif
1898
#if defined(HAS_YUY2TOARGBROW_MSA)
1899
ANY11C(YUY2ToARGBRow_Any_MSA, YUY2ToARGBRow_MSA, 1, 4, 4, 7)
1900
ANY11C(UYVYToARGBRow_Any_MSA, UYVYToARGBRow_MSA, 1, 4, 4, 7)
1901
#endif
1902
#if defined(HAS_YUY2TOARGBROW_LSX)
1903
ANY11C(YUY2ToARGBRow_Any_LSX, YUY2ToARGBRow_LSX, 1, 4, 4, 7)
1904
ANY11C(UYVYToARGBRow_Any_LSX, UYVYToARGBRow_LSX, 1, 4, 4, 7)
1905
#endif
1906
#undef ANY11C
1907
1908
// Any 1 to 1 interpolate.  Takes 2 rows of source via stride.
1909
// Expands to NAMEANY(dst_ptr, src_ptr, src_stride, width, source_y_fraction).
// Note the destination-first argument order.
//   TD/TS    - destination / source element types.
//   SBPP/BPP - source / destination elements per pixel (offsets are
//              n * SBPP and n * BPP elements; byte sizes multiply by sizeof).
//   MASK     - kernel step minus one.
// The remainder is staged as two rows inside vin: the row at src_ptr goes to
// vin[0..63], and the row at src_ptr + src_stride goes to vin[64..127]. The
// second row is copied only when source_y_fraction is non-zero, so src_stride
// is not dereferenced otherwise. The kernel is then called on the staging
// buffers with a stride of 64 elements.
#define ANY11I(NAMEANY, ANY_SIMD, TD, TS, SBPP, BPP, MASK)           \
1910
  void NAMEANY(TD* dst_ptr, const TS* src_ptr, ptrdiff_t src_stride, \
1911
3.29M
               int width, int source_y_fraction) {                   \
1912
3.29M
    SIMD_ALIGNED(TS vin[64 * 2]);                                    \
1913
3.29M
    SIMD_ALIGNED(TD vout[64]);                                       \
1914
3.29M
    memset(vin, 0, sizeof(vin)); /* for msan */                      \
1915
3.29M
    int r = width & MASK;                                            \
1916
3.29M
    int n = width & ~MASK;                                           \
1917
3.29M
    if (n > 0) {                                                     \
1918
1.44M
      ANY_SIMD(dst_ptr, src_ptr, src_stride, n, source_y_fraction);  \
1919
1.44M
    }                                                                \
1920
3.29M
    memcpy(vin, src_ptr + n * SBPP, r * SBPP * sizeof(TS));          \
1921
3.29M
    if (source_y_fraction) {                                         \
1922
2.80M
      memcpy(vin + 64, src_ptr + src_stride + n * SBPP,              \
1923
2.80M
             r * SBPP * sizeof(TS));                                 \
1924
2.80M
    }                                                                \
1925
3.29M
    ANY_SIMD(vout, vin, 64, MASK + 1, source_y_fraction);            \
1926
3.29M
    memcpy(dst_ptr + n * BPP, vout, r * BPP * sizeof(TD));           \
1927
3.29M
  }
InterpolateRow_Any_AVX2
Line
Count
Source
1911
3.29M
               int width, int source_y_fraction) {                   \
1912
3.29M
    SIMD_ALIGNED(TS vin[64 * 2]);                                    \
1913
3.29M
    SIMD_ALIGNED(TD vout[64]);                                       \
1914
3.29M
    memset(vin, 0, sizeof(vin)); /* for msan */                      \
1915
3.29M
    int r = width & MASK;                                            \
1916
3.29M
    int n = width & ~MASK;                                           \
1917
3.29M
    if (n > 0) {                                                     \
1918
1.44M
      ANY_SIMD(dst_ptr, src_ptr, src_stride, n, source_y_fraction);  \
1919
1.44M
    }                                                                \
1920
3.29M
    memcpy(vin, src_ptr + n * SBPP, r * SBPP * sizeof(TS));          \
1921
3.29M
    if (source_y_fraction) {                                         \
1922
2.80M
      memcpy(vin + 64, src_ptr + src_stride + n * SBPP,              \
1923
2.80M
             r * SBPP * sizeof(TS));                                 \
1924
2.80M
    }                                                                \
1925
3.29M
    ANY_SIMD(vout, vin, 64, MASK + 1, source_y_fraction);            \
1926
3.29M
    memcpy(dst_ptr + n * BPP, vout, r * BPP * sizeof(TD));           \
1927
3.29M
  }
Unexecuted instantiation: InterpolateRow_Any_SSSE3
1928
1929
#ifdef HAS_INTERPOLATEROW_AVX2
1930
ANY11I(InterpolateRow_Any_AVX2, InterpolateRow_AVX2, uint8_t, uint8_t, 1, 1, 31)
1931
#endif
1932
#ifdef HAS_INTERPOLATEROW_SSSE3
1933
ANY11I(InterpolateRow_Any_SSSE3,
1934
       InterpolateRow_SSSE3,
1935
       uint8_t,
1936
       uint8_t,
1937
       1,
1938
       1,
1939
       15)
1940
#endif
1941
#ifdef HAS_INTERPOLATEROW_NEON
1942
ANY11I(InterpolateRow_Any_NEON, InterpolateRow_NEON, uint8_t, uint8_t, 1, 1, 15)
1943
#endif
1944
#ifdef HAS_INTERPOLATEROW_MSA
1945
ANY11I(InterpolateRow_Any_MSA, InterpolateRow_MSA, uint8_t, uint8_t, 1, 1, 31)
1946
#endif
1947
#ifdef HAS_INTERPOLATEROW_LSX
1948
ANY11I(InterpolateRow_Any_LSX, InterpolateRow_LSX, uint8_t, uint8_t, 1, 1, 31)
1949
#endif
1950
1951
#ifdef HAS_INTERPOLATEROW_16_NEON
1952
ANY11I(InterpolateRow_16_Any_NEON,
1953
       InterpolateRow_16_NEON,
1954
       uint16_t,
1955
       uint16_t,
1956
       1,
1957
       1,
1958
       7)
1959
#endif
1960
#undef ANY11I
1961
1962
// Any 1 to 1 interpolate with scale param
1963
// Expands to NAMEANY(dst_ptr, src_ptr, src_stride, scale, width,
// source_y_fraction): the two-row interpolate wrapper with an extra integer
// scale forwarded unchanged to ANY_SIMD.
//   TD/TS    - destination / source element types.
//   SBPP/BPP - source / destination elements per pixel.
//   MASK     - kernel step minus one.
// The remainder rows are staged at vin[0..63] and vin[64..127]. The second
// row is copied only when source_y_fraction is non-zero. The kernel is called
// on the staging buffers with a stride of 64 elements.
#define ANY11IS(NAMEANY, ANY_SIMD, TD, TS, SBPP, BPP, MASK)                \
1964
  void NAMEANY(TD* dst_ptr, const TS* src_ptr, ptrdiff_t src_stride,       \
1965
0
               int scale, int width, int source_y_fraction) {              \
1966
0
    SIMD_ALIGNED(TS vin[64 * 2]);                                          \
1967
0
    SIMD_ALIGNED(TD vout[64]);                                             \
1968
0
    memset(vin, 0, sizeof(vin)); /* for msan */                            \
1969
0
    int r = width & MASK;                                                  \
1970
0
    int n = width & ~MASK;                                                 \
1971
0
    if (n > 0) {                                                           \
1972
0
      ANY_SIMD(dst_ptr, src_ptr, src_stride, scale, n, source_y_fraction); \
1973
0
    }                                                                      \
1974
0
    memcpy(vin, src_ptr + n * SBPP, r * SBPP * sizeof(TS));                \
1975
0
    if (source_y_fraction) {                                               \
1976
0
      memcpy(vin + 64, src_ptr + src_stride + n * SBPP,                    \
1977
0
             r * SBPP * sizeof(TS));                                       \
1978
0
    }                                                                      \
1979
0
    ANY_SIMD(vout, vin, 64, scale, MASK + 1, source_y_fraction);           \
1980
0
    memcpy(dst_ptr + n * BPP, vout, r * BPP * sizeof(TD));                 \
1981
0
  }
1982
1983
#ifdef HAS_INTERPOLATEROW_16TO8_NEON
1984
ANY11IS(InterpolateRow_16To8_Any_NEON,
1985
        InterpolateRow_16To8_NEON,
1986
        uint8_t,
1987
        uint16_t,
1988
        1,
1989
        1,
1990
        7)
1991
#endif
1992
#ifdef HAS_INTERPOLATEROW_16TO8_AVX2
1993
ANY11IS(InterpolateRow_16To8_Any_AVX2,
1994
        InterpolateRow_16To8_AVX2,
1995
        uint8_t,
1996
        uint16_t,
1997
        1,
1998
        1,
1999
        31)
2000
#endif
2001
2002
#undef ANY11IS
2003
2004
// Any 1 to 1 mirror.
2005
// Expands to NAMEANY(src_ptr, dst_ptr, width) for mirroring kernels.
//   BPP  - bytes per pixel (same for source and destination).
//   MASK - kernel step minus one.
// The layout differs from the other 1-to-1 wrappers: the full-step part reads
// from src_ptr + r * BPP (skipping the first r pixels) and writes to the start
// of dst_ptr. The first r source pixels are staged at the front of the zeroed
// vin, and after the kernel call the LAST r pixels of vout
// (vout + (MASK + 1 - r) * BPP) are copied to the end of the destination row.
// NOTE(review): taking the tail of vout presumes the kernel reverses pixel
// order, so the r real pixels end up last - confirm against the kernels.
#define ANY11M(NAMEANY, ANY_SIMD, BPP, MASK)                          \
2006
0
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \
2007
0
    SIMD_ALIGNED(uint8_t vin[64]);                                    \
2008
0
    SIMD_ALIGNED(uint8_t vout[64]);                                   \
2009
0
    memset(vin, 0, sizeof(vin)); /* for msan */                       \
2010
0
    int r = width & MASK;                                             \
2011
0
    int n = width & ~MASK;                                            \
2012
0
    if (n > 0) {                                                      \
2013
0
      ANY_SIMD(src_ptr + r * BPP, dst_ptr, n);                        \
2014
0
    }                                                                 \
2015
0
    memcpy(vin, src_ptr, r* BPP);                                     \
2016
0
    ANY_SIMD(vin, vout, MASK + 1);                                    \
2017
0
    memcpy(dst_ptr + n * BPP, vout + (MASK + 1 - r) * BPP, r * BPP);  \
2018
0
  }
Unexecuted instantiation: MirrorRow_Any_AVX2
Unexecuted instantiation: MirrorRow_Any_SSSE3
Unexecuted instantiation: MirrorUVRow_Any_AVX2
Unexecuted instantiation: MirrorUVRow_Any_SSSE3
Unexecuted instantiation: ARGBMirrorRow_Any_AVX2
Unexecuted instantiation: ARGBMirrorRow_Any_SSE2
Unexecuted instantiation: RGB24MirrorRow_Any_SSSE3
2019
2020
#ifdef HAS_MIRRORROW_AVX2
2021
ANY11M(MirrorRow_Any_AVX2, MirrorRow_AVX2, 1, 31)
2022
#endif
2023
#ifdef HAS_MIRRORROW_SSSE3
2024
ANY11M(MirrorRow_Any_SSSE3, MirrorRow_SSSE3, 1, 15)
2025
#endif
2026
#ifdef HAS_MIRRORROW_NEON
2027
ANY11M(MirrorRow_Any_NEON, MirrorRow_NEON, 1, 31)
2028
#endif
2029
#ifdef HAS_MIRRORROW_MSA
2030
ANY11M(MirrorRow_Any_MSA, MirrorRow_MSA, 1, 63)
2031
#endif
2032
#ifdef HAS_MIRRORROW_LSX
2033
ANY11M(MirrorRow_Any_LSX, MirrorRow_LSX, 1, 31)
2034
#endif
2035
#ifdef HAS_MIRRORROW_LASX
2036
ANY11M(MirrorRow_Any_LASX, MirrorRow_LASX, 1, 63)
2037
#endif
2038
#ifdef HAS_MIRRORUVROW_AVX2
2039
ANY11M(MirrorUVRow_Any_AVX2, MirrorUVRow_AVX2, 2, 15)
2040
#endif
2041
#ifdef HAS_MIRRORUVROW_SSSE3
2042
ANY11M(MirrorUVRow_Any_SSSE3, MirrorUVRow_SSSE3, 2, 7)
2043
#endif
2044
#ifdef HAS_MIRRORUVROW_NEON
2045
ANY11M(MirrorUVRow_Any_NEON, MirrorUVRow_NEON, 2, 31)
2046
#endif
2047
#ifdef HAS_MIRRORUVROW_MSA
2048
ANY11M(MirrorUVRow_Any_MSA, MirrorUVRow_MSA, 2, 7)
2049
#endif
2050
#ifdef HAS_MIRRORUVROW_LSX
2051
ANY11M(MirrorUVRow_Any_LSX, MirrorUVRow_LSX, 2, 7)
2052
#endif
2053
#ifdef HAS_MIRRORUVROW_LASX
2054
ANY11M(MirrorUVRow_Any_LASX, MirrorUVRow_LASX, 2, 15)
2055
#endif
2056
#ifdef HAS_ARGBMIRRORROW_AVX2
2057
ANY11M(ARGBMirrorRow_Any_AVX2, ARGBMirrorRow_AVX2, 4, 7)
2058
#endif
2059
#ifdef HAS_ARGBMIRRORROW_SSE2
2060
ANY11M(ARGBMirrorRow_Any_SSE2, ARGBMirrorRow_SSE2, 4, 3)
2061
#endif
2062
#ifdef HAS_ARGBMIRRORROW_NEON
2063
ANY11M(ARGBMirrorRow_Any_NEON, ARGBMirrorRow_NEON, 4, 7)
2064
#endif
2065
#ifdef HAS_ARGBMIRRORROW_MSA
2066
ANY11M(ARGBMirrorRow_Any_MSA, ARGBMirrorRow_MSA, 4, 15)
2067
#endif
2068
#ifdef HAS_ARGBMIRRORROW_LSX
2069
ANY11M(ARGBMirrorRow_Any_LSX, ARGBMirrorRow_LSX, 4, 7)
2070
#endif
2071
#ifdef HAS_ARGBMIRRORROW_LASX
2072
ANY11M(ARGBMirrorRow_Any_LASX, ARGBMirrorRow_LASX, 4, 15)
2073
#endif
2074
#ifdef HAS_RGB24MIRRORROW_SSSE3
2075
ANY11M(RGB24MirrorRow_Any_SSSE3, RGB24MirrorRow_SSSE3, 3, 15)
2076
#endif
2077
#ifdef HAS_RGB24MIRRORROW_NEON
2078
ANY11M(RGB24MirrorRow_Any_NEON, RGB24MirrorRow_NEON, 3, 15)
2079
#endif
2080
#undef ANY11M
2081
2082
// Any 1 plane. (memset)
2083
// Expands to NAMEANY(dst_ptr, v32, width) for fill kernels that have no
// source row.
//   T    - type of the fill value v32, forwarded unchanged to ANY_SIMD.
//   BPP  - destination bytes per pixel.
//   MASK - kernel step minus one.
// The first n = width & ~MASK pixels are filled in place. For the remainder
// the kernel fills one MASK + 1 pixel step in the 64-byte vout buffer, and
// r * BPP bytes of it are copied to dst_ptr + n * BPP. vout is not cleared
// first; the wrapper reads it only after the kernel call.
#define ANY1(NAMEANY, ANY_SIMD, T, BPP, MASK)        \
2084
0
  void NAMEANY(uint8_t* dst_ptr, T v32, int width) { \
2085
0
    SIMD_ALIGNED(uint8_t vout[64]);                  \
2086
0
    int r = width & MASK;                            \
2087
0
    int n = width & ~MASK;                           \
2088
0
    if (n > 0) {                                     \
2089
0
      ANY_SIMD(dst_ptr, v32, n);                     \
2090
0
    }                                                \
2091
0
    ANY_SIMD(vout, v32, MASK + 1);                   \
2092
0
    memcpy(dst_ptr + n * BPP, vout, r * BPP);        \
2093
0
  }
2094
2095
#ifdef HAS_SETROW_X86
2096
ANY1(SetRow_Any_X86, SetRow_X86, uint8_t, 1, 3)
2097
#endif
2098
#ifdef HAS_SETROW_NEON
2099
ANY1(SetRow_Any_NEON, SetRow_NEON, uint8_t, 1, 15)
2100
#endif
2101
#ifdef HAS_SETROW_LSX
2102
ANY1(SetRow_Any_LSX, SetRow_LSX, uint8_t, 1, 15)
2103
#endif
2104
#ifdef HAS_ARGBSETROW_NEON
2105
ANY1(ARGBSetRow_Any_NEON, ARGBSetRow_NEON, uint32_t, 4, 3)
2106
#endif
2107
#ifdef HAS_ARGBSETROW_MSA
2108
ANY1(ARGBSetRow_Any_MSA, ARGBSetRow_MSA, uint32_t, 4, 3)
2109
#endif
2110
#ifdef HAS_ARGBSETROW_LSX
2111
ANY1(ARGBSetRow_Any_LSX, ARGBSetRow_LSX, uint32_t, 4, 3)
2112
#endif
2113
#undef ANY1
2114
2115
// Any 1 to 2.  Outputs UV planes.
// Generates NAMEANY(src_ptr, dst_u, dst_v, width), which accepts any width.
// The leading multiple of MASK + 1 pixels goes straight through ANY_SIMD; the
// remainder is copied into a zeroed scratch row, converted as one full SIMD
// step, and only the valid output bytes are copied to dst_u / dst_v.
// UVSHIFT / DUVSHIFT are the source / destination subsampling shifts and BPP
// scales the source offset and copy size.
#define ANY12(NAMEANY, ANY_SIMD, UVSHIFT, BPP, DUVSHIFT, MASK)          \
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v,  \
               int width) {                                             \
    SIMD_ALIGNED(uint8_t vin[128]);                                     \
    SIMD_ALIGNED(uint8_t vout[128 * 2]);                                \
    const int tail = width & MASK;                                      \
    const int body = width & ~MASK;                                     \
    const int tail_src_bytes = SS(tail, UVSHIFT) * BPP;                 \
    const int tail_dst_bytes = SS(tail, DUVSHIFT);                      \
    memset(vin, 0, sizeof(vin)); /* for msan */                         \
    if (body > 0) {                                                     \
      ANY_SIMD(src_ptr, dst_u, dst_v, body);                            \
    }                                                                   \
    memcpy(vin, src_ptr + (body >> UVSHIFT) * BPP, tail_src_bytes);     \
    /* U lands in the first 128 bytes of vout, V in the second. */      \
    ANY_SIMD(vin, vout, vout + 128, MASK + 1);                          \
    memcpy(dst_u + (body >> DUVSHIFT), vout, tail_dst_bytes);           \
    memcpy(dst_v + (body >> DUVSHIFT), vout + 128, tail_dst_bytes);     \
  }
Unexecuted instantiation: SplitUVRow_Any_SSE2
Unexecuted instantiation: SplitUVRow_Any_AVX2
Unexecuted instantiation: ARGBToUV444Row_Any_SSSE3
Unexecuted instantiation: ARGBToUVJ444Row_Any_SSSE3
Unexecuted instantiation: ARGBToUV444Row_Any_AVX2
ARGBToUVJ444Row_Any_AVX2
Line
Count
Source
2118
1.84M
               int width) {                                            \
2119
1.84M
    SIMD_ALIGNED(uint8_t vin[128]);                                    \
2120
1.84M
    SIMD_ALIGNED(uint8_t vout[128 * 2]);                               \
2121
1.84M
    memset(vin, 0, sizeof(vin)); /* for msan */                        \
2122
1.84M
    int r = width & MASK;                                              \
2123
1.84M
    int n = width & ~MASK;                                             \
2124
1.84M
    if (n > 0) {                                                       \
2125
683k
      ANY_SIMD(src_ptr, dst_u, dst_v, n);                              \
2126
683k
    }                                                                  \
2127
1.84M
    memcpy(vin, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
2128
1.84M
    ANY_SIMD(vin, vout, vout + 128, MASK + 1);                         \
2129
1.84M
    memcpy(dst_u + (n >> DUVSHIFT), vout, SS(r, DUVSHIFT));            \
2130
1.84M
    memcpy(dst_v + (n >> DUVSHIFT), vout + 128, SS(r, DUVSHIFT));      \
2131
1.84M
  }
Unexecuted instantiation: YUY2ToUV422Row_Any_AVX2
Unexecuted instantiation: UYVYToUV422Row_Any_AVX2
Unexecuted instantiation: YUY2ToUV422Row_Any_SSE2
Unexecuted instantiation: UYVYToUV422Row_Any_SSE2
2132
2133
#ifdef HAS_SPLITUVROW_SSE2
2134
ANY12(SplitUVRow_Any_SSE2, SplitUVRow_SSE2, 0, 2, 0, 15)
2135
#endif
2136
#ifdef HAS_SPLITUVROW_AVX2
2137
ANY12(SplitUVRow_Any_AVX2, SplitUVRow_AVX2, 0, 2, 0, 31)
2138
#endif
2139
#ifdef HAS_SPLITUVROW_NEON
2140
ANY12(SplitUVRow_Any_NEON, SplitUVRow_NEON, 0, 2, 0, 15)
2141
#endif
2142
#ifdef HAS_SPLITUVROW_MSA
2143
ANY12(SplitUVRow_Any_MSA, SplitUVRow_MSA, 0, 2, 0, 31)
2144
#endif
2145
#ifdef HAS_SPLITUVROW_LSX
2146
ANY12(SplitUVRow_Any_LSX, SplitUVRow_LSX, 0, 2, 0, 31)
2147
#endif
2148
#ifdef HAS_ARGBTOUV444ROW_SSSE3
2149
ANY12(ARGBToUV444Row_Any_SSSE3, ARGBToUV444Row_SSSE3, 0, 4, 0, 15)
2150
#endif
2151
#ifdef HAS_ARGBTOUVJ444ROW_SSSE3
2152
ANY12(ARGBToUVJ444Row_Any_SSSE3, ARGBToUVJ444Row_SSSE3, 0, 4, 0, 15)
2153
#endif
2154
#ifdef HAS_ARGBTOUV444ROW_AVX2
2155
ANY12(ARGBToUV444Row_Any_AVX2, ARGBToUV444Row_AVX2, 0, 4, 0, 31)
2156
#endif
2157
#ifdef HAS_ARGBTOUVJ444ROW_AVX2
2158
ANY12(ARGBToUVJ444Row_Any_AVX2, ARGBToUVJ444Row_AVX2, 0, 4, 0, 31)
2159
#endif
2160
#ifdef HAS_YUY2TOUV422ROW_AVX2
2161
ANY12(YUY2ToUV422Row_Any_AVX2, YUY2ToUV422Row_AVX2, 1, 4, 1, 31)
2162
ANY12(UYVYToUV422Row_Any_AVX2, UYVYToUV422Row_AVX2, 1, 4, 1, 31)
2163
#endif
2164
#ifdef HAS_YUY2TOUV422ROW_SSE2
2165
ANY12(YUY2ToUV422Row_Any_SSE2, YUY2ToUV422Row_SSE2, 1, 4, 1, 15)
2166
ANY12(UYVYToUV422Row_Any_SSE2, UYVYToUV422Row_SSE2, 1, 4, 1, 15)
2167
#endif
2168
// NEON instantiations of ANY12.  Arguments follow the macro's parameter list:
// (wrapper name, SIMD kernel, UVSHIFT, BPP, DUVSHIFT, MASK).
// NOTE(review): the ARGBToUV444 / ARGBToUVJ444 wrappers are compiled under the
// YUY2ToUV422 feature macro, whereas the SSSE3 / AVX2 / NEON_I8MM variants in
// this list use dedicated HAS_ARGBTOUV444ROW_* / HAS_ARGBTOUVJ444ROW_* guards.
// Confirm against row.h that HAS_YUY2TOUV422ROW_NEON implies the ARGB kernels
// exist; the MSA / LSX / LASX groups below follow the same pattern.
#ifdef HAS_YUY2TOUV422ROW_NEON
2169
ANY12(ARGBToUV444Row_Any_NEON, ARGBToUV444Row_NEON, 0, 4, 0, 7)
2170
ANY12(ARGBToUVJ444Row_Any_NEON, ARGBToUVJ444Row_NEON, 0, 4, 0, 7)
2171
ANY12(YUY2ToUV422Row_Any_NEON, YUY2ToUV422Row_NEON, 1, 4, 1, 15)
2172
ANY12(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON, 1, 4, 1, 15)
2173
#endif
2174
#ifdef HAS_ARGBTOUV444ROW_NEON_I8MM
2175
ANY12(ARGBToUV444Row_Any_NEON_I8MM, ARGBToUV444Row_NEON_I8MM, 0, 4, 0, 7)
2176
ANY12(ARGBToUVJ444Row_Any_NEON_I8MM, ARGBToUVJ444Row_NEON_I8MM, 0, 4, 0, 7)
2177
#endif
2178
#ifdef HAS_YUY2TOUV422ROW_MSA
2179
ANY12(ARGBToUV444Row_Any_MSA, ARGBToUV444Row_MSA, 0, 4, 0, 15)
2180
ANY12(YUY2ToUV422Row_Any_MSA, YUY2ToUV422Row_MSA, 1, 4, 1, 31)
2181
ANY12(UYVYToUV422Row_Any_MSA, UYVYToUV422Row_MSA, 1, 4, 1, 31)
2182
#endif
2183
#ifdef HAS_YUY2TOUV422ROW_LSX
2184
ANY12(ARGBToUV444Row_Any_LSX, ARGBToUV444Row_LSX, 0, 4, 0, 15)
2185
ANY12(YUY2ToUV422Row_Any_LSX, YUY2ToUV422Row_LSX, 1, 4, 1, 15)
2186
ANY12(UYVYToUV422Row_Any_LSX, UYVYToUV422Row_LSX, 1, 4, 1, 15)
2187
#endif
2188
#ifdef HAS_YUY2TOUV422ROW_LASX
2189
ANY12(ARGBToUV444Row_Any_LASX, ARGBToUV444Row_LASX, 0, 4, 0, 31)
2190
ANY12(YUY2ToUV422Row_Any_LASX, YUY2ToUV422Row_LASX, 1, 4, 1, 31)
2191
ANY12(UYVYToUV422Row_Any_LASX, UYVYToUV422Row_LASX, 1, 4, 1, 31)
2192
#endif
2193
#undef ANY12
2194
2195
// Any 1 plane of 16 bit UV pairs to 2 planes, with a depth parameter.
// Generates NAMEANY(src_uv, dst_u, dst_v, depth, width), which accepts any
// width.  T is the element type and BPP its size in bytes.  The source
// advances two T elements per pixel and each destination one.  The leading
// multiple of MASK + 1 pixels goes straight through ANY_SIMD; the remainder is
// staged through zeroed scratch buffers (16 elements per output plane) and
// only the valid elements are copied out.  depth is forwarded unchanged.
#define ANY12PT(NAMEANY, ANY_SIMD, T, BPP, MASK)                            \
  void NAMEANY(const T* src_uv, T* dst_u, T* dst_v, int depth, int width) { \
    SIMD_ALIGNED(T vin[16 * 2]);                                            \
    SIMD_ALIGNED(T vout[16 * 2]);                                           \
    memset(vin, 0, sizeof(vin)); /* for msan */                             \
    int r = width & MASK;                                                   \
    int n = width & ~MASK;                                                  \
    if (n > 0) {                                                            \
      ANY_SIMD(src_uv, dst_u, dst_v, depth, n);                             \
    }                                                                       \
    memcpy(vin, src_uv + n * 2, r * BPP * 2);                               \
    ANY_SIMD(vin, vout, vout + 16, depth, MASK + 1);                        \
    memcpy(dst_u + n, vout, r * BPP);                                       \
    memcpy(dst_v + n, vout + 16, r * BPP);                                  \
  }

#ifdef HAS_SPLITUVROW_16_AVX2
ANY12PT(SplitUVRow_16_Any_AVX2, SplitUVRow_16_AVX2, uint16_t, 2, 15)
#endif

#ifdef HAS_SPLITUVROW_16_NEON
ANY12PT(SplitUVRow_16_Any_NEON, SplitUVRow_16_NEON, uint16_t, 2, 7)
#endif

// Undefine the macro this section defined (previously named ANY21CT here,
// which left ANY12PT defined for the rest of the file).
#undef ANY12PT
2221
2222
// Any 1 to 3.  Outputs RGB planes.
// Generates NAMEANY(src_ptr, dst_r, dst_g, dst_b, width), which accepts any
// width.  The leading multiple of MASK + 1 pixels goes straight through
// ANY_SIMD; the remainder is copied into a zeroed scratch row, split as one
// full SIMD step, and only the valid bytes of each plane are copied out.
//
// Scratch sizing: the instantiations in this file use up to MASK 31
// (32 pixels per step) and up to 4 bytes per pixel, so the input row holds
// 32 * 4 bytes and each output plane gets its own 32 byte slot.  The earlier
// 16 * 3 byte buffers with a 16 byte plane stride were overrun by the 4 byte
// MASK 15 and 3 byte MASK 31 instantiations.
#define ANY13(NAMEANY, ANY_SIMD, BPP, MASK)                            \
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_r, uint8_t* dst_g, \
               uint8_t* dst_b, int width) {                            \
    SIMD_ALIGNED(uint8_t vin[32 * 4]);                                 \
    SIMD_ALIGNED(uint8_t vout[32 * 3]);                                \
    memset(vin, 0, sizeof(vin)); /* for msan */                        \
    int r = width & MASK;                                              \
    int n = width & ~MASK;                                             \
    if (n > 0) {                                                       \
      ANY_SIMD(src_ptr, dst_r, dst_g, dst_b, n);                       \
    }                                                                  \
    memcpy(vin, src_ptr + n * BPP, r * BPP);                           \
    ANY_SIMD(vin, vout, vout + 32, vout + 64, MASK + 1);               \
    memcpy(dst_r + n, vout, r);                                        \
    memcpy(dst_g + n, vout + 32, r);                                   \
    memcpy(dst_b + n, vout + 64, r);                                   \
  }
Unexecuted instantiation: SplitRGBRow_Any_SSSE3
Unexecuted instantiation: SplitRGBRow_Any_SSE41
Unexecuted instantiation: SplitRGBRow_Any_AVX2
Unexecuted instantiation: SplitXRGBRow_Any_SSE2
Unexecuted instantiation: SplitXRGBRow_Any_SSSE3
Unexecuted instantiation: SplitXRGBRow_Any_AVX2
2240
2241
#ifdef HAS_SPLITRGBROW_SSSE3
2242
ANY13(SplitRGBRow_Any_SSSE3, SplitRGBRow_SSSE3, 3, 15)
2243
#endif
2244
#ifdef HAS_SPLITRGBROW_SSE41
2245
ANY13(SplitRGBRow_Any_SSE41, SplitRGBRow_SSE41, 3, 15)
2246
#endif
2247
#ifdef HAS_SPLITRGBROW_AVX2
2248
ANY13(SplitRGBRow_Any_AVX2, SplitRGBRow_AVX2, 3, 31)
2249
#endif
2250
#ifdef HAS_SPLITRGBROW_NEON
2251
ANY13(SplitRGBRow_Any_NEON, SplitRGBRow_NEON, 3, 15)
2252
#endif
2253
#ifdef HAS_SPLITXRGBROW_SSE2
2254
ANY13(SplitXRGBRow_Any_SSE2, SplitXRGBRow_SSE2, 4, 7)
2255
#endif
2256
#ifdef HAS_SPLITXRGBROW_SSSE3
2257
ANY13(SplitXRGBRow_Any_SSSE3, SplitXRGBRow_SSSE3, 4, 7)
2258
#endif
2259
#ifdef HAS_SPLITXRGBROW_AVX2
2260
ANY13(SplitXRGBRow_Any_AVX2, SplitXRGBRow_AVX2, 4, 15)
2261
#endif
2262
#ifdef HAS_SPLITXRGBROW_NEON
2263
ANY13(SplitXRGBRow_Any_NEON, SplitXRGBRow_NEON, 4, 15)
2264
#endif
2265
2266
// Any 1 to 4.  Outputs ARGB planes.
// Generates NAMEANY(src_ptr, dst_r, dst_g, dst_b, dst_a, width), which accepts
// any width.  The leading multiple of MASK + 1 pixels goes straight through
// ANY_SIMD; the remainder is copied into a zeroed scratch row, split as one
// full SIMD step into four 16 byte slots, and only the valid bytes of each
// plane are copied out.
#define ANY14(NAMEANY, ANY_SIMD, BPP, MASK)                               \
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_r, uint8_t* dst_g,    \
               uint8_t* dst_b, uint8_t* dst_a, int width) {               \
    SIMD_ALIGNED(uint8_t vin[16 * 4]);                                    \
    SIMD_ALIGNED(uint8_t vout[16 * 4]);                                   \
    const int tail = width & MASK;                                        \
    const int body = width & ~MASK;                                       \
    memset(vin, 0, sizeof(vin)); /* for msan */                           \
    if (body > 0) {                                                       \
      ANY_SIMD(src_ptr, dst_r, dst_g, dst_b, dst_a, body);                \
    }                                                                     \
    memcpy(vin, src_ptr + body * BPP, tail * BPP);                        \
    ANY_SIMD(vin, vout, vout + 16, vout + 32, vout + 48, MASK + 1);       \
    memcpy(dst_r + body, vout, tail);                                     \
    memcpy(dst_g + body, vout + 16, tail);                                \
    memcpy(dst_b + body, vout + 32, tail);                                \
    memcpy(dst_a + body, vout + 48, tail);                                \
  }
Unexecuted instantiation: SplitARGBRow_Any_SSE2
Unexecuted instantiation: SplitARGBRow_Any_SSSE3
Unexecuted instantiation: SplitARGBRow_Any_AVX2
2285
2286
#ifdef HAS_SPLITARGBROW_SSE2
2287
ANY14(SplitARGBRow_Any_SSE2, SplitARGBRow_SSE2, 4, 7)
2288
#endif
2289
#ifdef HAS_SPLITARGBROW_SSSE3
2290
ANY14(SplitARGBRow_Any_SSSE3, SplitARGBRow_SSSE3, 4, 7)
2291
#endif
2292
#ifdef HAS_SPLITARGBROW_AVX2
2293
ANY14(SplitARGBRow_Any_AVX2, SplitARGBRow_AVX2, 4, 15)
2294
#endif
2295
#ifdef HAS_SPLITARGBROW_NEON
2296
ANY14(SplitARGBRow_Any_NEON, SplitARGBRow_NEON, 4, 15)
2297
#endif
2298
2299
// Any 1 to 2 with source stride (2 rows of source).  Outputs UV planes.
// 128 byte row allows for 32 avx ARGB pixels.
// Generates NAMEANY(src_ptr, src_stride, dst_u, dst_v, width), which accepts
// any width.  The leading multiple of MASK + 1 pixels goes straight through
// ANY_SIMD.  The remaining pixels of both source rows are copied into a zeroed
// two-row scratch buffer (row stride 128), processed as one full SIMD step,
// and SS(remainder, 1) bytes of each output plane are copied out.
#define ANY12S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK)                        \
  void NAMEANY(const uint8_t* src_ptr, int src_stride, uint8_t* dst_u,       \
               uint8_t* dst_v, int width) {                                  \
    SIMD_ALIGNED(uint8_t vin[128 * 2]);                                      \
    SIMD_ALIGNED(uint8_t vout[128 * 2]);                                     \
    const int tail = width & MASK;                                           \
    const int body = width & ~MASK;                                          \
    const int tail_off = (body >> UVSHIFT) * BPP;                            \
    const int tail_bytes = SS(tail, UVSHIFT) * BPP;                          \
    const int tail_uv = SS(tail, 1);                                         \
    memset(vin, 0, sizeof(vin)); /* for msan */                              \
    if (body > 0) {                                                          \
      ANY_SIMD(src_ptr, src_stride, dst_u, dst_v, body);                     \
    }                                                                        \
    memcpy(vin, src_ptr + tail_off, tail_bytes);                             \
    memcpy(vin + 128, src_ptr + src_stride + tail_off, tail_bytes);          \
    if (UVSHIFT == 0 && (width & 1)) { /* repeat last pixel for subsample */ \
      memcpy(vin + tail_bytes, vin + tail_bytes - BPP, BPP);                 \
      memcpy(vin + 128 + tail_bytes, vin + 128 + tail_bytes - BPP, BPP);     \
    }                                                                        \
    ANY_SIMD(vin, 128, vout, vout + 128, MASK + 1);                          \
    memcpy(dst_u + (body >> 1), vout, tail_uv);                              \
    memcpy(dst_v + (body >> 1), vout + 128, tail_uv);                        \
  }
Unexecuted instantiation: ARGBToUVRow_Any_AVX2
Unexecuted instantiation: ABGRToUVRow_Any_AVX2
ARGBToUVJRow_Any_AVX2
Line
Count
Source
2303
2.14M
               uint8_t* dst_v, int width) {                                  \
2304
2.14M
    SIMD_ALIGNED(uint8_t vin[128 * 2]);                                      \
2305
2.14M
    SIMD_ALIGNED(uint8_t vout[128 * 2]);                                     \
2306
2.14M
    memset(vin, 0, sizeof(vin)); /* for msan */                              \
2307
2.14M
    int r = width & MASK;                                                    \
2308
2.14M
    int n = width & ~MASK;                                                   \
2309
2.14M
    if (n > 0) {                                                             \
2310
1.22M
      ANY_SIMD(src_ptr, src_stride, dst_u, dst_v, n);                        \
2311
1.22M
    }                                                                        \
2312
2.14M
    memcpy(vin, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP);       \
2313
2.14M
    memcpy(vin + 128, src_ptr + src_stride + (n >> UVSHIFT) * BPP,           \
2314
2.14M
           SS(r, UVSHIFT) * BPP);                                            \
2315
2.14M
    if ((width & 1) && UVSHIFT == 0) { /* repeat last pixel for subsample */ \
2316
1.69M
      memcpy(vin + SS(r, UVSHIFT) * BPP, vin + SS(r, UVSHIFT) * BPP - BPP,   \
2317
1.69M
             BPP);                                                           \
2318
1.69M
      memcpy(vin + 128 + SS(r, UVSHIFT) * BPP,                               \
2319
1.69M
             vin + 128 + SS(r, UVSHIFT) * BPP - BPP, BPP);                   \
2320
1.69M
    }                                                                        \
2321
2.14M
    ANY_SIMD(vin, 128, vout, vout + 128, MASK + 1);                          \
2322
2.14M
    memcpy(dst_u + (n >> 1), vout, SS(r, 1));                                \
2323
2.14M
    memcpy(dst_v + (n >> 1), vout + 128, SS(r, 1));                          \
2324
2.14M
  }
ABGRToUVJRow_Any_AVX2
Line
Count
Source
2303
128
               uint8_t* dst_v, int width) {                                  \
2304
128
    SIMD_ALIGNED(uint8_t vin[128 * 2]);                                      \
2305
128
    SIMD_ALIGNED(uint8_t vout[128 * 2]);                                     \
2306
128
    memset(vin, 0, sizeof(vin)); /* for msan */                              \
2307
128
    int r = width & MASK;                                                    \
2308
128
    int n = width & ~MASK;                                                   \
2309
128
    if (n > 0) {                                                             \
2310
128
      ANY_SIMD(src_ptr, src_stride, dst_u, dst_v, n);                        \
2311
128
    }                                                                        \
2312
128
    memcpy(vin, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP);       \
2313
128
    memcpy(vin + 128, src_ptr + src_stride + (n >> UVSHIFT) * BPP,           \
2314
128
           SS(r, UVSHIFT) * BPP);                                            \
2315
128
    if ((width & 1) && UVSHIFT == 0) { /* repeat last pixel for subsample */ \
2316
67
      memcpy(vin + SS(r, UVSHIFT) * BPP, vin + SS(r, UVSHIFT) * BPP - BPP,   \
2317
67
             BPP);                                                           \
2318
67
      memcpy(vin + 128 + SS(r, UVSHIFT) * BPP,                               \
2319
67
             vin + 128 + SS(r, UVSHIFT) * BPP - BPP, BPP);                   \
2320
67
    }                                                                        \
2321
128
    ANY_SIMD(vin, 128, vout, vout + 128, MASK + 1);                          \
2322
128
    memcpy(dst_u + (n >> 1), vout, SS(r, 1));                                \
2323
128
    memcpy(dst_v + (n >> 1), vout + 128, SS(r, 1));                          \
2324
128
  }
Unexecuted instantiation: ARGBToUVJRow_Any_SSSE3
Unexecuted instantiation: ABGRToUVJRow_Any_SSSE3
Unexecuted instantiation: ARGBToUVRow_Any_SSSE3
Unexecuted instantiation: BGRAToUVRow_Any_SSSE3
Unexecuted instantiation: ABGRToUVRow_Any_SSSE3
Unexecuted instantiation: RGBAToUVRow_Any_SSSE3
2325
2326
#ifdef HAS_ARGBTOUVROW_AVX2
2327
ANY12S(ARGBToUVRow_Any_AVX2, ARGBToUVRow_AVX2, 0, 4, 31)
2328
#endif
2329
#ifdef HAS_ABGRTOUVROW_AVX2
2330
ANY12S(ABGRToUVRow_Any_AVX2, ABGRToUVRow_AVX2, 0, 4, 31)
2331
#endif
2332
#ifdef HAS_ARGBTOUVJROW_AVX2
2333
ANY12S(ARGBToUVJRow_Any_AVX2, ARGBToUVJRow_AVX2, 0, 4, 31)
2334
#endif
2335
#ifdef HAS_ABGRTOUVJROW_AVX2
2336
ANY12S(ABGRToUVJRow_Any_AVX2, ABGRToUVJRow_AVX2, 0, 4, 31)
2337
#endif
2338
#ifdef HAS_ARGBTOUVJROW_SSSE3
2339
ANY12S(ARGBToUVJRow_Any_SSSE3, ARGBToUVJRow_SSSE3, 0, 4, 15)
2340
#endif
2341
#ifdef HAS_ABGRTOUVJROW_SSSE3
2342
ANY12S(ABGRToUVJRow_Any_SSSE3, ABGRToUVJRow_SSSE3, 0, 4, 15)
2343
#endif
2344
#ifdef HAS_ARGBTOUVROW_SSSE3
2345
ANY12S(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_SSSE3, 0, 4, 15)
2346
ANY12S(BGRAToUVRow_Any_SSSE3, BGRAToUVRow_SSSE3, 0, 4, 15)
2347
ANY12S(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_SSSE3, 0, 4, 15)
2348
ANY12S(RGBAToUVRow_Any_SSSE3, RGBAToUVRow_SSSE3, 0, 4, 15)
2349
#endif
2350
#ifdef HAS_YUY2TOUVROW_AVX2
2351
ANY12S(YUY2ToUVRow_Any_AVX2, YUY2ToUVRow_AVX2, 1, 4, 31)
2352
ANY12S(UYVYToUVRow_Any_AVX2, UYVYToUVRow_AVX2, 1, 4, 31)
2353
#endif
2354
#ifdef HAS_YUY2TOUVROW_SSE2
2355
ANY12S(YUY2ToUVRow_Any_SSE2, YUY2ToUVRow_SSE2, 1, 4, 15)
2356
ANY12S(UYVYToUVRow_Any_SSE2, UYVYToUVRow_SSE2, 1, 4, 15)
2357
#endif
2358
#ifdef HAS_ARGBTOUVROW_NEON
2359
ANY12S(ARGBToUVRow_Any_NEON, ARGBToUVRow_NEON, 0, 4, 15)
2360
#endif
2361
#ifdef HAS_ARGBTOUVROW_SVE2
2362
ANY12S(ARGBToUVRow_Any_SVE2, ARGBToUVRow_SVE2, 0, 4, 1)
2363
#endif
2364
#ifdef HAS_ARGBTOUVROW_MSA
2365
ANY12S(ARGBToUVRow_Any_MSA, ARGBToUVRow_MSA, 0, 4, 31)
2366
#endif
2367
#ifdef HAS_ARGBTOUVROW_LSX
2368
ANY12S(ARGBToUVRow_Any_LSX, ARGBToUVRow_LSX, 0, 4, 15)
2369
#endif
2370
#ifdef HAS_ARGBTOUVROW_LASX
2371
ANY12S(ARGBToUVRow_Any_LASX, ARGBToUVRow_LASX, 0, 4, 31)
2372
#endif
2373
#ifdef HAS_ARGBTOUVJROW_NEON
2374
ANY12S(ARGBToUVJRow_Any_NEON, ARGBToUVJRow_NEON, 0, 4, 15)
2375
#endif
2376
#ifdef HAS_ARGBTOUVJROW_SVE2
2377
ANY12S(ARGBToUVJRow_Any_SVE2, ARGBToUVJRow_SVE2, 0, 4, 1)
2378
#endif
2379
#ifdef HAS_ABGRTOUVJROW_NEON
2380
ANY12S(ABGRToUVJRow_Any_NEON, ABGRToUVJRow_NEON, 0, 4, 15)
2381
#endif
2382
#ifdef HAS_ABGRTOUVJROW_SVE2
2383
ANY12S(ABGRToUVJRow_Any_SVE2, ABGRToUVJRow_SVE2, 0, 4, 1)
2384
#endif
2385
#ifdef HAS_ARGBTOUVJROW_MSA
2386
ANY12S(ARGBToUVJRow_Any_MSA, ARGBToUVJRow_MSA, 0, 4, 31)
2387
#endif
2388
#ifdef HAS_ARGBTOUVJROW_LSX
2389
ANY12S(ARGBToUVJRow_Any_LSX, ARGBToUVJRow_LSX, 0, 4, 15)
2390
#endif
2391
#ifdef HAS_ARGBTOUVJROW_LASX
2392
ANY12S(ARGBToUVJRow_Any_LASX, ARGBToUVJRow_LASX, 0, 4, 31)
2393
#endif
2394
#ifdef HAS_BGRATOUVROW_NEON
2395
ANY12S(BGRAToUVRow_Any_NEON, BGRAToUVRow_NEON, 0, 4, 15)
2396
#endif
2397
#ifdef HAS_BGRATOUVROW_SVE2
2398
ANY12S(BGRAToUVRow_Any_SVE2, BGRAToUVRow_SVE2, 0, 4, 1)
2399
#endif
2400
#ifdef HAS_BGRATOUVROW_MSA
2401
ANY12S(BGRAToUVRow_Any_MSA, BGRAToUVRow_MSA, 0, 4, 15)
2402
#endif
2403
#ifdef HAS_BGRATOUVROW_LSX
2404
ANY12S(BGRAToUVRow_Any_LSX, BGRAToUVRow_LSX, 0, 4, 15)
2405
#endif
2406
#ifdef HAS_ABGRTOUVROW_NEON
2407
ANY12S(ABGRToUVRow_Any_NEON, ABGRToUVRow_NEON, 0, 4, 15)
2408
#endif
2409
#ifdef HAS_ABGRTOUVROW_SVE2
2410
ANY12S(ABGRToUVRow_Any_SVE2, ABGRToUVRow_SVE2, 0, 4, 1)
2411
#endif
2412
#ifdef HAS_ABGRTOUVROW_MSA
2413
ANY12S(ABGRToUVRow_Any_MSA, ABGRToUVRow_MSA, 0, 4, 15)
2414
#endif
2415
#ifdef HAS_ABGRTOUVROW_LSX
2416
ANY12S(ABGRToUVRow_Any_LSX, ABGRToUVRow_LSX, 0, 4, 15)
2417
#endif
2418
#ifdef HAS_RGBATOUVROW_NEON
2419
ANY12S(RGBAToUVRow_Any_NEON, RGBAToUVRow_NEON, 0, 4, 15)
2420
#endif
2421
#ifdef HAS_RGBATOUVROW_SVE2
2422
ANY12S(RGBAToUVRow_Any_SVE2, RGBAToUVRow_SVE2, 0, 4, 1)
2423
#endif
2424
#ifdef HAS_RGBATOUVROW_MSA
2425
ANY12S(RGBAToUVRow_Any_MSA, RGBAToUVRow_MSA, 0, 4, 15)
2426
#endif
2427
#ifdef HAS_RGBATOUVROW_LSX
2428
ANY12S(RGBAToUVRow_Any_LSX, RGBAToUVRow_LSX, 0, 4, 15)
2429
#endif
2430
#ifdef HAS_RGB24TOUVROW_NEON
2431
ANY12S(RGB24ToUVRow_Any_NEON, RGB24ToUVRow_NEON, 0, 3, 15)
2432
#endif
2433
#ifdef HAS_RGB24TOUVJROW_NEON
2434
ANY12S(RGB24ToUVJRow_Any_NEON, RGB24ToUVJRow_NEON, 0, 3, 15)
2435
#endif
2436
#ifdef HAS_RGB24TOUVROW_MSA
2437
ANY12S(RGB24ToUVRow_Any_MSA, RGB24ToUVRow_MSA, 0, 3, 15)
2438
#endif
2439
#ifdef HAS_RGB24TOUVROW_LSX
2440
ANY12S(RGB24ToUVRow_Any_LSX, RGB24ToUVRow_LSX, 0, 3, 15)
2441
#endif
2442
#ifdef HAS_RGB24TOUVROW_LASX
2443
ANY12S(RGB24ToUVRow_Any_LASX, RGB24ToUVRow_LASX, 0, 3, 31)
2444
#endif
2445
#ifdef HAS_RAWTOUVROW_NEON
2446
ANY12S(RAWToUVRow_Any_NEON, RAWToUVRow_NEON, 0, 3, 15)
2447
#endif
2448
#ifdef HAS_RAWTOUVJROW_NEON
2449
ANY12S(RAWToUVJRow_Any_NEON, RAWToUVJRow_NEON, 0, 3, 15)
2450
#endif
2451
#ifdef HAS_RAWTOUVROW_MSA
2452
ANY12S(RAWToUVRow_Any_MSA, RAWToUVRow_MSA, 0, 3, 15)
2453
#endif
2454
#ifdef HAS_RAWTOUVROW_LSX
2455
ANY12S(RAWToUVRow_Any_LSX, RAWToUVRow_LSX, 0, 3, 15)
2456
#endif
2457
#ifdef HAS_RAWTOUVROW_LASX
2458
ANY12S(RAWToUVRow_Any_LASX, RAWToUVRow_LASX, 0, 3, 31)
2459
#endif
2460
#ifdef HAS_RGB565TOUVROW_NEON
2461
ANY12S(RGB565ToUVRow_Any_NEON, RGB565ToUVRow_NEON, 0, 2, 15)
2462
#endif
2463
#ifdef HAS_RGB565TOUVROW_MSA
2464
ANY12S(RGB565ToUVRow_Any_MSA, RGB565ToUVRow_MSA, 0, 2, 15)
2465
#endif
2466
#ifdef HAS_RGB565TOUVROW_LSX
2467
ANY12S(RGB565ToUVRow_Any_LSX, RGB565ToUVRow_LSX, 0, 2, 15)
2468
#endif
2469
#ifdef HAS_RGB565TOUVROW_LASX
2470
ANY12S(RGB565ToUVRow_Any_LASX, RGB565ToUVRow_LASX, 0, 2, 31)
2471
#endif
2472
#ifdef HAS_ARGB1555TOUVROW_NEON
2473
ANY12S(ARGB1555ToUVRow_Any_NEON, ARGB1555ToUVRow_NEON, 0, 2, 15)
2474
#endif
2475
#ifdef HAS_ARGB1555TOUVROW_MSA
2476
ANY12S(ARGB1555ToUVRow_Any_MSA, ARGB1555ToUVRow_MSA, 0, 2, 15)
2477
#endif
2478
#ifdef HAS_ARGB1555TOUVROW_LSX
2479
ANY12S(ARGB1555ToUVRow_Any_LSX, ARGB1555ToUVRow_LSX, 0, 2, 15)
2480
#endif
2481
#ifdef HAS_ARGB1555TOUVROW_LASX
2482
ANY12S(ARGB1555ToUVRow_Any_LASX, ARGB1555ToUVRow_LASX, 0, 2, 31)
2483
#endif
2484
#ifdef HAS_ARGB4444TOUVROW_NEON
2485
ANY12S(ARGB4444ToUVRow_Any_NEON, ARGB4444ToUVRow_NEON, 0, 2, 15)
2486
#endif
2487
#ifdef HAS_YUY2TOUVROW_NEON
2488
ANY12S(YUY2ToUVRow_Any_NEON, YUY2ToUVRow_NEON, 1, 4, 15)
2489
#endif
2490
#ifdef HAS_UYVYTOUVROW_NEON
2491
ANY12S(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, 1, 4, 15)
2492
#endif
2493
#ifdef HAS_YUY2TOUVROW_MSA
2494
ANY12S(YUY2ToUVRow_Any_MSA, YUY2ToUVRow_MSA, 1, 4, 31)
2495
#endif
2496
#ifdef HAS_YUY2TOUVROW_LSX
2497
ANY12S(YUY2ToUVRow_Any_LSX, YUY2ToUVRow_LSX, 1, 4, 15)
2498
#endif
2499
#ifdef HAS_YUY2TOUVROW_LASX
2500
ANY12S(YUY2ToUVRow_Any_LASX, YUY2ToUVRow_LASX, 1, 4, 31)
2501
#endif
2502
#ifdef HAS_UYVYTOUVROW_MSA
2503
ANY12S(UYVYToUVRow_Any_MSA, UYVYToUVRow_MSA, 1, 4, 31)
2504
#endif
2505
#ifdef HAS_UYVYTOUVROW_LSX
2506
ANY12S(UYVYToUVRow_Any_LSX, UYVYToUVRow_LSX, 1, 4, 15)
2507
#endif
2508
#ifdef HAS_UYVYTOUVROW_LASX
2509
ANY12S(UYVYToUVRow_Any_LASX, UYVYToUVRow_LASX, 1, 4, 31)
2510
#endif
2511
#undef ANY12S
2512
2513
// Any 1 to 1 with source stride (2 rows of source).  Outputs UV plane.
// 128 byte row allows for 32 avx ARGB pixels.
// Generates NAMEANY(src_ptr, src_stride, dst_vu, width), which accepts any
// width.  The leading multiple of MASK + 1 pixels goes straight through
// ANY_SIMD.  The remaining pixels of both source rows are copied into a zeroed
// two-row scratch buffer (row stride 128), processed as one full SIMD step,
// and SS(remainder, 1) * 2 output bytes are copied to dst_vu.
#define ANY11S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK)                        \
  void NAMEANY(const uint8_t* src_ptr, int src_stride, uint8_t* dst_vu,      \
               int width) {                                                  \
    SIMD_ALIGNED(uint8_t vin[128 * 2]);                                      \
    SIMD_ALIGNED(uint8_t vout[128]);                                         \
    const int tail = width & MASK;                                           \
    const int body = width & ~MASK;                                          \
    const int tail_off = (body >> UVSHIFT) * BPP;                            \
    const int tail_bytes = SS(tail, UVSHIFT) * BPP;                          \
    memset(vin, 0, sizeof(vin)); /* for msan */                              \
    if (body > 0) {                                                          \
      ANY_SIMD(src_ptr, src_stride, dst_vu, body);                           \
    }                                                                        \
    memcpy(vin, src_ptr + tail_off, tail_bytes);                             \
    memcpy(vin + 128, src_ptr + src_stride + tail_off, tail_bytes);          \
    if (UVSHIFT == 0 && (width & 1)) { /* repeat last pixel for subsample */ \
      memcpy(vin + tail_bytes, vin + tail_bytes - BPP, BPP);                 \
      memcpy(vin + 128 + tail_bytes, vin + 128 + tail_bytes - BPP, BPP);     \
    }                                                                        \
    ANY_SIMD(vin, 128, vout, MASK + 1);                                      \
    memcpy(dst_vu + (body >> 1) * 2, vout, SS(tail, 1) * 2);                 \
  }
2538
2539
#ifdef HAS_AYUVTOVUROW_NEON
2540
ANY11S(AYUVToUVRow_Any_NEON, AYUVToUVRow_NEON, 0, 4, 15)
2541
ANY11S(AYUVToVURow_Any_NEON, AYUVToVURow_NEON, 0, 4, 15)
2542
#endif
2543
#ifdef HAS_AYUVTOUVROW_SVE2
2544
ANY11S(AYUVToUVRow_Any_SVE2, AYUVToUVRow_SVE2, 0, 4, 1)
2545
#endif
2546
#ifdef HAS_AYUVTOVUROW_SVE2
2547
ANY11S(AYUVToVURow_Any_SVE2, AYUVToVURow_SVE2, 0, 4, 1)
2548
#endif
2549
#undef ANY11S
2550
2551
// Any-width wrapper for detile row kernels.
// Generates NAMEANY(src, src_tile_stride, dst, width).  T is the element type
// and BPP its size in bytes.  The leading multiple of MASK + 1 elements goes
// straight through ANY_SIMD.  The remainder is read from the source at
// (elements done / 16) * src_tile_stride, staged through 16-element scratch
// buffers, and only the valid elements are copied to dst.
#define ANYDETILE(NAMEANY, ANY_SIMD, T, BPP, MASK)                           \
  void NAMEANY(const T* src, ptrdiff_t src_tile_stride, T* dst, int width) { \
    SIMD_ALIGNED(T vin[16]);                                                 \
    SIMD_ALIGNED(T vout[16]);                                                \
    const int tail = width & MASK;                                           \
    const int body = width & ~MASK;                                          \
    const int tail_bytes = tail * BPP;                                       \
    memset(vin, 0, sizeof(vin)); /* for msan */                              \
    if (body > 0) {                                                          \
      ANY_SIMD(src, src_tile_stride, dst, body);                             \
    }                                                                        \
    memcpy(vin, src + (body / 16) * src_tile_stride, tail_bytes);            \
    ANY_SIMD(vin, src_tile_stride, vout, MASK + 1);                          \
    memcpy(dst + body, vout, tail_bytes);                                    \
  }
Unexecuted instantiation: DetileRow_Any_SSE2
Unexecuted instantiation: DetileRow_16_Any_SSE2
Unexecuted instantiation: DetileRow_16_Any_AVX
2565
2566
#ifdef HAS_DETILEROW_NEON
2567
ANYDETILE(DetileRow_Any_NEON, DetileRow_NEON, uint8_t, 1, 15)
2568
#endif
2569
#ifdef HAS_DETILEROW_SSE2
2570
ANYDETILE(DetileRow_Any_SSE2, DetileRow_SSE2, uint8_t, 1, 15)
2571
#endif
2572
#ifdef HAS_DETILEROW_16_NEON
2573
ANYDETILE(DetileRow_16_Any_NEON, DetileRow_16_NEON, uint16_t, 2, 15)
2574
#endif
2575
#ifdef HAS_DETILEROW_16_SSE2
2576
ANYDETILE(DetileRow_16_Any_SSE2, DetileRow_16_SSE2, uint16_t, 2, 15)
2577
#endif
2578
#ifdef HAS_DETILEROW_16_AVX
2579
ANYDETILE(DetileRow_16_Any_AVX, DetileRow_16_AVX, uint16_t, 2, 15)
2580
#endif
2581
2582
// DetileSplitUVRow width is in bytes
// Generates NAMEANY(src_uv, src_tile_stride, dst_u, dst_v, width), which
// accepts any width.  The leading multiple of MASK + 1 bytes goes straight
// through ANY_SIMD.  The remaining bytes are read from the source at
// (bytes done / 16) * src_tile_stride into a zeroed 16 byte scratch buffer and
// split into two 8 byte halves; (remainder + 1) / 2 bytes of each half are
// copied out.  Unlike the other wrappers, the scratch call passes the
// remainder itself as the width rather than MASK + 1.
#define ANYDETILESPLITUV(NAMEANY, ANY_SIMD, MASK)                      \
  void NAMEANY(const uint8_t* src_uv, ptrdiff_t src_tile_stride,       \
               uint8_t* dst_u, uint8_t* dst_v, int width) {            \
    SIMD_ALIGNED(uint8_t vin[16]);                                     \
    SIMD_ALIGNED(uint8_t vout[8 * 2]);                                 \
    const int tail = width & MASK;                                     \
    const int body = width & ~MASK;                                    \
    const int tail_uv = (tail + 1) / 2;                                \
    memset(vin, 0, sizeof(vin)); /* for msan */                        \
    if (body > 0) {                                                    \
      ANY_SIMD(src_uv, src_tile_stride, dst_u, dst_v, body);           \
    }                                                                  \
    memcpy(vin, src_uv + (body / 16) * src_tile_stride, tail);         \
    ANY_SIMD(vin, src_tile_stride, vout, vout + 8, tail);              \
    memcpy(dst_u + body / 2, vout, tail_uv);                           \
    memcpy(dst_v + body / 2, vout + 8, tail_uv);                       \
  }
2599
2600
#ifdef HAS_DETILESPLITUVROW_NEON
2601
ANYDETILESPLITUV(DetileSplitUVRow_Any_NEON, DetileSplitUVRow_NEON, 15)
2602
#endif
2603
#ifdef HAS_DETILESPLITUVROW_SSSE3
2604
ANYDETILESPLITUV(DetileSplitUVRow_Any_SSSE3, DetileSplitUVRow_SSSE3, 15)
2605
#endif
2606
2607
// Any-width wrapper for kernels that detile a Y source and a UV source into
// one dst_yuy2 row.
// Generates NAMEANY(src_y, src_y_tile_stride, src_uv, src_uv_tile_stride,
// dst_yuy2, width).  The leading multiple of MASK + 1 goes straight through
// ANY_SIMD.  The remainder of each source is read at
// (amount done / 16) * its tile stride into one half of a zeroed 32 byte
// scratch buffer, and 2 * remainder output bytes are copied out.  The scratch
// call passes the remainder itself as the width rather than MASK + 1.
#define ANYDETILEMERGE(NAMEANY, ANY_SIMD, MASK)                              \
  void NAMEANY(const uint8_t* src_y, ptrdiff_t src_y_tile_stride,            \
               const uint8_t* src_uv, ptrdiff_t src_uv_tile_stride,          \
               uint8_t* dst_yuy2, int width) {                               \
    SIMD_ALIGNED(uint8_t vin[16 * 2]);                                       \
    SIMD_ALIGNED(uint8_t vout[16 * 2]);                                      \
    const int tail = width & MASK;                                           \
    const int body = width & ~MASK;                                          \
    const int tiles_done = body / 16;                                        \
    memset(vin, 0, sizeof(vin)); /* for msan */                              \
    if (body > 0) {                                                          \
      ANY_SIMD(src_y, src_y_tile_stride, src_uv, src_uv_tile_stride,         \
               dst_yuy2, body);                                              \
    }                                                                        \
    /* Y remainder in vin[0..15], UV remainder in vin[16..31]. */            \
    memcpy(vin, src_y + tiles_done * src_y_tile_stride, tail);               \
    memcpy(vin + 16, src_uv + tiles_done * src_uv_tile_stride, tail);        \
    ANY_SIMD(vin, src_y_tile_stride, vin + 16, src_uv_tile_stride, vout,     \
             tail);                                                          \
    memcpy(dst_yuy2 + 2 * body, vout, 2 * tail);                             \
  }
2625
2626
#ifdef HAS_DETILETOYUY2_NEON
2627
ANYDETILEMERGE(DetileToYUY2_Any_NEON, DetileToYUY2_NEON, 15)
2628
#endif
2629
2630
#ifdef HAS_DETILETOYUY2_SSE2
2631
ANYDETILEMERGE(DetileToYUY2_Any_SSE2, DetileToYUY2_SSE2, 15)
2632
#endif
2633
2634
#ifdef __cplusplus
2635
}  // extern "C"
2636
}  // namespace libyuv
2637
#endif