Coverage Report

Created: 2026-01-17 06:56

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libavif/ext/libyuv/source/planar_functions.cc
Line
Count
Source
1
/*
2
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5
 *  that can be found in the LICENSE file in the root of the source
6
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS. All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
9
 */
10
11
#include "libyuv/planar_functions.h"
12
13
#include <assert.h>
14
#include <string.h>  // for memset()
15
16
#include "libyuv/cpu_id.h"
17
#include "libyuv/row.h"
18
#include "libyuv/scale_row.h"  // for ScaleRowDown2
19
20
#ifdef __cplusplus
21
namespace libyuv {
22
extern "C" {
23
#endif
24
25
// Copy a plane of data
26
LIBYUV_API
27
void CopyPlane(const uint8_t* src_y,
28
               int src_stride_y,
29
               uint8_t* dst_y,
30
               int dst_stride_y,
31
               int width,
32
428
               int height) {
33
428
  int y;
34
428
  void (*CopyRow)(const uint8_t* src, uint8_t* dst, int width) = CopyRow_C;
35
428
  if (width <= 0 || height == 0) {
36
0
    return;
37
0
  }
38
  // Negative height means invert the image.
39
428
  if (height < 0) {
40
0
    height = -height;
41
0
    dst_y = dst_y + (height - 1) * dst_stride_y;
42
0
    dst_stride_y = -dst_stride_y;
43
0
  }
44
  // Coalesce rows.
45
428
  if (src_stride_y == width && dst_stride_y == width) {
46
18
    width *= height;
47
18
    height = 1;
48
18
    src_stride_y = dst_stride_y = 0;
49
18
  }
50
  // Nothing to do.
51
428
  if (src_y == dst_y && src_stride_y == dst_stride_y) {
52
0
    return;
53
0
  }
54
55
428
#if defined(HAS_COPYROW_SSE2)
56
428
  if (TestCpuFlag(kCpuHasSSE2)) {
57
428
    CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
58
428
  }
59
428
#endif
60
428
#if defined(HAS_COPYROW_AVX)
61
428
  if (TestCpuFlag(kCpuHasAVX)) {
62
428
    CopyRow = IS_ALIGNED(width, 64) ? CopyRow_AVX : CopyRow_Any_AVX;
63
428
  }
64
428
#endif
65
428
#if defined(HAS_COPYROW_AVX512BW)
66
428
  if (TestCpuFlag(kCpuHasAVX512BW)) {
67
0
    CopyRow = IS_ALIGNED(width, 128) ? CopyRow_AVX512BW : CopyRow_Any_AVX512BW;
68
0
  }
69
428
#endif
70
428
#if defined(HAS_COPYROW_ERMS)
71
428
  if (TestCpuFlag(kCpuHasERMS)) {
72
428
    CopyRow = CopyRow_ERMS;
73
428
  }
74
428
#endif
75
#if defined(HAS_COPYROW_NEON)
76
  if (TestCpuFlag(kCpuHasNEON)) {
77
    CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
78
  }
79
#endif
80
#if defined(HAS_COPYROW_SME)
81
  if (TestCpuFlag(kCpuHasSME)) {
82
    CopyRow = CopyRow_SME;
83
  }
84
#endif
85
#if defined(HAS_COPYROW_RVV)
86
  if (TestCpuFlag(kCpuHasRVV)) {
87
    CopyRow = CopyRow_RVV;
88
  }
89
#endif
90
91
  // Copy plane
92
26.4k
  for (y = 0; y < height; ++y) {
93
26.0k
    CopyRow(src_y, dst_y, width);
94
26.0k
    src_y += src_stride_y;
95
26.0k
    dst_y += dst_stride_y;
96
26.0k
  }
97
428
}
98
99
LIBYUV_API
100
void CopyPlane_16(const uint16_t* src_y,
101
                  int src_stride_y,
102
                  uint16_t* dst_y,
103
                  int dst_stride_y,
104
                  int width,
105
316
                  int height) {
106
316
  CopyPlane((const uint8_t*)src_y, src_stride_y * 2, (uint8_t*)dst_y,
107
316
            dst_stride_y * 2, width * 2, height);
108
316
}
109
110
// Convert a plane of 16 bit data to 8 bit
111
LIBYUV_API
112
void Convert16To8Plane(const uint16_t* src_y,
113
                       int src_stride_y,
114
                       uint8_t* dst_y,
115
                       int dst_stride_y,
116
                       int scale,  // 16384 for 10 bits
117
                       int width,
118
725
                       int height) {
119
725
  int y;
120
725
  void (*Convert16To8Row)(const uint16_t* src_y, uint8_t* dst_y, int scale,
121
725
                          int width) = Convert16To8Row_C;
122
123
725
  if (width <= 0 || height == 0) {
124
0
    return;
125
0
  }
126
  // Negative height means invert the image.
127
725
  if (height < 0) {
128
0
    height = -height;
129
0
    dst_y = dst_y + (height - 1) * dst_stride_y;
130
0
    dst_stride_y = -dst_stride_y;
131
0
  }
132
  // Coalesce rows.
133
725
  if (src_stride_y == width && dst_stride_y == width) {
134
725
    width *= height;
135
725
    height = 1;
136
725
    src_stride_y = dst_stride_y = 0;
137
725
  }
138
#if defined(HAS_CONVERT16TO8ROW_NEON)
139
  if (TestCpuFlag(kCpuHasNEON)) {
140
    Convert16To8Row = Convert16To8Row_Any_NEON;
141
    if (IS_ALIGNED(width, 16)) {
142
      Convert16To8Row = Convert16To8Row_NEON;
143
    }
144
  }
145
#endif
146
#if defined(HAS_CONVERT16TO8ROW_SME)
147
  if (TestCpuFlag(kCpuHasSME)) {
148
    Convert16To8Row = Convert16To8Row_SME;
149
  }
150
#endif
151
725
#if defined(HAS_CONVERT16TO8ROW_SSSE3)
152
725
  if (TestCpuFlag(kCpuHasSSSE3)) {
153
725
    Convert16To8Row = Convert16To8Row_Any_SSSE3;
154
725
    if (IS_ALIGNED(width, 16)) {
155
270
      Convert16To8Row = Convert16To8Row_SSSE3;
156
270
    }
157
725
  }
158
725
#endif
159
725
#if defined(HAS_CONVERT16TO8ROW_AVX2)
160
725
  if (TestCpuFlag(kCpuHasAVX2)) {
161
725
    Convert16To8Row = Convert16To8Row_Any_AVX2;
162
725
    if (IS_ALIGNED(width, 32)) {
163
138
      Convert16To8Row = Convert16To8Row_AVX2;
164
138
    }
165
725
  }
166
725
#endif
167
725
#if defined(HAS_CONVERT16TO8ROW_AVX512BW)
168
725
  if (TestCpuFlag(kCpuHasAVX512BW)) {
169
0
    Convert16To8Row = Convert16To8Row_Any_AVX512BW;
170
0
    if (IS_ALIGNED(width, 64)) {
171
0
      Convert16To8Row = Convert16To8Row_AVX512BW;
172
0
    }
173
0
  }
174
725
#endif
175
176
  // Convert plane
177
1.45k
  for (y = 0; y < height; ++y) {
178
725
    Convert16To8Row(src_y, dst_y, scale, width);
179
725
    src_y += src_stride_y;
180
725
    dst_y += dst_stride_y;
181
725
  }
182
725
}
183
184
// Convert a plane of 8 bit data to 16 bit
185
LIBYUV_API
186
void Convert8To16Plane(const uint8_t* src_y,
187
                       int src_stride_y,
188
                       uint16_t* dst_y,
189
                       int dst_stride_y,
190
                       int scale,  // 1024 for 10 bits
191
                       int width,
192
0
                       int height) {
193
0
  int y;
194
0
  void (*Convert8To16Row)(const uint8_t* src_y, uint16_t* dst_y, int scale,
195
0
                          int width) = Convert8To16Row_C;
196
197
0
  if (width <= 0 || height == 0) {
198
0
    return;
199
0
  }
200
  // Negative height means invert the image.
201
0
  if (height < 0) {
202
0
    height = -height;
203
0
    dst_y = dst_y + (height - 1) * dst_stride_y;
204
0
    dst_stride_y = -dst_stride_y;
205
0
  }
206
  // Coalesce rows.
207
0
  if (src_stride_y == width && dst_stride_y == width) {
208
0
    width *= height;
209
0
    height = 1;
210
0
    src_stride_y = dst_stride_y = 0;
211
0
  }
212
0
#if defined(HAS_CONVERT8TO16ROW_SSE2)
213
0
  if (TestCpuFlag(kCpuHasSSE2)) {
214
0
    Convert8To16Row = Convert8To16Row_Any_SSE2;
215
0
    if (IS_ALIGNED(width, 16)) {
216
0
      Convert8To16Row = Convert8To16Row_SSE2;
217
0
    }
218
0
  }
219
0
#endif
220
0
#if defined(HAS_CONVERT8TO16ROW_AVX2)
221
0
  if (TestCpuFlag(kCpuHasAVX2)) {
222
0
    Convert8To16Row = Convert8To16Row_Any_AVX2;
223
0
    if (IS_ALIGNED(width, 32)) {
224
0
      Convert8To16Row = Convert8To16Row_AVX2;
225
0
    }
226
0
  }
227
0
#endif
228
#if defined(HAS_CONVERT8TO16ROW_NEON)
229
  if (TestCpuFlag(kCpuHasNEON)) {
230
    Convert8To16Row = Convert8To16Row_Any_NEON;
231
    if (IS_ALIGNED(width, 16)) {
232
      Convert8To16Row = Convert8To16Row_NEON;
233
    }
234
  }
235
#endif
236
#if defined(HAS_CONVERT8TO16ROW_SME)
237
  if (TestCpuFlag(kCpuHasSME)) {
238
    Convert8To16Row = Convert8To16Row_SME;
239
  }
240
#endif
241
242
  // Convert plane
243
0
  for (y = 0; y < height; ++y) {
244
0
    Convert8To16Row(src_y, dst_y, scale, width);
245
0
    src_y += src_stride_y;
246
0
    dst_y += dst_stride_y;
247
0
  }
248
0
}
249
250
// Convert a plane of 8 bit data to 8 bit
251
LIBYUV_API
252
void Convert8To8Plane(const uint8_t* src_y,
253
                      int src_stride_y,
254
                      uint8_t* dst_y,
255
                      int dst_stride_y,
256
                      int scale,  // 220 for Y, 225 to UV
257
                      int bias,   // 16
258
                      int width,
259
0
                      int height) {
260
0
  int y;
261
0
  void (*Convert8To8Row)(const uint8_t* src_y, uint8_t* dst_y, int scale,
262
0
                         int bias, int width) = Convert8To8Row_C;
263
264
0
  if (width <= 0 || height == 0) {
265
0
    return;
266
0
  }
267
  // Negative height means invert the image.
268
0
  if (height < 0) {
269
0
    height = -height;
270
0
    dst_y = dst_y + (height - 1) * dst_stride_y;
271
0
    dst_stride_y = -dst_stride_y;
272
0
  }
273
  // Coalesce rows.
274
0
  if (src_stride_y == width && dst_stride_y == width) {
275
0
    width *= height;
276
0
    height = 1;
277
0
    src_stride_y = dst_stride_y = 0;
278
0
  }
279
#if defined(HAS_CONVERT8TO8ROW_NEON)
280
  if (TestCpuFlag(kCpuHasNEON)) {
281
    Convert8To8Row = Convert8To8Row_Any_NEON;
282
    if (IS_ALIGNED(width, 32)) {
283
      Convert8To8Row = Convert8To8Row_NEON;
284
    }
285
  }
286
#endif
287
#if defined(HAS_CONVERT8TO8ROW_SVE2)
288
  if (TestCpuFlag(kCpuHasSVE2)) {
289
    Convert8To8Row = Convert8To8Row_SVE2;
290
  }
291
#endif
292
#if defined(HAS_CONVERT8TO8ROW_SME)
293
  if (TestCpuFlag(kCpuHasSME)) {
294
    Convert8To8Row = Convert8To8Row_SME;
295
  }
296
#endif
297
0
#if defined(HAS_CONVERT8TO8ROW_AVX2)
298
0
  if (TestCpuFlag(kCpuHasAVX2)) {
299
0
    Convert8To8Row = Convert8To8Row_Any_AVX2;
300
0
    if (IS_ALIGNED(width, 32)) {
301
0
      Convert8To8Row = Convert8To8Row_AVX2;
302
0
    }
303
0
  }
304
0
#endif
305
306
  // Convert plane
307
0
  for (y = 0; y < height; ++y) {
308
0
    Convert8To8Row(src_y, dst_y, scale, bias, width);
309
0
    src_y += src_stride_y;
310
0
    dst_y += dst_stride_y;
311
0
  }
312
0
}
313
314
// Copy I422.
315
LIBYUV_API
316
int I422Copy(const uint8_t* src_y,
317
             int src_stride_y,
318
             const uint8_t* src_u,
319
             int src_stride_u,
320
             const uint8_t* src_v,
321
             int src_stride_v,
322
             uint8_t* dst_y,
323
             int dst_stride_y,
324
             uint8_t* dst_u,
325
             int dst_stride_u,
326
             uint8_t* dst_v,
327
             int dst_stride_v,
328
             int width,
329
0
             int height) {
330
0
  int halfwidth = (width + 1) >> 1;
331
332
0
  if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 ||
333
0
      height == 0) {
334
0
    return -1;
335
0
  }
336
337
  // Negative height means invert the image.
338
0
  if (height < 0) {
339
0
    height = -height;
340
0
    src_y = src_y + (height - 1) * src_stride_y;
341
0
    src_u = src_u + (height - 1) * src_stride_u;
342
0
    src_v = src_v + (height - 1) * src_stride_v;
343
0
    src_stride_y = -src_stride_y;
344
0
    src_stride_u = -src_stride_u;
345
0
    src_stride_v = -src_stride_v;
346
0
  }
347
348
0
  if (dst_y) {
349
0
    CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
350
0
  }
351
0
  CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, height);
352
0
  CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, height);
353
0
  return 0;
354
0
}
355
356
// Copy I444.
357
LIBYUV_API
358
int I444Copy(const uint8_t* src_y,
359
             int src_stride_y,
360
             const uint8_t* src_u,
361
             int src_stride_u,
362
             const uint8_t* src_v,
363
             int src_stride_v,
364
             uint8_t* dst_y,
365
             int dst_stride_y,
366
             uint8_t* dst_u,
367
             int dst_stride_u,
368
             uint8_t* dst_v,
369
             int dst_stride_v,
370
             int width,
371
0
             int height) {
372
0
  if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 ||
373
0
      height == 0) {
374
0
    return -1;
375
0
  }
376
  // Negative height means invert the image.
377
0
  if (height < 0) {
378
0
    height = -height;
379
0
    src_y = src_y + (height - 1) * src_stride_y;
380
0
    src_u = src_u + (height - 1) * src_stride_u;
381
0
    src_v = src_v + (height - 1) * src_stride_v;
382
0
    src_stride_y = -src_stride_y;
383
0
    src_stride_u = -src_stride_u;
384
0
    src_stride_v = -src_stride_v;
385
0
  }
386
387
0
  if (dst_y) {
388
0
    CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
389
0
  }
390
0
  CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
391
0
  CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
392
0
  return 0;
393
0
}
394
395
// Copy I210.
396
LIBYUV_API
397
int I210Copy(const uint16_t* src_y,
398
             int src_stride_y,
399
             const uint16_t* src_u,
400
             int src_stride_u,
401
             const uint16_t* src_v,
402
             int src_stride_v,
403
             uint16_t* dst_y,
404
             int dst_stride_y,
405
             uint16_t* dst_u,
406
             int dst_stride_u,
407
             uint16_t* dst_v,
408
             int dst_stride_v,
409
             int width,
410
0
             int height) {
411
0
  int halfwidth = (width + 1) >> 1;
412
413
0
  if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 ||
414
0
      height == 0) {
415
0
    return -1;
416
0
  }
417
418
  // Negative height means invert the image.
419
0
  if (height < 0) {
420
0
    height = -height;
421
0
    src_y = src_y + (height - 1) * src_stride_y;
422
0
    src_u = src_u + (height - 1) * src_stride_u;
423
0
    src_v = src_v + (height - 1) * src_stride_v;
424
0
    src_stride_y = -src_stride_y;
425
0
    src_stride_u = -src_stride_u;
426
0
    src_stride_v = -src_stride_v;
427
0
  }
428
429
0
  if (dst_y) {
430
0
    CopyPlane_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
431
0
  }
432
  // Copy UV planes.
433
0
  CopyPlane_16(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, height);
434
0
  CopyPlane_16(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, height);
435
0
  return 0;
436
0
}
437
438
// Copy I410.
439
LIBYUV_API
440
int I410Copy(const uint16_t* src_y,
441
             int src_stride_y,
442
             const uint16_t* src_u,
443
             int src_stride_u,
444
             const uint16_t* src_v,
445
             int src_stride_v,
446
             uint16_t* dst_y,
447
             int dst_stride_y,
448
             uint16_t* dst_u,
449
             int dst_stride_u,
450
             uint16_t* dst_v,
451
             int dst_stride_v,
452
             int width,
453
0
             int height) {
454
0
  if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 ||
455
0
      height == 0) {
456
0
    return -1;
457
0
  }
458
  // Negative height means invert the image.
459
0
  if (height < 0) {
460
0
    height = -height;
461
0
    src_y = src_y + (height - 1) * src_stride_y;
462
0
    src_u = src_u + (height - 1) * src_stride_u;
463
0
    src_v = src_v + (height - 1) * src_stride_v;
464
0
    src_stride_y = -src_stride_y;
465
0
    src_stride_u = -src_stride_u;
466
0
    src_stride_v = -src_stride_v;
467
0
  }
468
469
0
  if (dst_y) {
470
0
    CopyPlane_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
471
0
  }
472
0
  CopyPlane_16(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
473
0
  CopyPlane_16(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
474
0
  return 0;
475
0
}
476
477
// Copy I400.
478
LIBYUV_API
479
int I400ToI400(const uint8_t* src_y,
480
               int src_stride_y,
481
               uint8_t* dst_y,
482
               int dst_stride_y,
483
               int width,
484
0
               int height) {
485
0
  if (!src_y || !dst_y || width <= 0 || height == 0) {
486
0
    return -1;
487
0
  }
488
  // Negative height means invert the image.
489
0
  if (height < 0) {
490
0
    height = -height;
491
0
    src_y = src_y + (height - 1) * src_stride_y;
492
0
    src_stride_y = -src_stride_y;
493
0
  }
494
0
  CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
495
0
  return 0;
496
0
}
497
498
// Convert I420 to I400.
499
LIBYUV_API
500
int I420ToI400(const uint8_t* src_y,
501
               int src_stride_y,
502
               const uint8_t* src_u,
503
               int src_stride_u,
504
               const uint8_t* src_v,
505
               int src_stride_v,
506
               uint8_t* dst_y,
507
               int dst_stride_y,
508
               int width,
509
0
               int height) {
510
0
  (void)src_u;
511
0
  (void)src_stride_u;
512
0
  (void)src_v;
513
0
  (void)src_stride_v;
514
0
  if (!src_y || !dst_y || width <= 0 || height == 0) {
515
0
    return -1;
516
0
  }
517
  // Negative height means invert the image.
518
0
  if (height < 0) {
519
0
    height = -height;
520
0
    src_y = src_y + (height - 1) * src_stride_y;
521
0
    src_stride_y = -src_stride_y;
522
0
  }
523
524
0
  CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
525
0
  return 0;
526
0
}
527
528
// Copy NV12. Supports inverting.
529
LIBYUV_API
530
int NV12Copy(const uint8_t* src_y,
531
             int src_stride_y,
532
             const uint8_t* src_uv,
533
             int src_stride_uv,
534
             uint8_t* dst_y,
535
             int dst_stride_y,
536
             uint8_t* dst_uv,
537
             int dst_stride_uv,
538
             int width,
539
0
             int height) {
540
0
  int halfwidth = (width + 1) >> 1;
541
0
  int halfheight = (height + 1) >> 1;
542
543
0
  if (!src_y || !dst_y || !src_uv || !dst_uv || width <= 0 || height == 0) {
544
0
    return -1;
545
0
  }
546
547
  // Negative height means invert the image.
548
0
  if (height < 0) {
549
0
    height = -height;
550
0
    halfheight = (height + 1) >> 1;
551
0
    src_y = src_y + (height - 1) * src_stride_y;
552
0
    src_uv = src_uv + (halfheight - 1) * src_stride_uv;
553
0
    src_stride_y = -src_stride_y;
554
0
    src_stride_uv = -src_stride_uv;
555
0
  }
556
0
  CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
557
0
  CopyPlane(src_uv, src_stride_uv, dst_uv, dst_stride_uv, halfwidth * 2,
558
0
            halfheight);
559
0
  return 0;
560
0
}
561
562
// Copy NV21. Supports inverting.
563
LIBYUV_API
564
int NV21Copy(const uint8_t* src_y,
565
             int src_stride_y,
566
             const uint8_t* src_vu,
567
             int src_stride_vu,
568
             uint8_t* dst_y,
569
             int dst_stride_y,
570
             uint8_t* dst_vu,
571
             int dst_stride_vu,
572
             int width,
573
0
             int height) {
574
0
  return NV12Copy(src_y, src_stride_y, src_vu, src_stride_vu, dst_y,
575
0
                  dst_stride_y, dst_vu, dst_stride_vu, width, height);
576
0
}
577
578
// Support function for NV12 etc UV channels.
579
// Width and height are plane sizes (typically half pixel width).
580
LIBYUV_API
581
void SplitUVPlane(const uint8_t* src_uv,
582
                  int src_stride_uv,
583
                  uint8_t* dst_u,
584
                  int dst_stride_u,
585
                  uint8_t* dst_v,
586
                  int dst_stride_v,
587
                  int width,
588
0
                  int height) {
589
0
  int y;
590
0
  void (*SplitUVRow)(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v,
591
0
                     int width) = SplitUVRow_C;
592
0
  if (width <= 0 || height == 0) {
593
0
    return;
594
0
  }
595
  // Negative height means invert the image.
596
0
  if (height < 0) {
597
0
    height = -height;
598
0
    dst_u = dst_u + (height - 1) * dst_stride_u;
599
0
    dst_v = dst_v + (height - 1) * dst_stride_v;
600
0
    dst_stride_u = -dst_stride_u;
601
0
    dst_stride_v = -dst_stride_v;
602
0
  }
603
  // Coalesce rows.
604
0
  if (src_stride_uv == width * 2 && dst_stride_u == width &&
605
0
      dst_stride_v == width) {
606
0
    width *= height;
607
0
    height = 1;
608
0
    src_stride_uv = dst_stride_u = dst_stride_v = 0;
609
0
  }
610
0
#if defined(HAS_SPLITUVROW_SSE2)
611
0
  if (TestCpuFlag(kCpuHasSSE2)) {
612
0
    SplitUVRow = SplitUVRow_Any_SSE2;
613
0
    if (IS_ALIGNED(width, 16)) {
614
0
      SplitUVRow = SplitUVRow_SSE2;
615
0
    }
616
0
  }
617
0
#endif
618
0
#if defined(HAS_SPLITUVROW_AVX2)
619
0
  if (TestCpuFlag(kCpuHasAVX2)) {
620
0
    SplitUVRow = SplitUVRow_Any_AVX2;
621
0
    if (IS_ALIGNED(width, 32)) {
622
0
      SplitUVRow = SplitUVRow_AVX2;
623
0
    }
624
0
  }
625
0
#endif
626
#if defined(HAS_SPLITUVROW_NEON)
627
  if (TestCpuFlag(kCpuHasNEON)) {
628
    SplitUVRow = SplitUVRow_Any_NEON;
629
    if (IS_ALIGNED(width, 16)) {
630
      SplitUVRow = SplitUVRow_NEON;
631
    }
632
  }
633
#endif
634
#if defined(HAS_SPLITUVROW_LSX)
635
  if (TestCpuFlag(kCpuHasLSX)) {
636
    SplitUVRow = SplitUVRow_Any_LSX;
637
    if (IS_ALIGNED(width, 32)) {
638
      SplitUVRow = SplitUVRow_LSX;
639
    }
640
  }
641
#endif
642
#if defined(HAS_SPLITUVROW_RVV)
643
  if (TestCpuFlag(kCpuHasRVV)) {
644
    SplitUVRow = SplitUVRow_RVV;
645
  }
646
#endif
647
648
0
  for (y = 0; y < height; ++y) {
649
    // Copy a row of UV.
650
0
    SplitUVRow(src_uv, dst_u, dst_v, width);
651
0
    dst_u += dst_stride_u;
652
0
    dst_v += dst_stride_v;
653
0
    src_uv += src_stride_uv;
654
0
  }
655
0
}
656
657
LIBYUV_API
658
void MergeUVPlane(const uint8_t* src_u,
659
                  int src_stride_u,
660
                  const uint8_t* src_v,
661
                  int src_stride_v,
662
                  uint8_t* dst_uv,
663
                  int dst_stride_uv,
664
                  int width,
665
0
                  int height) {
666
0
  int y;
667
0
  void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v,
668
0
                     uint8_t* dst_uv, int width) = MergeUVRow_C;
669
0
  if (width <= 0 || height == 0) {
670
0
    return;
671
0
  }
672
  // Negative height means invert the image.
673
0
  if (height < 0) {
674
0
    height = -height;
675
0
    dst_uv = dst_uv + (height - 1) * dst_stride_uv;
676
0
    dst_stride_uv = -dst_stride_uv;
677
0
  }
678
  // Coalesce rows.
679
0
  if (src_stride_u == width && src_stride_v == width &&
680
0
      dst_stride_uv == width * 2) {
681
0
    width *= height;
682
0
    height = 1;
683
0
    src_stride_u = src_stride_v = dst_stride_uv = 0;
684
0
  }
685
0
#if defined(HAS_MERGEUVROW_SSE2)
686
0
  if (TestCpuFlag(kCpuHasSSE2)) {
687
0
    MergeUVRow = MergeUVRow_Any_SSE2;
688
0
    if (IS_ALIGNED(width, 16)) {
689
0
      MergeUVRow = MergeUVRow_SSE2;
690
0
    }
691
0
  }
692
0
#endif
693
0
#if defined(HAS_MERGEUVROW_AVX2)
694
0
  if (TestCpuFlag(kCpuHasAVX2)) {
695
0
    MergeUVRow = MergeUVRow_Any_AVX2;
696
0
    if (IS_ALIGNED(width, 16)) {
697
0
      MergeUVRow = MergeUVRow_AVX2;
698
0
    }
699
0
  }
700
0
#endif
701
0
#if defined(HAS_MERGEUVROW_AVX512BW)
702
0
  if (TestCpuFlag(kCpuHasAVX512BW)) {
703
0
    MergeUVRow = MergeUVRow_Any_AVX512BW;
704
0
    if (IS_ALIGNED(width, 32)) {
705
0
      MergeUVRow = MergeUVRow_AVX512BW;
706
0
    }
707
0
  }
708
0
#endif
709
#if defined(HAS_MERGEUVROW_NEON)
710
  if (TestCpuFlag(kCpuHasNEON)) {
711
    MergeUVRow = MergeUVRow_Any_NEON;
712
    if (IS_ALIGNED(width, 16)) {
713
      MergeUVRow = MergeUVRow_NEON;
714
    }
715
  }
716
#endif
717
#if defined(HAS_MERGEUVROW_SME)
718
  if (TestCpuFlag(kCpuHasSME)) {
719
    MergeUVRow = MergeUVRow_SME;
720
  }
721
#endif
722
#if defined(HAS_MERGEUVROW_LSX)
723
  if (TestCpuFlag(kCpuHasLSX)) {
724
    MergeUVRow = MergeUVRow_Any_LSX;
725
    if (IS_ALIGNED(width, 16)) {
726
      MergeUVRow = MergeUVRow_LSX;
727
    }
728
  }
729
#endif
730
#if defined(HAS_MERGEUVROW_RVV)
731
  if (TestCpuFlag(kCpuHasRVV)) {
732
    MergeUVRow = MergeUVRow_RVV;
733
  }
734
#endif
735
736
0
  for (y = 0; y < height; ++y) {
737
    // Merge a row of U and V into a row of UV.
738
0
    MergeUVRow(src_u, src_v, dst_uv, width);
739
0
    src_u += src_stride_u;
740
0
    src_v += src_stride_v;
741
0
    dst_uv += dst_stride_uv;
742
0
  }
743
0
}
744
745
// Support function for P010 etc UV channels.
746
// Width and height are plane sizes (typically half pixel width).
747
LIBYUV_API
748
void SplitUVPlane_16(const uint16_t* src_uv,
749
                     int src_stride_uv,
750
                     uint16_t* dst_u,
751
                     int dst_stride_u,
752
                     uint16_t* dst_v,
753
                     int dst_stride_v,
754
                     int width,
755
                     int height,
756
0
                     int depth) {
757
0
  int y;
758
0
  void (*SplitUVRow_16)(const uint16_t* src_uv, uint16_t* dst_u,
759
0
                        uint16_t* dst_v, int depth, int width) =
760
0
      SplitUVRow_16_C;
761
0
  if (width <= 0 || height == 0) {
762
0
    return;
763
0
  }
764
  // Negative height means invert the image.
765
0
  if (height < 0) {
766
0
    height = -height;
767
0
    dst_u = dst_u + (height - 1) * dst_stride_u;
768
0
    dst_v = dst_v + (height - 1) * dst_stride_v;
769
0
    dst_stride_u = -dst_stride_u;
770
0
    dst_stride_v = -dst_stride_v;
771
0
  }
772
  // Coalesce rows.
773
0
  if (src_stride_uv == width * 2 && dst_stride_u == width &&
774
0
      dst_stride_v == width) {
775
0
    width *= height;
776
0
    height = 1;
777
0
    src_stride_uv = dst_stride_u = dst_stride_v = 0;
778
0
  }
779
0
#if defined(HAS_SPLITUVROW_16_AVX2)
780
0
  if (TestCpuFlag(kCpuHasAVX2)) {
781
0
    SplitUVRow_16 = SplitUVRow_16_Any_AVX2;
782
0
    if (IS_ALIGNED(width, 16)) {
783
0
      SplitUVRow_16 = SplitUVRow_16_AVX2;
784
0
    }
785
0
  }
786
0
#endif
787
#if defined(HAS_SPLITUVROW_16_NEON)
788
  if (TestCpuFlag(kCpuHasNEON)) {
789
    SplitUVRow_16 = SplitUVRow_16_Any_NEON;
790
    if (IS_ALIGNED(width, 8)) {
791
      SplitUVRow_16 = SplitUVRow_16_NEON;
792
    }
793
  }
794
#endif
795
796
0
  for (y = 0; y < height; ++y) {
797
    // Copy a row of UV.
798
0
    SplitUVRow_16(src_uv, dst_u, dst_v, depth, width);
799
0
    dst_u += dst_stride_u;
800
0
    dst_v += dst_stride_v;
801
0
    src_uv += src_stride_uv;
802
0
  }
803
0
}
804
805
LIBYUV_API
806
void MergeUVPlane_16(const uint16_t* src_u,
807
                     int src_stride_u,
808
                     const uint16_t* src_v,
809
                     int src_stride_v,
810
                     uint16_t* dst_uv,
811
                     int dst_stride_uv,
812
                     int width,
813
                     int height,
814
0
                     int depth) {
815
0
  int y;
816
0
  void (*MergeUVRow_16)(const uint16_t* src_u, const uint16_t* src_v,
817
0
                        uint16_t* dst_uv, int depth, int width) =
818
0
      MergeUVRow_16_C;
819
0
  assert(depth >= 8);
820
0
  assert(depth <= 16);
821
0
  if (width <= 0 || height == 0) {
822
0
    return;
823
0
  }
824
  // Negative height means invert the image.
825
0
  if (height < 0) {
826
0
    height = -height;
827
0
    dst_uv = dst_uv + (height - 1) * dst_stride_uv;
828
0
    dst_stride_uv = -dst_stride_uv;
829
0
  }
830
  // Coalesce rows.
831
0
  if (src_stride_u == width && src_stride_v == width &&
832
0
      dst_stride_uv == width * 2) {
833
0
    width *= height;
834
0
    height = 1;
835
0
    src_stride_u = src_stride_v = dst_stride_uv = 0;
836
0
  }
837
0
#if defined(HAS_MERGEUVROW_16_AVX2)
838
0
  if (TestCpuFlag(kCpuHasAVX2)) {
839
0
    MergeUVRow_16 = MergeUVRow_16_Any_AVX2;
840
0
    if (IS_ALIGNED(width, 8)) {
841
0
      MergeUVRow_16 = MergeUVRow_16_AVX2;
842
0
    }
843
0
  }
844
0
#endif
845
#if defined(HAS_MERGEUVROW_16_NEON)
846
  if (TestCpuFlag(kCpuHasNEON)) {
847
    MergeUVRow_16 = MergeUVRow_16_Any_NEON;
848
    if (IS_ALIGNED(width, 8)) {
849
      MergeUVRow_16 = MergeUVRow_16_NEON;
850
    }
851
  }
852
#endif
853
#if defined(HAS_MERGEUVROW_16_SME)
854
  if (TestCpuFlag(kCpuHasSME)) {
855
    MergeUVRow_16 = MergeUVRow_16_SME;
856
  }
857
#endif
858
859
0
  for (y = 0; y < height; ++y) {
860
    // Merge a row of U and V into a row of UV.
861
0
    MergeUVRow_16(src_u, src_v, dst_uv, depth, width);
862
0
    src_u += src_stride_u;
863
0
    src_v += src_stride_v;
864
0
    dst_uv += dst_stride_uv;
865
0
  }
866
0
}
867
868
// Convert plane from lsb to msb
869
LIBYUV_API
870
void ConvertToMSBPlane_16(const uint16_t* src_y,
871
                          int src_stride_y,
872
                          uint16_t* dst_y,
873
                          int dst_stride_y,
874
                          int width,
875
                          int height,
876
0
                          int depth) {
877
0
  int y;
878
0
  int scale = 1 << (16 - depth);
879
0
  void (*MultiplyRow_16)(const uint16_t* src_y, uint16_t* dst_y, int scale,
880
0
                         int width) = MultiplyRow_16_C;
881
0
  if (width <= 0 || height == 0) {
882
0
    return;
883
0
  }
884
  // Negative height means invert the image.
885
0
  if (height < 0) {
886
0
    height = -height;
887
0
    dst_y = dst_y + (height - 1) * dst_stride_y;
888
0
    dst_stride_y = -dst_stride_y;
889
0
  }
890
  // Coalesce rows.
891
0
  if (src_stride_y == width && dst_stride_y == width) {
892
0
    width *= height;
893
0
    height = 1;
894
0
    src_stride_y = dst_stride_y = 0;
895
0
  }
896
897
0
#if defined(HAS_MULTIPLYROW_16_AVX2)
898
0
  if (TestCpuFlag(kCpuHasAVX2)) {
899
0
    MultiplyRow_16 = MultiplyRow_16_Any_AVX2;
900
0
    if (IS_ALIGNED(width, 32)) {
901
0
      MultiplyRow_16 = MultiplyRow_16_AVX2;
902
0
    }
903
0
  }
904
0
#endif
905
#if defined(HAS_MULTIPLYROW_16_NEON)
906
  if (TestCpuFlag(kCpuHasNEON)) {
907
    MultiplyRow_16 = MultiplyRow_16_Any_NEON;
908
    if (IS_ALIGNED(width, 16)) {
909
      MultiplyRow_16 = MultiplyRow_16_NEON;
910
    }
911
  }
912
#endif
913
#if defined(HAS_MULTIPLYROW_16_SME)
914
  if (TestCpuFlag(kCpuHasSME)) {
915
    MultiplyRow_16 = MultiplyRow_16_SME;
916
  }
917
#endif
918
919
0
  for (y = 0; y < height; ++y) {
920
0
    MultiplyRow_16(src_y, dst_y, scale, width);
921
0
    src_y += src_stride_y;
922
0
    dst_y += dst_stride_y;
923
0
  }
924
0
}
925
926
// Convert plane from msb to lsb: shift each 16 bit sample down so that a
// depth-bit value stored in the most significant bits becomes
// least-significant-bit aligned.
LIBYUV_API
void ConvertToLSBPlane_16(const uint16_t* src_y,
                          int src_stride_y,
                          uint16_t* dst_y,
                          int dst_stride_y,
                          int width,
                          int height,
                          int depth) {
  int y;
  // DivideRow_16 computes (src * scale) >> 16, so scale = 1 << depth makes
  // the net effect a right shift by (16 - depth).
  // NOTE(review): assumes 0 < depth <= 16 — not validated here; confirm at
  // call sites.
  int scale = 1 << depth;
  void (*DivideRow)(const uint16_t* src_y, uint16_t* dst_y, int scale,
                    int width) = DivideRow_16_C;
  if (width <= 0 || height == 0) {
    return;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_y = dst_y + (height - 1) * dst_stride_y;
    dst_stride_y = -dst_stride_y;
  }
  // Coalesce rows: a fully contiguous plane can be processed as one long row.
  if (src_stride_y == width && dst_stride_y == width) {
    width *= height;
    height = 1;
    src_stride_y = dst_stride_y = 0;
  }

  // Pick the fastest row function the CPU supports.  The _Any_ variants
  // handle widths that are not a multiple of the SIMD lane count.
#if defined(HAS_DIVIDEROW_16_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    DivideRow = DivideRow_16_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      DivideRow = DivideRow_16_AVX2;
    }
  }
#endif
#if defined(HAS_DIVIDEROW_16_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    DivideRow = DivideRow_16_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      DivideRow = DivideRow_16_NEON;
    }
  }
#endif
#if defined(HAS_DIVIDEROW_16_SVE2)
  if (TestCpuFlag(kCpuHasSVE2)) {
    // SVE2 is vector-length agnostic; no alignment-specific variant needed.
    DivideRow = DivideRow_16_SVE2;
  }
#endif

  for (y = 0; y < height; ++y) {
    DivideRow(src_y, dst_y, scale, width);
    src_y += src_stride_y;
    dst_y += dst_stride_y;
  }
}
983
984
// Swap U and V channels in interleaved UV plane.
// Reads 2 bytes per pixel pair (U,V) and writes them as (V,U); usable in
// either direction (UV->VU or VU->UV).
LIBYUV_API
void SwapUVPlane(const uint8_t* src_uv,
                 int src_stride_uv,
                 uint8_t* dst_vu,
                 int dst_stride_vu,
                 int width,
                 int height) {
  int y;
  void (*SwapUVRow)(const uint8_t* src_uv, uint8_t* dst_vu, int width) =
      SwapUVRow_C;
  if (width <= 0 || height == 0) {
    return;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_uv = src_uv + (height - 1) * src_stride_uv;
    src_stride_uv = -src_stride_uv;
  }
  // Coalesce rows.  Strides are in bytes and each pixel pair is 2 bytes,
  // hence the width * 2 comparison.
  if (src_stride_uv == width * 2 && dst_stride_vu == width * 2) {
    width *= height;
    height = 1;
    src_stride_uv = dst_stride_vu = 0;
  }

  // Pick the fastest row function the CPU supports; _Any_ variants handle
  // widths not a multiple of the SIMD lane count.
#if defined(HAS_SWAPUVROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    SwapUVRow = SwapUVRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      SwapUVRow = SwapUVRow_SSSE3;
    }
  }
#endif
#if defined(HAS_SWAPUVROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    SwapUVRow = SwapUVRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      SwapUVRow = SwapUVRow_AVX2;
    }
  }
#endif
#if defined(HAS_SWAPUVROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SwapUVRow = SwapUVRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      SwapUVRow = SwapUVRow_NEON;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    SwapUVRow(src_uv, dst_vu, width);
    src_uv += src_stride_uv;
    dst_vu += dst_stride_vu;
  }
}
1042
1043
// Convert NV21 to NV12: copy the Y plane (optional) and swap the interleaved
// chroma plane from VU byte order to UV byte order.
// Returns 0 on success, -1 on invalid parameters.
LIBYUV_API
int NV21ToNV12(const uint8_t* src_y,
               int src_stride_y,
               const uint8_t* src_vu,
               int src_stride_vu,
               uint8_t* dst_y,
               int dst_stride_y,
               uint8_t* dst_uv,
               int dst_stride_uv,
               int width,
               int height) {
  // Chroma is subsampled 2x2, so the VU/UV plane is half width, half height
  // (rounded up for odd sizes).
  int halfwidth = (width + 1) >> 1;
  int halfheight = (height + 1) >> 1;

  if (!src_vu || !dst_uv || width <= 0 || height == 0) {
    return -1;
  }

  // The Y copy is optional: callers may pass dst_y == NULL to convert chroma
  // only.  CopyPlane handles a negative (inverted) height itself.
  if (dst_y) {
    CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  }

  // Negative height means invert the image: point at the last chroma row and
  // walk backwards so SwapUVPlane sees a positive height.
  if (height < 0) {
    height = -height;
    halfheight = (height + 1) >> 1;
    src_vu = src_vu + (halfheight - 1) * src_stride_vu;
    src_stride_vu = -src_stride_vu;
  }

  SwapUVPlane(src_vu, src_stride_vu, dst_uv, dst_stride_uv, halfwidth,
              halfheight);
  return 0;
}
1078
1079
// Test if tile_height is a power of 2 (16 or 32).
// The classic !(x & (x - 1)) trick incorrectly accepts 0, which would let
// DetilePlane()/DetilePlane_16() proceed with tile_height == 0; require a
// strictly positive value as well.
#define IS_POWEROFTWO(x) ((x) > 0 && !((x) & ((x)-1)))
1081
1082
// Detile a plane of data: convert 16-byte-wide tiled rows to a linear plane.
// tile width is 16 and assumed.
// tile_height is 16 or 32 for MM21.
// src_stride_y is bytes per row of source ignoring tiling. e.g. 640
// Returns 0 on success, -1 on invalid parameters.
// TODO: More detile row functions.
LIBYUV_API
int DetilePlane(const uint8_t* src_y,
                int src_stride_y,
                uint8_t* dst_y,
                int dst_stride_y,
                int width,
                int height,
                int tile_height) {
  // Bytes occupied by one complete 16-wide tile.
  const ptrdiff_t src_tile_stride = 16 * tile_height;
  int y;
  void (*DetileRow)(const uint8_t* src, ptrdiff_t src_tile_stride, uint8_t* dst,
                    int width) = DetileRow_C;
  // tile_height must be a power of 2 so the (y & (tile_height - 1)) mask
  // below works.
  if (!src_y || !dst_y || width <= 0 || height == 0 ||
      !IS_POWEROFTWO(tile_height)) {
    return -1;
  }

  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_y = dst_y + (height - 1) * dst_stride_y;
    dst_stride_y = -dst_stride_y;
  }

  // Pick the fastest row function the CPU supports.
#if defined(HAS_DETILEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    DetileRow = DetileRow_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      DetileRow = DetileRow_SSE2;
    }
  }
#endif
#if defined(HAS_DETILEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    DetileRow = DetileRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      DetileRow = DetileRow_NEON;
    }
  }
#endif

  // Detile plane
  for (y = 0; y < height; ++y) {
    DetileRow(src_y, src_tile_stride, dst_y, width);
    dst_y += dst_stride_y;
    src_y += 16;  // Step to the same row in the next tile column.
    // Advance to next row of tiles.
    if ((y & (tile_height - 1)) == (tile_height - 1)) {
      src_y = src_y - src_tile_stride + src_stride_y * tile_height;
    }
  }
  return 0;
}
1140
1141
// Convert a plane of 16 bit tiles of 16 x H to linear.
// tile width is 16 and assumed.
// tile_height is 16 or 32 for MT2T.
// Returns 0 on success, -1 on invalid parameters.
LIBYUV_API
int DetilePlane_16(const uint16_t* src_y,
                   int src_stride_y,
                   uint16_t* dst_y,
                   int dst_stride_y,
                   int width,
                   int height,
                   int tile_height) {
  // Elements (uint16_t) occupied by one complete 16-wide tile; strides here
  // are in elements, not bytes.
  const ptrdiff_t src_tile_stride = 16 * tile_height;
  int y;
  void (*DetileRow_16)(const uint16_t* src, ptrdiff_t src_tile_stride,
                       uint16_t* dst, int width) = DetileRow_16_C;
  // tile_height must be a power of 2 so the (y & (tile_height - 1)) mask
  // below works.
  if (!src_y || !dst_y || width <= 0 || height == 0 ||
      !IS_POWEROFTWO(tile_height)) {
    return -1;
  }

  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_y = dst_y + (height - 1) * dst_stride_y;
    dst_stride_y = -dst_stride_y;
  }

  // Pick the fastest row function the CPU supports.
#if defined(HAS_DETILEROW_16_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    DetileRow_16 = DetileRow_16_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      DetileRow_16 = DetileRow_16_SSE2;
    }
  }
#endif
#if defined(HAS_DETILEROW_16_AVX)
  if (TestCpuFlag(kCpuHasAVX)) {
    DetileRow_16 = DetileRow_16_Any_AVX;
    if (IS_ALIGNED(width, 16)) {
      DetileRow_16 = DetileRow_16_AVX;
    }
  }
#endif
#if defined(HAS_DETILEROW_16_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    DetileRow_16 = DetileRow_16_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      DetileRow_16 = DetileRow_16_NEON;
    }
  }
#endif

  // Detile plane
  for (y = 0; y < height; ++y) {
    DetileRow_16(src_y, src_tile_stride, dst_y, width);
    dst_y += dst_stride_y;
    src_y += 16;  // Step to the same row in the next tile column.
    // Advance to next row of tiles.
    if ((y & (tile_height - 1)) == (tile_height - 1)) {
      src_y = src_y - src_tile_stride + src_stride_y * tile_height;
    }
  }
  return 0;
}
1205
1206
// Detile an interleaved UV plane (e.g. the chroma plane of MM21) and split
// it into separate linear U and V planes in one pass.
// tile width is 16 and assumed; tile_height must be a power of 2.
LIBYUV_API
void DetileSplitUVPlane(const uint8_t* src_uv,
                        int src_stride_uv,
                        uint8_t* dst_u,
                        int dst_stride_u,
                        uint8_t* dst_v,
                        int dst_stride_v,
                        int width,
                        int height,
                        int tile_height) {
  // Bytes occupied by one complete 16-wide tile.
  const ptrdiff_t src_tile_stride = 16 * tile_height;
  int y;
  void (*DetileSplitUVRow)(const uint8_t* src, ptrdiff_t src_tile_stride,
                           uint8_t* dst_u, uint8_t* dst_v, int width) =
      DetileSplitUVRow_C;
  // Debug-build sanity checks.  The previous assert(src_stride_uv >= 0) was
  // redundant with the strict > 0 assert and has been removed.
  assert(tile_height > 0);
  assert(src_stride_uv > 0);

  if (width <= 0 || height == 0) {
    return;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_u = dst_u + (height - 1) * dst_stride_u;
    dst_stride_u = -dst_stride_u;
    dst_v = dst_v + (height - 1) * dst_stride_v;
    dst_stride_v = -dst_stride_v;
  }

  // Pick the fastest row function the CPU supports; _Any_ variants handle
  // widths not a multiple of the SIMD lane count.
#if defined(HAS_DETILESPLITUVROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    DetileSplitUVRow = DetileSplitUVRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      DetileSplitUVRow = DetileSplitUVRow_SSSE3;
    }
  }
#endif
#if defined(HAS_DETILESPLITUVROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    DetileSplitUVRow = DetileSplitUVRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      DetileSplitUVRow = DetileSplitUVRow_NEON;
    }
  }
#endif

  // Detile plane
  for (y = 0; y < height; ++y) {
    DetileSplitUVRow(src_uv, src_tile_stride, dst_u, dst_v, width);
    dst_u += dst_stride_u;
    dst_v += dst_stride_v;
    src_uv += 16;  // Step to the same row in the next tile column.
    // Advance to next row of tiles.
    if ((y & (tile_height - 1)) == (tile_height - 1)) {
      src_uv = src_uv - src_tile_stride + src_stride_uv * tile_height;
    }
  }
}
1266
1267
// Convert a tiled Y plane plus tiled interleaved UV plane (e.g. MM21)
// directly to packed YUY2.  tile width is 16 and assumed; tile_height must
// be a power of 2 (UV tiles are half height).
LIBYUV_API
void DetileToYUY2(const uint8_t* src_y,
                  int src_stride_y,
                  const uint8_t* src_uv,
                  int src_stride_uv,
                  uint8_t* dst_yuy2,
                  int dst_stride_yuy2,
                  int width,
                  int height,
                  int tile_height) {
  const ptrdiff_t src_y_tile_stride = 16 * tile_height;
  // UV plane is vertically subsampled 2x, so its tiles hold half the rows.
  const ptrdiff_t src_uv_tile_stride = src_y_tile_stride / 2;
  int y;
  // Renamed from DetileToYUY2 so the local no longer shadows the enclosing
  // function's name.
  void (*DetileToYUY2Row)(const uint8_t* src_y, ptrdiff_t src_y_tile_stride,
                          const uint8_t* src_uv, ptrdiff_t src_uv_tile_stride,
                          uint8_t* dst_yuy2, int width) = DetileToYUY2_C;
  // Debug-build sanity checks.  The previous >= 0 asserts were redundant
  // with the strict > 0 asserts and have been removed.
  assert(src_stride_y > 0);
  assert(src_stride_uv > 0);
  assert(tile_height > 0);

  if (width <= 0 || height == 0 || tile_height <= 0) {
    return;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_yuy2 = dst_yuy2 + (height - 1) * dst_stride_yuy2;
    dst_stride_yuy2 = -dst_stride_yuy2;
  }

  // Pick the fastest row function the CPU supports; _Any_ variants handle
  // widths not a multiple of the SIMD lane count.
#if defined(HAS_DETILETOYUY2_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    DetileToYUY2Row = DetileToYUY2_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      DetileToYUY2Row = DetileToYUY2_NEON;
    }
  }
#endif

#if defined(HAS_DETILETOYUY2_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    DetileToYUY2Row = DetileToYUY2_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      DetileToYUY2Row = DetileToYUY2_SSE2;
    }
  }
#endif

  // Detile plane
  for (y = 0; y < height; ++y) {
    DetileToYUY2Row(src_y, src_y_tile_stride, src_uv, src_uv_tile_stride,
                    dst_yuy2, width);
    dst_yuy2 += dst_stride_yuy2;
    src_y += 16;  // Step to the same row in the next Y tile column.

    // UV is vertically subsampled 2x: advance it only every other Y row.
    if (y & 0x1)
      src_uv += 16;

    // Advance to next row of tiles.
    if ((y & (tile_height - 1)) == (tile_height - 1)) {
      src_y = src_y - src_y_tile_stride + src_stride_y * tile_height;
      src_uv = src_uv - src_uv_tile_stride + src_stride_uv * (tile_height / 2);
    }
  }
}
1334
1335
// Split an interleaved 24 bit RGB plane into separate R, G and B planes.
// NOTE(review): the original header comment here ("Support function for NV12
// etc RGB channels") appears copied from a UV helper — confirm intent.
LIBYUV_API
void SplitRGBPlane(const uint8_t* src_rgb,
                   int src_stride_rgb,
                   uint8_t* dst_r,
                   int dst_stride_r,
                   uint8_t* dst_g,
                   int dst_stride_g,
                   uint8_t* dst_b,
                   int dst_stride_b,
                   int width,
                   int height) {
  int y;
  void (*SplitRGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g,
                      uint8_t* dst_b, int width) = SplitRGBRow_C;
  if (width <= 0 || height == 0) {
    return;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_r = dst_r + (height - 1) * dst_stride_r;
    dst_g = dst_g + (height - 1) * dst_stride_g;
    dst_b = dst_b + (height - 1) * dst_stride_b;
    dst_stride_r = -dst_stride_r;
    dst_stride_g = -dst_stride_g;
    dst_stride_b = -dst_stride_b;
  }
  // Coalesce rows.  The packed source is 3 bytes per pixel.
  if (src_stride_rgb == width * 3 && dst_stride_r == width &&
      dst_stride_g == width && dst_stride_b == width) {
    width *= height;
    height = 1;
    src_stride_rgb = dst_stride_r = dst_stride_g = dst_stride_b = 0;
  }
  // Pick the fastest row function the CPU supports; _Any_ variants handle
  // widths not a multiple of the SIMD lane count.
#if defined(HAS_SPLITRGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    SplitRGBRow = SplitRGBRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      SplitRGBRow = SplitRGBRow_SSSE3;
    }
  }
#endif
#if defined(HAS_SPLITRGBROW_SSE41)
  if (TestCpuFlag(kCpuHasSSE41)) {
    SplitRGBRow = SplitRGBRow_Any_SSE41;
    if (IS_ALIGNED(width, 16)) {
      SplitRGBRow = SplitRGBRow_SSE41;
    }
  }
#endif
#if defined(HAS_SPLITRGBROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    SplitRGBRow = SplitRGBRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      SplitRGBRow = SplitRGBRow_AVX2;
    }
  }
#endif
#if defined(HAS_SPLITRGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SplitRGBRow = SplitRGBRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      SplitRGBRow = SplitRGBRow_NEON;
    }
  }
#endif
#if defined(HAS_SPLITRGBROW_RVV)
  if (TestCpuFlag(kCpuHasRVV)) {
    // RVV is vector-length agnostic; no alignment-specific variant needed.
    SplitRGBRow = SplitRGBRow_RVV;
  }
#endif

  for (y = 0; y < height; ++y) {
    // Copy a row of RGB.
    SplitRGBRow(src_rgb, dst_r, dst_g, dst_b, width);
    dst_r += dst_stride_r;
    dst_g += dst_stride_g;
    dst_b += dst_stride_b;
    src_rgb += src_stride_rgb;
  }
}
1418
1419
// Merge separate R, G and B planes into one interleaved 24 bit RGB plane.
LIBYUV_API
void MergeRGBPlane(const uint8_t* src_r,
                   int src_stride_r,
                   const uint8_t* src_g,
                   int src_stride_g,
                   const uint8_t* src_b,
                   int src_stride_b,
                   uint8_t* dst_rgb,
                   int dst_stride_rgb,
                   int width,
                   int height) {
  int y;
  void (*MergeRGBRow)(const uint8_t* src_r, const uint8_t* src_g,
                      const uint8_t* src_b, uint8_t* dst_rgb, int width) =
      MergeRGBRow_C;
  if (width <= 0 || height == 0) {
    return;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_rgb = dst_rgb + (height - 1) * dst_stride_rgb;
    dst_stride_rgb = -dst_stride_rgb;
  }
  // Coalesce rows.  The packed destination is 3 bytes per pixel.
  if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
      dst_stride_rgb == width * 3) {
    width *= height;
    height = 1;
    src_stride_r = src_stride_g = src_stride_b = dst_stride_rgb = 0;
  }
  // Pick the fastest row function the CPU supports; _Any_ variants handle
  // widths not a multiple of the SIMD lane count.
#if defined(HAS_MERGERGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    MergeRGBRow = MergeRGBRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      MergeRGBRow = MergeRGBRow_SSSE3;
    }
  }
#endif
#if defined(HAS_MERGERGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    MergeRGBRow = MergeRGBRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      MergeRGBRow = MergeRGBRow_NEON;
    }
  }
#endif
#if defined(HAS_MERGERGBROW_RVV)
  if (TestCpuFlag(kCpuHasRVV)) {
    // RVV is vector-length agnostic; no alignment-specific variant needed.
    MergeRGBRow = MergeRGBRow_RVV;
  }
#endif

  for (y = 0; y < height; ++y) {
    // Merge a row of R, G and B into a row of RGB.
    MergeRGBRow(src_r, src_g, src_b, dst_rgb, width);
    src_r += src_stride_r;
    src_g += src_stride_g;
    src_b += src_stride_b;
    dst_rgb += dst_stride_rgb;
  }
}
1482
1483
// Split interleaved ARGB into separate R, G, B and A planes.
// Internal helper for SplitARGBPlane; height has already been made positive
// by the caller (hence the assert).
LIBYUV_NOINLINE
static void SplitARGBPlaneAlpha(const uint8_t* src_argb,
                                int src_stride_argb,
                                uint8_t* dst_r,
                                int dst_stride_r,
                                uint8_t* dst_g,
                                int dst_stride_g,
                                uint8_t* dst_b,
                                int dst_stride_b,
                                uint8_t* dst_a,
                                int dst_stride_a,
                                int width,
                                int height) {
  int y;
  void (*SplitARGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g,
                       uint8_t* dst_b, uint8_t* dst_a, int width) =
      SplitARGBRow_C;

  assert(height > 0);

  if (width <= 0 || height == 0) {
    return;
  }
  // Coalesce rows.  The packed source is 4 bytes per pixel.
  if (src_stride_argb == width * 4 && dst_stride_r == width &&
      dst_stride_g == width && dst_stride_b == width && dst_stride_a == width) {
    width *= height;
    height = 1;
    src_stride_argb = dst_stride_r = dst_stride_g = dst_stride_b =
        dst_stride_a = 0;
  }

  // Pick the fastest row function the CPU supports; _Any_ variants handle
  // widths not a multiple of the SIMD lane count.
#if defined(HAS_SPLITARGBROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    SplitARGBRow = SplitARGBRow_Any_SSE2;
    if (IS_ALIGNED(width, 8)) {
      SplitARGBRow = SplitARGBRow_SSE2;
    }
  }
#endif
#if defined(HAS_SPLITARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    SplitARGBRow = SplitARGBRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      SplitARGBRow = SplitARGBRow_SSSE3;
    }
  }
#endif
#if defined(HAS_SPLITARGBROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    SplitARGBRow = SplitARGBRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      SplitARGBRow = SplitARGBRow_AVX2;
    }
  }
#endif
#if defined(HAS_SPLITARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SplitARGBRow = SplitARGBRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      SplitARGBRow = SplitARGBRow_NEON;
    }
  }
#endif
#if defined(HAS_SPLITARGBROW_RVV)
  if (TestCpuFlag(kCpuHasRVV)) {
    // RVV is vector-length agnostic; no alignment-specific variant needed.
    SplitARGBRow = SplitARGBRow_RVV;
  }
#endif

  for (y = 0; y < height; ++y) {
    SplitARGBRow(src_argb, dst_r, dst_g, dst_b, dst_a, width);
    dst_r += dst_stride_r;
    dst_g += dst_stride_g;
    dst_b += dst_stride_b;
    dst_a += dst_stride_a;
    src_argb += src_stride_argb;
  }
}
1561
1562
// Split interleaved ARGB into separate R, G and B planes, discarding alpha.
// Internal helper for SplitARGBPlane; height has already been made positive
// by the caller (hence the assert).
LIBYUV_NOINLINE
static void SplitARGBPlaneOpaque(const uint8_t* src_argb,
                                 int src_stride_argb,
                                 uint8_t* dst_r,
                                 int dst_stride_r,
                                 uint8_t* dst_g,
                                 int dst_stride_g,
                                 uint8_t* dst_b,
                                 int dst_stride_b,
                                 int width,
                                 int height) {
  int y;
  void (*SplitXRGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g,
                       uint8_t* dst_b, int width) = SplitXRGBRow_C;
  assert(height > 0);

  if (width <= 0 || height == 0) {
    return;
  }
  // Coalesce rows.  The packed source is 4 bytes per pixel.
  if (src_stride_argb == width * 4 && dst_stride_r == width &&
      dst_stride_g == width && dst_stride_b == width) {
    width *= height;
    height = 1;
    src_stride_argb = dst_stride_r = dst_stride_g = dst_stride_b = 0;
  }

  // Pick the fastest row function the CPU supports; _Any_ variants handle
  // widths not a multiple of the SIMD lane count.
#if defined(HAS_SPLITXRGBROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    SplitXRGBRow = SplitXRGBRow_Any_SSE2;
    if (IS_ALIGNED(width, 8)) {
      SplitXRGBRow = SplitXRGBRow_SSE2;
    }
  }
#endif
#if defined(HAS_SPLITXRGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    SplitXRGBRow = SplitXRGBRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      SplitXRGBRow = SplitXRGBRow_SSSE3;
    }
  }
#endif
#if defined(HAS_SPLITXRGBROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    SplitXRGBRow = SplitXRGBRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      SplitXRGBRow = SplitXRGBRow_AVX2;
    }
  }
#endif
#if defined(HAS_SPLITXRGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SplitXRGBRow = SplitXRGBRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      SplitXRGBRow = SplitXRGBRow_NEON;
    }
  }
#endif
#if defined(HAS_SPLITXRGBROW_RVV)
  if (TestCpuFlag(kCpuHasRVV)) {
    // RVV is vector-length agnostic; no alignment-specific variant needed.
    SplitXRGBRow = SplitXRGBRow_RVV;
  }
#endif

  for (y = 0; y < height; ++y) {
    SplitXRGBRow(src_argb, dst_r, dst_g, dst_b, width);
    dst_r += dst_stride_r;
    dst_g += dst_stride_g;
    dst_b += dst_stride_b;
    src_argb += src_stride_argb;
  }
}
1634
1635
// Split interleaved ARGB into separate R, G, B and (optionally) A planes.
// Pass dst_a == NULL to discard the alpha channel.
LIBYUV_API
void SplitARGBPlane(const uint8_t* src_argb,
                    int src_stride_argb,
                    uint8_t* dst_r,
                    int dst_stride_r,
                    uint8_t* dst_g,
                    int dst_stride_g,
                    uint8_t* dst_b,
                    int dst_stride_b,
                    uint8_t* dst_a,
                    int dst_stride_a,
                    int width,
                    int height) {
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_r = dst_r + (height - 1) * dst_stride_r;
    dst_g = dst_g + (height - 1) * dst_stride_g;
    dst_b = dst_b + (height - 1) * dst_stride_b;
    // dst_a may legitimately be NULL (alpha discarded).  Pointer arithmetic
    // on a null pointer is undefined behavior, so only rebase when present.
    if (dst_a) {
      dst_a = dst_a + (height - 1) * dst_stride_a;
    }
    dst_stride_r = -dst_stride_r;
    dst_stride_g = -dst_stride_g;
    dst_stride_b = -dst_stride_b;
    dst_stride_a = -dst_stride_a;
  }

  if (dst_a == NULL) {
    SplitARGBPlaneOpaque(src_argb, src_stride_argb, dst_r, dst_stride_r, dst_g,
                         dst_stride_g, dst_b, dst_stride_b, width, height);
  } else {
    SplitARGBPlaneAlpha(src_argb, src_stride_argb, dst_r, dst_stride_r, dst_g,
                        dst_stride_g, dst_b, dst_stride_b, dst_a, dst_stride_a,
                        width, height);
  }
}
1670
1671
// Merge separate R, G, B and A planes into one interleaved ARGB plane.
// Internal helper for MergeARGBPlane; height has already been made positive
// by the caller (hence the assert).
LIBYUV_NOINLINE
static void MergeARGBPlaneAlpha(const uint8_t* src_r,
                                int src_stride_r,
                                const uint8_t* src_g,
                                int src_stride_g,
                                const uint8_t* src_b,
                                int src_stride_b,
                                const uint8_t* src_a,
                                int src_stride_a,
                                uint8_t* dst_argb,
                                int dst_stride_argb,
                                int width,
                                int height) {
  int y;
  void (*MergeARGBRow)(const uint8_t* src_r, const uint8_t* src_g,
                       const uint8_t* src_b, const uint8_t* src_a,
                       uint8_t* dst_argb, int width) = MergeARGBRow_C;

  assert(height > 0);

  if (width <= 0 || height == 0) {
    return;
  }
  // Coalesce rows.  The packed destination is 4 bytes per pixel.
  if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
      src_stride_a == width && dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_r = src_stride_g = src_stride_b = src_stride_a =
        dst_stride_argb = 0;
  }
  // Pick the fastest row function the CPU supports; _Any_ variants handle
  // widths not a multiple of the SIMD lane count.
#if defined(HAS_MERGEARGBROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    MergeARGBRow = MergeARGBRow_Any_SSE2;
    if (IS_ALIGNED(width, 8)) {
      MergeARGBRow = MergeARGBRow_SSE2;
    }
  }
#endif
#if defined(HAS_MERGEARGBROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    MergeARGBRow = MergeARGBRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      MergeARGBRow = MergeARGBRow_AVX2;
    }
  }
#endif
#if defined(HAS_MERGEARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    MergeARGBRow = MergeARGBRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      MergeARGBRow = MergeARGBRow_NEON;
    }
  }
#endif
#if defined(HAS_MERGEARGBROW_RVV)
  if (TestCpuFlag(kCpuHasRVV)) {
    // RVV is vector-length agnostic; no alignment-specific variant needed.
    MergeARGBRow = MergeARGBRow_RVV;
  }
#endif

  for (y = 0; y < height; ++y) {
    MergeARGBRow(src_r, src_g, src_b, src_a, dst_argb, width);
    src_r += src_stride_r;
    src_g += src_stride_g;
    src_b += src_stride_b;
    src_a += src_stride_a;
    dst_argb += dst_stride_argb;
  }
}
1740
1741
// Merge separate R, G and B planes into one interleaved ARGB plane with an
// opaque (implicit) alpha channel.
// Internal helper for MergeARGBPlane; height has already been made positive
// by the caller (hence the assert).
LIBYUV_NOINLINE
static void MergeARGBPlaneOpaque(const uint8_t* src_r,
                                 int src_stride_r,
                                 const uint8_t* src_g,
                                 int src_stride_g,
                                 const uint8_t* src_b,
                                 int src_stride_b,
                                 uint8_t* dst_argb,
                                 int dst_stride_argb,
                                 int width,
                                 int height) {
  int y;
  void (*MergeXRGBRow)(const uint8_t* src_r, const uint8_t* src_g,
                       const uint8_t* src_b, uint8_t* dst_argb, int width) =
      MergeXRGBRow_C;

  assert(height > 0);

  if (width <= 0 || height == 0) {
    return;
  }
  // Coalesce rows.  The packed destination is 4 bytes per pixel.
  if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
      dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_r = src_stride_g = src_stride_b = dst_stride_argb = 0;
  }
  // Pick the fastest row function the CPU supports; _Any_ variants handle
  // widths not a multiple of the SIMD lane count.
#if defined(HAS_MERGEXRGBROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    MergeXRGBRow = MergeXRGBRow_Any_SSE2;
    if (IS_ALIGNED(width, 8)) {
      MergeXRGBRow = MergeXRGBRow_SSE2;
    }
  }
#endif
#if defined(HAS_MERGEXRGBROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    MergeXRGBRow = MergeXRGBRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      MergeXRGBRow = MergeXRGBRow_AVX2;
    }
  }
#endif
#if defined(HAS_MERGEXRGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    MergeXRGBRow = MergeXRGBRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      MergeXRGBRow = MergeXRGBRow_NEON;
    }
  }
#endif
#if defined(HAS_MERGEXRGBROW_RVV)
  if (TestCpuFlag(kCpuHasRVV)) {
    // RVV is vector-length agnostic; no alignment-specific variant needed.
    MergeXRGBRow = MergeXRGBRow_RVV;
  }
#endif

  for (y = 0; y < height; ++y) {
    MergeXRGBRow(src_r, src_g, src_b, dst_argb, width);
    src_r += src_stride_r;
    src_g += src_stride_g;
    src_b += src_stride_b;
    dst_argb += dst_stride_argb;
  }
}
1806
1807
// Merge separate R, G, B and (optionally) A planes into one interleaved ARGB
// plane.  Pass src_a == NULL to emit an opaque alpha channel.
LIBYUV_API
void MergeARGBPlane(const uint8_t* src_r,
                    int src_stride_r,
                    const uint8_t* src_g,
                    int src_stride_g,
                    const uint8_t* src_b,
                    int src_stride_b,
                    const uint8_t* src_a,
                    int src_stride_a,
                    uint8_t* dst_argb,
                    int dst_stride_argb,
                    int width,
                    int height) {
  // Negative height means invert the image: rebase the destination to its
  // last row so the helpers can iterate with a positive height.
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }

  if (src_a == NULL) {
    MergeARGBPlaneOpaque(src_r, src_stride_r, src_g, src_stride_g, src_b,
                         src_stride_b, dst_argb, dst_stride_argb, width,
                         height);
  } else {
    MergeARGBPlaneAlpha(src_r, src_stride_r, src_g, src_stride_g, src_b,
                        src_stride_b, src_a, src_stride_a, dst_argb,
                        dst_stride_argb, width, height);
  }
}
1837
1838
// TODO(yuan): Support 2 bit alpha channel.
1839
LIBYUV_API
void MergeXR30Plane(const uint16_t* src_r,
                    int src_stride_r,
                    const uint16_t* src_g,
                    int src_stride_g,
                    const uint16_t* src_b,
                    int src_stride_b,
                    uint8_t* dst_ar30,
                    int dst_stride_ar30,
                    int width,
                    int height,
                    int depth) {
  int y;
  // Row worker: packs one row of 16-bit R/G/B samples (significant bits given
  // by 'depth') into 4-bytes-per-pixel AR30 output.
  void (*MergeXR30Row)(const uint16_t* src_r, const uint16_t* src_g,
                       const uint16_t* src_b, uint8_t* dst_ar30, int depth,
                       int width) = MergeXR30Row_C;
  // Reject degenerate sizes up front, consistent with CopyPlane(), instead of
  // entering the coalescing logic / row loop with a zero or negative width.
  if (width <= 0 || height == 0) {
    return;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_ar30 = dst_ar30 + (height - 1) * dst_stride_ar30;
    dst_stride_ar30 = -dst_stride_ar30;
  }
  // Coalesce rows: contiguous planes can be handled as one long row.
  if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
      dst_stride_ar30 == width * 4) {
    width *= height;
    height = 1;
    src_stride_r = src_stride_g = src_stride_b = dst_stride_ar30 = 0;
  }
#if defined(HAS_MERGEXR30ROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    MergeXR30Row = MergeXR30Row_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      MergeXR30Row = MergeXR30Row_AVX2;
    }
  }
#endif
#if defined(HAS_MERGEXR30ROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    // NEON provides a dedicated 10-bit kernel; other depths use the generic
    // depth-parameterized one.
    if (depth == 10) {
      MergeXR30Row = MergeXR30Row_10_Any_NEON;
      if (IS_ALIGNED(width, 8)) {
        MergeXR30Row = MergeXR30Row_10_NEON;
      }
    } else {
      MergeXR30Row = MergeXR30Row_Any_NEON;
      if (IS_ALIGNED(width, 8)) {
        MergeXR30Row = MergeXR30Row_NEON;
      }
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    MergeXR30Row(src_r, src_g, src_b, dst_ar30, depth, width);
    src_r += src_stride_r;
    src_g += src_stride_g;
    src_b += src_stride_b;
    dst_ar30 += dst_stride_ar30;
  }
}
1901
1902
LIBYUV_NOINLINE
static void MergeAR64PlaneAlpha(const uint16_t* src_r,
                                int src_stride_r,
                                const uint16_t* src_g,
                                int src_stride_g,
                                const uint16_t* src_b,
                                int src_stride_b,
                                const uint16_t* src_a,
                                int src_stride_a,
                                uint16_t* dst_ar64,
                                int dst_stride_ar64,
                                int width,
                                int height,
                                int depth) {
  // Interleave four 16-bit planes (R, G, B, A) into packed AR64 output,
  // one row at a time.
  void (*MergeAR64Row)(const uint16_t* src_r, const uint16_t* src_g,
                       const uint16_t* src_b, const uint16_t* src_a,
                       uint16_t* dst_argb, int depth, int width) =
      MergeAR64Row_C;
  int row;

  // When every plane is stored contiguously, process the whole image as a
  // single long row.
  if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
      src_stride_a == width && dst_stride_ar64 == width * 4) {
    width *= height;
    height = 1;
    src_stride_r = src_stride_g = src_stride_b = src_stride_a =
        dst_stride_ar64 = 0;
  }
#if defined(HAS_MERGEAR64ROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    MergeAR64Row =
        IS_ALIGNED(width, 16) ? MergeAR64Row_AVX2 : MergeAR64Row_Any_AVX2;
  }
#endif
#if defined(HAS_MERGEAR64ROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    MergeAR64Row =
        IS_ALIGNED(width, 8) ? MergeAR64Row_NEON : MergeAR64Row_Any_NEON;
  }
#endif

  for (row = 0; row < height; ++row) {
    MergeAR64Row(src_r, src_g, src_b, src_a, dst_ar64, depth, width);
    src_r += src_stride_r;
    src_g += src_stride_g;
    src_b += src_stride_b;
    src_a += src_stride_a;
    dst_ar64 += dst_stride_ar64;
  }
}
1955
1956
LIBYUV_NOINLINE
static void MergeAR64PlaneOpaque(const uint16_t* src_r,
                                 int src_stride_r,
                                 const uint16_t* src_g,
                                 int src_stride_g,
                                 const uint16_t* src_b,
                                 int src_stride_b,
                                 uint16_t* dst_ar64,
                                 int dst_stride_ar64,
                                 int width,
                                 int height,
                                 int depth) {
  // Interleave three 16-bit planes (R, G, B) into packed AR64 output; the
  // row kernels supply the alpha channel themselves (XR64 = no alpha input).
  void (*MergeXR64Row)(const uint16_t* src_r, const uint16_t* src_g,
                       const uint16_t* src_b, uint16_t* dst_argb, int depth,
                       int width) = MergeXR64Row_C;
  int row;

  // Coalesce rows: contiguous planes collapse into one long row.
  if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
      dst_stride_ar64 == width * 4) {
    width *= height;
    height = 1;
    src_stride_r = src_stride_g = src_stride_b = dst_stride_ar64 = 0;
  }
#if defined(HAS_MERGEXR64ROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    MergeXR64Row =
        IS_ALIGNED(width, 16) ? MergeXR64Row_AVX2 : MergeXR64Row_Any_AVX2;
  }
#endif
#if defined(HAS_MERGEXR64ROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    MergeXR64Row =
        IS_ALIGNED(width, 8) ? MergeXR64Row_NEON : MergeXR64Row_Any_NEON;
  }
#endif

  for (row = 0; row < height; ++row) {
    MergeXR64Row(src_r, src_g, src_b, dst_ar64, depth, width);
    src_r += src_stride_r;
    src_g += src_stride_g;
    src_b += src_stride_b;
    dst_ar64 += dst_stride_ar64;
  }
}
2005
2006
LIBYUV_API
void MergeAR64Plane(const uint16_t* src_r,
                    int src_stride_r,
                    const uint16_t* src_g,
                    int src_stride_g,
                    const uint16_t* src_b,
                    int src_stride_b,
                    const uint16_t* src_a,
                    int src_stride_a,
                    uint16_t* dst_ar64,
                    int dst_stride_ar64,
                    int width,
                    int height,
                    int depth) {
  // A negative height requests a vertical flip: write the output bottom-up.
  if (height < 0) {
    height = -height;
    dst_ar64 += (height - 1) * dst_stride_ar64;
    dst_stride_ar64 = -dst_stride_ar64;
  }

  // A NULL alpha plane selects the opaque (alpha-free) variant.
  if (src_a) {
    MergeAR64PlaneAlpha(src_r, src_stride_r, src_g, src_stride_g, src_b,
                        src_stride_b, src_a, src_stride_a, dst_ar64,
                        dst_stride_ar64, width, height, depth);
  } else {
    MergeAR64PlaneOpaque(src_r, src_stride_r, src_g, src_stride_g, src_b,
                         src_stride_b, dst_ar64, dst_stride_ar64, width, height,
                         depth);
  }
}
2037
2038
LIBYUV_NOINLINE
static void MergeARGB16To8PlaneAlpha(const uint16_t* src_r,
                                     int src_stride_r,
                                     const uint16_t* src_g,
                                     int src_stride_g,
                                     const uint16_t* src_b,
                                     int src_stride_b,
                                     const uint16_t* src_a,
                                     int src_stride_a,
                                     uint8_t* dst_argb,
                                     int dst_stride_argb,
                                     int width,
                                     int height,
                                     int depth) {
  // Interleave 16-bit R/G/B/A planes ('depth' significant bits each) into
  // packed 8-bit-per-channel ARGB output.
  void (*MergeARGB16To8Row)(const uint16_t* src_r, const uint16_t* src_g,
                            const uint16_t* src_b, const uint16_t* src_a,
                            uint8_t* dst_argb, int depth, int width) =
      MergeARGB16To8Row_C;
  int row;

  // Coalesce rows: contiguous planes collapse into one long row.
  if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
      src_stride_a == width && dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_r = src_stride_g = src_stride_b = src_stride_a =
        dst_stride_argb = 0;
  }
#if defined(HAS_MERGEARGB16TO8ROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    MergeARGB16To8Row = IS_ALIGNED(width, 16) ? MergeARGB16To8Row_AVX2
                                              : MergeARGB16To8Row_Any_AVX2;
  }
#endif
#if defined(HAS_MERGEARGB16TO8ROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    MergeARGB16To8Row = IS_ALIGNED(width, 8) ? MergeARGB16To8Row_NEON
                                             : MergeARGB16To8Row_Any_NEON;
  }
#endif

  for (row = 0; row < height; ++row) {
    MergeARGB16To8Row(src_r, src_g, src_b, src_a, dst_argb, depth, width);
    src_r += src_stride_r;
    src_g += src_stride_g;
    src_b += src_stride_b;
    src_a += src_stride_a;
    dst_argb += dst_stride_argb;
  }
}
2091
2092
LIBYUV_NOINLINE
static void MergeARGB16To8PlaneOpaque(const uint16_t* src_r,
                                      int src_stride_r,
                                      const uint16_t* src_g,
                                      int src_stride_g,
                                      const uint16_t* src_b,
                                      int src_stride_b,
                                      uint8_t* dst_argb,
                                      int dst_stride_argb,
                                      int width,
                                      int height,
                                      int depth) {
  // Interleave 16-bit R/G/B planes into packed 8-bit-per-channel output; the
  // XRGB row kernels take no alpha input.
  void (*MergeXRGB16To8Row)(const uint16_t* src_r, const uint16_t* src_g,
                            const uint16_t* src_b, uint8_t* dst_argb, int depth,
                            int width) = MergeXRGB16To8Row_C;
  int row;

  // Coalesce rows: contiguous planes collapse into one long row.
  if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
      dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_r = src_stride_g = src_stride_b = dst_stride_argb = 0;
  }
#if defined(HAS_MERGEXRGB16TO8ROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    MergeXRGB16To8Row = IS_ALIGNED(width, 16) ? MergeXRGB16To8Row_AVX2
                                              : MergeXRGB16To8Row_Any_AVX2;
  }
#endif
#if defined(HAS_MERGEXRGB16TO8ROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    MergeXRGB16To8Row = IS_ALIGNED(width, 8) ? MergeXRGB16To8Row_NEON
                                             : MergeXRGB16To8Row_Any_NEON;
  }
#endif

  for (row = 0; row < height; ++row) {
    MergeXRGB16To8Row(src_r, src_g, src_b, dst_argb, depth, width);
    src_r += src_stride_r;
    src_g += src_stride_g;
    src_b += src_stride_b;
    dst_argb += dst_stride_argb;
  }
}
2141
2142
LIBYUV_API
void MergeARGB16To8Plane(const uint16_t* src_r,
                         int src_stride_r,
                         const uint16_t* src_g,
                         int src_stride_g,
                         const uint16_t* src_b,
                         int src_stride_b,
                         const uint16_t* src_a,
                         int src_stride_a,
                         uint8_t* dst_argb,
                         int dst_stride_argb,
                         int width,
                         int height,
                         int depth) {
  // A negative height requests a vertical flip: write the output bottom-up.
  if (height < 0) {
    height = -height;
    dst_argb += (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }

  // A NULL alpha plane selects the opaque (alpha-free) variant.
  if (src_a) {
    MergeARGB16To8PlaneAlpha(src_r, src_stride_r, src_g, src_stride_g, src_b,
                             src_stride_b, src_a, src_stride_a, dst_argb,
                             dst_stride_argb, width, height, depth);
  } else {
    MergeARGB16To8PlaneOpaque(src_r, src_stride_r, src_g, src_stride_g, src_b,
                              src_stride_b, dst_argb, dst_stride_argb, width,
                              height, depth);
  }
}
2173
2174
// Convert YUY2 to I422.
2175
LIBYUV_API
int YUY2ToI422(const uint8_t* src_yuy2,
               int src_stride_yuy2,
               uint8_t* dst_y,
               int dst_stride_y,
               uint8_t* dst_u,
               int dst_stride_u,
               uint8_t* dst_v,
               int dst_stride_v,
               int width,
               int height) {
  int row;
  void (*YUY2ToUV422Row)(const uint8_t* src_yuy2, uint8_t* dst_u,
                         uint8_t* dst_v, int width) = YUY2ToUV422Row_C;
  void (*YUY2ToYRow)(const uint8_t* src_yuy2, uint8_t* dst_y, int width) =
      YUY2ToYRow_C;
  // Validate arguments; all planes are required.
  if (!src_yuy2 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
    return -1;
  }
  // A negative height flips the image: start at the last source row and walk
  // the source backwards.
  if (height < 0) {
    height = -height;
    src_yuy2 += (height - 1) * src_stride_yuy2;
    src_stride_yuy2 = -src_stride_yuy2;
  }
  // Coalesce contiguous rows into one long row; capped at 32768 pixels so
  // the combined width stays within the range the row kernels are built for.
  if (src_stride_yuy2 == width * 2 && dst_stride_y == width &&
      dst_stride_u * 2 == width && dst_stride_v * 2 == width &&
      width * height <= 32768) {
    width *= height;
    height = 1;
    src_stride_yuy2 = dst_stride_y = dst_stride_u = dst_stride_v = 0;
  }
#if defined(HAS_YUY2TOYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    YUY2ToUV422Row =
        IS_ALIGNED(width, 16) ? YUY2ToUV422Row_SSE2 : YUY2ToUV422Row_Any_SSE2;
    YUY2ToYRow = IS_ALIGNED(width, 16) ? YUY2ToYRow_SSE2 : YUY2ToYRow_Any_SSE2;
  }
#endif
#if defined(HAS_YUY2TOYROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    YUY2ToUV422Row =
        IS_ALIGNED(width, 32) ? YUY2ToUV422Row_AVX2 : YUY2ToUV422Row_Any_AVX2;
    YUY2ToYRow = IS_ALIGNED(width, 32) ? YUY2ToYRow_AVX2 : YUY2ToYRow_Any_AVX2;
  }
#endif
#if defined(HAS_YUY2TOYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    YUY2ToYRow = IS_ALIGNED(width, 16) ? YUY2ToYRow_NEON : YUY2ToYRow_Any_NEON;
    YUY2ToUV422Row =
        IS_ALIGNED(width, 16) ? YUY2ToUV422Row_NEON : YUY2ToUV422Row_Any_NEON;
  }
#endif
#if defined(HAS_YUY2TOYROW_LSX) && defined(HAS_YUY2TOUV422ROW_LSX)
  if (TestCpuFlag(kCpuHasLSX)) {
    YUY2ToYRow = IS_ALIGNED(width, 16) ? YUY2ToYRow_LSX : YUY2ToYRow_Any_LSX;
    YUY2ToUV422Row =
        IS_ALIGNED(width, 16) ? YUY2ToUV422Row_LSX : YUY2ToUV422Row_Any_LSX;
  }
#endif
#if defined(HAS_YUY2TOYROW_LASX) && defined(HAS_YUY2TOUV422ROW_LASX)
  if (TestCpuFlag(kCpuHasLASX)) {
    YUY2ToYRow = IS_ALIGNED(width, 32) ? YUY2ToYRow_LASX : YUY2ToYRow_Any_LASX;
    YUY2ToUV422Row =
        IS_ALIGNED(width, 32) ? YUY2ToUV422Row_LASX : YUY2ToUV422Row_Any_LASX;
  }
#endif

  // Each YUY2 row yields one full-width Y row plus half-width U and V rows.
  for (row = 0; row < height; ++row) {
    YUY2ToUV422Row(src_yuy2, dst_u, dst_v, width);
    YUY2ToYRow(src_yuy2, dst_y, width);
    src_yuy2 += src_stride_yuy2;
    dst_y += dst_stride_y;
    dst_u += dst_stride_u;
    dst_v += dst_stride_v;
  }
  return 0;
}
2269
2270
// Convert UYVY to I422.
2271
LIBYUV_API
int UYVYToI422(const uint8_t* src_uyvy,
               int src_stride_uyvy,
               uint8_t* dst_y,
               int dst_stride_y,
               uint8_t* dst_u,
               int dst_stride_u,
               uint8_t* dst_v,
               int dst_stride_v,
               int width,
               int height) {
  int row;
  void (*UYVYToUV422Row)(const uint8_t* src_uyvy, uint8_t* dst_u,
                         uint8_t* dst_v, int width) = UYVYToUV422Row_C;
  void (*UYVYToYRow)(const uint8_t* src_uyvy, uint8_t* dst_y, int width) =
      UYVYToYRow_C;
  // Validate arguments; all planes are required.
  if (!src_uyvy || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
    return -1;
  }
  // A negative height flips the image: start at the last source row and walk
  // the source backwards.
  if (height < 0) {
    height = -height;
    src_uyvy += (height - 1) * src_stride_uyvy;
    src_stride_uyvy = -src_stride_uyvy;
  }
  // Coalesce contiguous rows into one long row; capped at 32768 pixels so
  // the combined width stays within the range the row kernels are built for.
  if (src_stride_uyvy == width * 2 && dst_stride_y == width &&
      dst_stride_u * 2 == width && dst_stride_v * 2 == width &&
      width * height <= 32768) {
    width *= height;
    height = 1;
    src_stride_uyvy = dst_stride_y = dst_stride_u = dst_stride_v = 0;
  }
#if defined(HAS_UYVYTOYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    UYVYToUV422Row =
        IS_ALIGNED(width, 16) ? UYVYToUV422Row_SSE2 : UYVYToUV422Row_Any_SSE2;
    UYVYToYRow = IS_ALIGNED(width, 16) ? UYVYToYRow_SSE2 : UYVYToYRow_Any_SSE2;
  }
#endif
#if defined(HAS_UYVYTOYROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    UYVYToUV422Row =
        IS_ALIGNED(width, 32) ? UYVYToUV422Row_AVX2 : UYVYToUV422Row_Any_AVX2;
    UYVYToYRow = IS_ALIGNED(width, 32) ? UYVYToYRow_AVX2 : UYVYToYRow_Any_AVX2;
  }
#endif
#if defined(HAS_UYVYTOYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    UYVYToYRow = IS_ALIGNED(width, 16) ? UYVYToYRow_NEON : UYVYToYRow_Any_NEON;
    UYVYToUV422Row =
        IS_ALIGNED(width, 16) ? UYVYToUV422Row_NEON : UYVYToUV422Row_Any_NEON;
  }
#endif
#if defined(HAS_UYVYTOYROW_LSX) && defined(HAS_UYVYTOUV422ROW_LSX)
  if (TestCpuFlag(kCpuHasLSX)) {
    UYVYToYRow = IS_ALIGNED(width, 16) ? UYVYToYRow_LSX : UYVYToYRow_Any_LSX;
    UYVYToUV422Row =
        IS_ALIGNED(width, 16) ? UYVYToUV422Row_LSX : UYVYToUV422Row_Any_LSX;
  }
#endif
#if defined(HAS_UYVYTOYROW_LASX) && defined(HAS_UYVYTOUV422ROW_LASX)
  if (TestCpuFlag(kCpuHasLASX)) {
    UYVYToYRow = IS_ALIGNED(width, 32) ? UYVYToYRow_LASX : UYVYToYRow_Any_LASX;
    UYVYToUV422Row =
        IS_ALIGNED(width, 32) ? UYVYToUV422Row_LASX : UYVYToUV422Row_Any_LASX;
  }
#endif

  // Each UYVY row yields one full-width Y row plus half-width U and V rows.
  for (row = 0; row < height; ++row) {
    UYVYToUV422Row(src_uyvy, dst_u, dst_v, width);
    UYVYToYRow(src_uyvy, dst_y, width);
    src_uyvy += src_stride_uyvy;
    dst_y += dst_stride_y;
    dst_u += dst_stride_u;
    dst_v += dst_stride_v;
  }
  return 0;
}
2365
2366
// Convert YUY2 to Y.
2367
LIBYUV_API
int YUY2ToY(const uint8_t* src_yuy2,
            int src_stride_yuy2,
            uint8_t* dst_y,
            int dst_stride_y,
            int width,
            int height) {
  int row;
  // Extract only the luma channel from packed YUY2 (2 bytes per pixel).
  void (*YUY2ToYRow)(const uint8_t* src_yuy2, uint8_t* dst_y, int width) =
      YUY2ToYRow_C;
  if (!src_yuy2 || !dst_y || width <= 0 || height == 0) {
    return -1;
  }
  // A negative height flips the image: walk the source bottom-up.
  if (height < 0) {
    height = -height;
    src_yuy2 += (height - 1) * src_stride_yuy2;
    src_stride_yuy2 = -src_stride_yuy2;
  }
  // Coalesce contiguous rows into one long row.
  if (src_stride_yuy2 == width * 2 && dst_stride_y == width) {
    width *= height;
    height = 1;
    src_stride_yuy2 = dst_stride_y = 0;
  }
#if defined(HAS_YUY2TOYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    YUY2ToYRow = IS_ALIGNED(width, 16) ? YUY2ToYRow_SSE2 : YUY2ToYRow_Any_SSE2;
  }
#endif
#if defined(HAS_YUY2TOYROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    YUY2ToYRow = IS_ALIGNED(width, 32) ? YUY2ToYRow_AVX2 : YUY2ToYRow_Any_AVX2;
  }
#endif
#if defined(HAS_YUY2TOYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    YUY2ToYRow = IS_ALIGNED(width, 16) ? YUY2ToYRow_NEON : YUY2ToYRow_Any_NEON;
  }
#endif

  for (row = 0; row < height; ++row) {
    YUY2ToYRow(src_yuy2, dst_y, width);
    src_yuy2 += src_stride_yuy2;
    dst_y += dst_stride_y;
  }
  return 0;
}
2424
2425
// Convert UYVY to Y.
2426
LIBYUV_API
int UYVYToY(const uint8_t* src_uyvy,
            int src_stride_uyvy,
            uint8_t* dst_y,
            int dst_stride_y,
            int width,
            int height) {
  int row;
  // Extract only the luma channel from packed UYVY (2 bytes per pixel).
  void (*UYVYToYRow)(const uint8_t* src_uyvy, uint8_t* dst_y, int width) =
      UYVYToYRow_C;
  if (!src_uyvy || !dst_y || width <= 0 || height == 0) {
    return -1;
  }
  // A negative height flips the image: walk the source bottom-up.
  if (height < 0) {
    height = -height;
    src_uyvy += (height - 1) * src_stride_uyvy;
    src_stride_uyvy = -src_stride_uyvy;
  }
  // Coalesce contiguous rows into one long row.
  if (src_stride_uyvy == width * 2 && dst_stride_y == width) {
    width *= height;
    height = 1;
    src_stride_uyvy = dst_stride_y = 0;
  }
#if defined(HAS_UYVYTOYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    UYVYToYRow = IS_ALIGNED(width, 16) ? UYVYToYRow_SSE2 : UYVYToYRow_Any_SSE2;
  }
#endif
#if defined(HAS_UYVYTOYROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    UYVYToYRow = IS_ALIGNED(width, 32) ? UYVYToYRow_AVX2 : UYVYToYRow_Any_AVX2;
  }
#endif
#if defined(HAS_UYVYTOYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    UYVYToYRow = IS_ALIGNED(width, 16) ? UYVYToYRow_NEON : UYVYToYRow_Any_NEON;
  }
#endif
#if defined(HAS_UYVYTOYROW_LSX)
  if (TestCpuFlag(kCpuHasLSX)) {
    UYVYToYRow = IS_ALIGNED(width, 16) ? UYVYToYRow_LSX : UYVYToYRow_Any_LSX;
  }
#endif

  for (row = 0; row < height; ++row) {
    UYVYToYRow(src_uyvy, dst_y, width);
    src_uyvy += src_stride_uyvy;
    dst_y += dst_stride_y;
  }
  return 0;
}
2491
2492
// Mirror a plane of data.
2493
// See Also I400Mirror
2494
LIBYUV_API
void MirrorPlane(const uint8_t* src_y,
                 int src_stride_y,
                 uint8_t* dst_y,
                 int dst_stride_y,
                 int width,
                 int height) {
  int row;
  void (*MirrorRow)(const uint8_t* src, uint8_t* dst, int width) = MirrorRow_C;
  // A negative height additionally flips vertically: read the source
  // bottom-up while mirroring each row.
  if (height < 0) {
    height = -height;
    src_y += (height - 1) * src_stride_y;
    src_stride_y = -src_stride_y;
  }
#if defined(HAS_MIRRORROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    MirrorRow = IS_ALIGNED(width, 32) ? MirrorRow_NEON : MirrorRow_Any_NEON;
  }
#endif
#if defined(HAS_MIRRORROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    MirrorRow = IS_ALIGNED(width, 16) ? MirrorRow_SSSE3 : MirrorRow_Any_SSSE3;
  }
#endif
#if defined(HAS_MIRRORROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    MirrorRow = IS_ALIGNED(width, 32) ? MirrorRow_AVX2 : MirrorRow_Any_AVX2;
  }
#endif
#if defined(HAS_MIRRORROW_LSX)
  if (TestCpuFlag(kCpuHasLSX)) {
    MirrorRow = IS_ALIGNED(width, 32) ? MirrorRow_LSX : MirrorRow_Any_LSX;
  }
#endif
#if defined(HAS_MIRRORROW_LASX)
  if (TestCpuFlag(kCpuHasLASX)) {
    MirrorRow = IS_ALIGNED(width, 64) ? MirrorRow_LASX : MirrorRow_Any_LASX;
  }
#endif

  // Mirror each row of the plane.
  for (row = 0; row < height; ++row) {
    MirrorRow(src_y, dst_y, width);
    src_y += src_stride_y;
    dst_y += dst_stride_y;
  }
}
2557
2558
// Mirror a plane of UV data.
2559
LIBYUV_API
void MirrorUVPlane(const uint8_t* src_uv,
                   int src_stride_uv,
                   uint8_t* dst_uv,
                   int dst_stride_uv,
                   int width,
                   int height) {
  int row;
  void (*MirrorUVRow)(const uint8_t* src, uint8_t* dst, int width) =
      MirrorUVRow_C;
  // A negative height additionally flips vertically: read the source
  // bottom-up while mirroring each row of UV pairs.
  if (height < 0) {
    height = -height;
    src_uv += (height - 1) * src_stride_uv;
    src_stride_uv = -src_stride_uv;
  }
#if defined(HAS_MIRRORUVROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    MirrorUVRow =
        IS_ALIGNED(width, 32) ? MirrorUVRow_NEON : MirrorUVRow_Any_NEON;
  }
#endif
#if defined(HAS_MIRRORUVROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    MirrorUVRow =
        IS_ALIGNED(width, 8) ? MirrorUVRow_SSSE3 : MirrorUVRow_Any_SSSE3;
  }
#endif
#if defined(HAS_MIRRORUVROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    MirrorUVRow =
        IS_ALIGNED(width, 16) ? MirrorUVRow_AVX2 : MirrorUVRow_Any_AVX2;
  }
#endif
#if defined(HAS_MIRRORUVROW_LSX)
  if (TestCpuFlag(kCpuHasLSX)) {
    MirrorUVRow = IS_ALIGNED(width, 8) ? MirrorUVRow_LSX : MirrorUVRow_Any_LSX;
  }
#endif
#if defined(HAS_MIRRORUVROW_LASX)
  if (TestCpuFlag(kCpuHasLASX)) {
    MirrorUVRow =
        IS_ALIGNED(width, 16) ? MirrorUVRow_LASX : MirrorUVRow_Any_LASX;
  }
#endif

  // Mirror each row of the interleaved UV plane.
  for (row = 0; row < height; ++row) {
    MirrorUVRow(src_uv, dst_uv, width);
    src_uv += src_stride_uv;
    dst_uv += dst_stride_uv;
  }
}
2623
2624
// Mirror I400 with optional flipping
2625
LIBYUV_API
int I400Mirror(const uint8_t* src_y,
               int src_stride_y,
               uint8_t* dst_y,
               int dst_stride_y,
               int width,
               int height) {
  // Validate arguments here; MirrorPlane() itself performs no checking.
  if (!src_y || !dst_y || width <= 0 || height == 0) {
    return -1;
  }
  // Fold a requested vertical flip into the source pointer and stride.
  if (height < 0) {
    height = -height;
    src_y += (height - 1) * src_stride_y;
    src_stride_y = -src_stride_y;
  }

  MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  return 0;
}
2645
2646
// Mirror I420 with optional flipping
2647
LIBYUV_API
int I420Mirror(const uint8_t* src_y,
               int src_stride_y,
               const uint8_t* src_u,
               int src_stride_u,
               const uint8_t* src_v,
               int src_stride_v,
               uint8_t* dst_y,
               int dst_stride_y,
               uint8_t* dst_u,
               int dst_stride_u,
               uint8_t* dst_v,
               int dst_stride_v,
               int width,
               int height) {
  int halfwidth = (width + 1) >> 1;
  int halfheight = (height + 1) >> 1;

  // Chroma planes are mandatory; the Y pair is optional, but src_y may only
  // be NULL when dst_y is NULL as well (chroma-only mirror).
  if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 ||
      height == 0) {
    return -1;
  }

  // Fold a requested vertical flip into the source pointers and strides.
  if (height < 0) {
    height = -height;
    halfheight = (height + 1) >> 1;
    src_y += (height - 1) * src_stride_y;
    src_u += (halfheight - 1) * src_stride_u;
    src_v += (halfheight - 1) * src_stride_v;
    src_stride_y = -src_stride_y;
    src_stride_u = -src_stride_u;
    src_stride_v = -src_stride_v;
  }

  if (dst_y) {
    MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  }
  // Chroma is subsampled 2x in both dimensions.
  MirrorPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight);
  MirrorPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight);
  return 0;
}
2689
2690
// NV12 mirror.
2691
LIBYUV_API
int NV12Mirror(const uint8_t* src_y,
               int src_stride_y,
               const uint8_t* src_uv,
               int src_stride_uv,
               uint8_t* dst_y,
               int dst_stride_y,
               uint8_t* dst_uv,
               int dst_stride_uv,
               int width,
               int height) {
  int halfwidth = (width + 1) >> 1;
  int halfheight = (height + 1) >> 1;

  // The UV plane is mandatory; the Y pair is optional, but src_y may only be
  // NULL when dst_y is NULL as well (chroma-only mirror).
  if ((!src_y && dst_y) || !src_uv || !dst_uv || width <= 0 || height == 0) {
    return -1;
  }

  // Fold a requested vertical flip into the source pointers and strides.
  if (height < 0) {
    height = -height;
    halfheight = (height + 1) >> 1;
    src_y += (height - 1) * src_stride_y;
    src_uv += (halfheight - 1) * src_stride_uv;
    src_stride_y = -src_stride_y;
    src_stride_uv = -src_stride_uv;
  }

  if (dst_y) {
    MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  }
  // The interleaved UV plane is half size in both dimensions.
  MirrorUVPlane(src_uv, src_stride_uv, dst_uv, dst_stride_uv, halfwidth,
                halfheight);
  return 0;
}
2726
2727
// ARGB mirror.
2728
LIBYUV_API
int ARGBMirror(const uint8_t* src_argb,
               int src_stride_argb,
               uint8_t* dst_argb,
               int dst_stride_argb,
               int width,
               int height) {
  int row;
  void (*ARGBMirrorRow)(const uint8_t* src, uint8_t* dst, int width) =
      ARGBMirrorRow_C;
  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // A negative height additionally flips vertically: read the source
  // bottom-up while mirroring each row of 4-byte pixels.
  if (height < 0) {
    height = -height;
    src_argb += (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
#if defined(HAS_ARGBMIRRORROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBMirrorRow =
        IS_ALIGNED(width, 8) ? ARGBMirrorRow_NEON : ARGBMirrorRow_Any_NEON;
  }
#endif
#if defined(HAS_ARGBMIRRORROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ARGBMirrorRow =
        IS_ALIGNED(width, 4) ? ARGBMirrorRow_SSE2 : ARGBMirrorRow_Any_SSE2;
  }
#endif
#if defined(HAS_ARGBMIRRORROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBMirrorRow =
        IS_ALIGNED(width, 8) ? ARGBMirrorRow_AVX2 : ARGBMirrorRow_Any_AVX2;
  }
#endif
#if defined(HAS_ARGBMIRRORROW_LSX)
  if (TestCpuFlag(kCpuHasLSX)) {
    ARGBMirrorRow =
        IS_ALIGNED(width, 8) ? ARGBMirrorRow_LSX : ARGBMirrorRow_Any_LSX;
  }
#endif
#if defined(HAS_ARGBMIRRORROW_LASX)
  if (TestCpuFlag(kCpuHasLASX)) {
    ARGBMirrorRow =
        IS_ALIGNED(width, 16) ? ARGBMirrorRow_LASX : ARGBMirrorRow_Any_LASX;
  }
#endif

  // Mirror each row of the packed ARGB plane.
  for (row = 0; row < height; ++row) {
    ARGBMirrorRow(src_argb, dst_argb, width);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
2796
2797
// RGB24 mirror.
2798
LIBYUV_API
2799
int RGB24Mirror(const uint8_t* src_rgb24,
2800
                int src_stride_rgb24,
2801
                uint8_t* dst_rgb24,
2802
                int dst_stride_rgb24,
2803
                int width,
2804
0
                int height) {
2805
0
  int y;
2806
0
  void (*RGB24MirrorRow)(const uint8_t* src, uint8_t* dst, int width) =
2807
0
      RGB24MirrorRow_C;
2808
0
  if (!src_rgb24 || !dst_rgb24 || width <= 0 || height == 0) {
2809
0
    return -1;
2810
0
  }
2811
  // Negative height means invert the image.
2812
0
  if (height < 0) {
2813
0
    height = -height;
2814
0
    src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24;
2815
0
    src_stride_rgb24 = -src_stride_rgb24;
2816
0
  }
2817
#if defined(HAS_RGB24MIRRORROW_NEON)
2818
  if (TestCpuFlag(kCpuHasNEON)) {
2819
    RGB24MirrorRow = RGB24MirrorRow_Any_NEON;
2820
    if (IS_ALIGNED(width, 16)) {
2821
      RGB24MirrorRow = RGB24MirrorRow_NEON;
2822
    }
2823
  }
2824
#endif
2825
0
#if defined(HAS_RGB24MIRRORROW_SSSE3)
2826
0
  if (TestCpuFlag(kCpuHasSSSE3)) {
2827
0
    RGB24MirrorRow = RGB24MirrorRow_Any_SSSE3;
2828
0
    if (IS_ALIGNED(width, 16)) {
2829
0
      RGB24MirrorRow = RGB24MirrorRow_SSSE3;
2830
0
    }
2831
0
  }
2832
0
#endif
2833
2834
  // Mirror plane
2835
0
  for (y = 0; y < height; ++y) {
2836
0
    RGB24MirrorRow(src_rgb24, dst_rgb24, width);
2837
0
    src_rgb24 += src_stride_rgb24;
2838
0
    dst_rgb24 += dst_stride_rgb24;
2839
0
  }
2840
0
  return 0;
2841
0
}
2842
2843
// Alpha Blend 2 ARGB images and store to destination.
2844
LIBYUV_API
2845
int ARGBBlend(const uint8_t* src_argb0,
2846
              int src_stride_argb0,
2847
              const uint8_t* src_argb1,
2848
              int src_stride_argb1,
2849
              uint8_t* dst_argb,
2850
              int dst_stride_argb,
2851
              int width,
2852
0
              int height) {
2853
0
  int y;
2854
0
  void (*ARGBBlendRow)(const uint8_t* src_argb, const uint8_t* src_argb1,
2855
0
                       uint8_t* dst_argb, int width) = ARGBBlendRow_C;
2856
0
  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
2857
0
    return -1;
2858
0
  }
2859
  // Negative height means invert the image.
2860
0
  if (height < 0) {
2861
0
    height = -height;
2862
0
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
2863
0
    dst_stride_argb = -dst_stride_argb;
2864
0
  }
2865
  // Coalesce rows.
2866
0
  if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 &&
2867
0
      dst_stride_argb == width * 4) {
2868
0
    width *= height;
2869
0
    height = 1;
2870
0
    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
2871
0
  }
2872
0
#if defined(HAS_ARGBBLENDROW_SSSE3)
2873
0
  if (TestCpuFlag(kCpuHasSSSE3)) {
2874
0
    ARGBBlendRow = ARGBBlendRow_SSSE3;
2875
0
  }
2876
0
#endif
2877
#if defined(HAS_ARGBBLENDROW_NEON)
2878
  if (TestCpuFlag(kCpuHasNEON)) {
2879
    ARGBBlendRow = ARGBBlendRow_NEON;
2880
  }
2881
#endif
2882
#if defined(HAS_ARGBBLENDROW_LSX)
2883
  if (TestCpuFlag(kCpuHasLSX)) {
2884
    ARGBBlendRow = ARGBBlendRow_LSX;
2885
  }
2886
#endif
2887
#if defined(HAS_ARGBBLENDROW_RVV)
2888
  if (TestCpuFlag(kCpuHasRVV)) {
2889
    ARGBBlendRow = ARGBBlendRow_RVV;
2890
  }
2891
#endif
2892
0
  for (y = 0; y < height; ++y) {
2893
0
    ARGBBlendRow(src_argb0, src_argb1, dst_argb, width);
2894
0
    src_argb0 += src_stride_argb0;
2895
0
    src_argb1 += src_stride_argb1;
2896
0
    dst_argb += dst_stride_argb;
2897
0
  }
2898
0
  return 0;
2899
0
}
2900
2901
// Alpha Blend plane and store to destination.
2902
LIBYUV_API
2903
int BlendPlane(const uint8_t* src_y0,
2904
               int src_stride_y0,
2905
               const uint8_t* src_y1,
2906
               int src_stride_y1,
2907
               const uint8_t* alpha,
2908
               int alpha_stride,
2909
               uint8_t* dst_y,
2910
               int dst_stride_y,
2911
               int width,
2912
0
               int height) {
2913
0
  int y;
2914
0
  void (*BlendPlaneRow)(const uint8_t* src0, const uint8_t* src1,
2915
0
                        const uint8_t* alpha, uint8_t* dst, int width) =
2916
0
      BlendPlaneRow_C;
2917
0
  if (!src_y0 || !src_y1 || !alpha || !dst_y || width <= 0 || height == 0) {
2918
0
    return -1;
2919
0
  }
2920
  // Negative height means invert the image.
2921
0
  if (height < 0) {
2922
0
    height = -height;
2923
0
    dst_y = dst_y + (height - 1) * dst_stride_y;
2924
0
    dst_stride_y = -dst_stride_y;
2925
0
  }
2926
2927
  // Coalesce rows for Y plane.
2928
0
  if (src_stride_y0 == width && src_stride_y1 == width &&
2929
0
      alpha_stride == width && dst_stride_y == width) {
2930
0
    width *= height;
2931
0
    height = 1;
2932
0
    src_stride_y0 = src_stride_y1 = alpha_stride = dst_stride_y = 0;
2933
0
  }
2934
2935
0
#if defined(HAS_BLENDPLANEROW_SSSE3)
2936
0
  if (TestCpuFlag(kCpuHasSSSE3)) {
2937
0
    BlendPlaneRow = BlendPlaneRow_Any_SSSE3;
2938
0
    if (IS_ALIGNED(width, 8)) {
2939
0
      BlendPlaneRow = BlendPlaneRow_SSSE3;
2940
0
    }
2941
0
  }
2942
0
#endif
2943
0
#if defined(HAS_BLENDPLANEROW_AVX2)
2944
0
  if (TestCpuFlag(kCpuHasAVX2)) {
2945
0
    BlendPlaneRow = BlendPlaneRow_Any_AVX2;
2946
0
    if (IS_ALIGNED(width, 32)) {
2947
0
      BlendPlaneRow = BlendPlaneRow_AVX2;
2948
0
    }
2949
0
  }
2950
0
#endif
2951
#if defined(HAS_BLENDPLANEROW_RVV)
2952
  if (TestCpuFlag(kCpuHasRVV)) {
2953
    BlendPlaneRow = BlendPlaneRow_RVV;
2954
  }
2955
#endif
2956
2957
0
  for (y = 0; y < height; ++y) {
2958
0
    BlendPlaneRow(src_y0, src_y1, alpha, dst_y, width);
2959
0
    src_y0 += src_stride_y0;
2960
0
    src_y1 += src_stride_y1;
2961
0
    alpha += alpha_stride;
2962
0
    dst_y += dst_stride_y;
2963
0
  }
2964
0
  return 0;
2965
0
}
2966
2967
#define MAXTWIDTH 2048
2968
// Alpha Blend YUV images and store to destination.
2969
LIBYUV_API
2970
int I420Blend(const uint8_t* src_y0,
2971
              int src_stride_y0,
2972
              const uint8_t* src_u0,
2973
              int src_stride_u0,
2974
              const uint8_t* src_v0,
2975
              int src_stride_v0,
2976
              const uint8_t* src_y1,
2977
              int src_stride_y1,
2978
              const uint8_t* src_u1,
2979
              int src_stride_u1,
2980
              const uint8_t* src_v1,
2981
              int src_stride_v1,
2982
              const uint8_t* alpha,
2983
              int alpha_stride,
2984
              uint8_t* dst_y,
2985
              int dst_stride_y,
2986
              uint8_t* dst_u,
2987
              int dst_stride_u,
2988
              uint8_t* dst_v,
2989
              int dst_stride_v,
2990
              int width,
2991
0
              int height) {
2992
0
  int y;
2993
  // Half width/height for UV.
2994
0
  int halfwidth = (width + 1) >> 1;
2995
0
  void (*BlendPlaneRow)(const uint8_t* src0, const uint8_t* src1,
2996
0
                        const uint8_t* alpha, uint8_t* dst, int width) =
2997
0
      BlendPlaneRow_C;
2998
0
  void (*ScaleRowDown2)(const uint8_t* src_ptr, ptrdiff_t src_stride,
2999
0
                        uint8_t* dst_ptr, int dst_width) = ScaleRowDown2Box_C;
3000
3001
0
  if (!src_y0 || !src_u0 || !src_v0 || !src_y1 || !src_u1 || !src_v1 ||
3002
0
      !alpha || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
3003
0
    return -1;
3004
0
  }
3005
3006
  // Negative height means invert the image.
3007
0
  if (height < 0) {
3008
0
    height = -height;
3009
0
    dst_y = dst_y + (height - 1) * dst_stride_y;
3010
0
    dst_stride_y = -dst_stride_y;
3011
0
  }
3012
3013
  // Blend Y plane.
3014
0
  BlendPlane(src_y0, src_stride_y0, src_y1, src_stride_y1, alpha, alpha_stride,
3015
0
             dst_y, dst_stride_y, width, height);
3016
3017
0
#if defined(HAS_BLENDPLANEROW_SSSE3)
3018
0
  if (TestCpuFlag(kCpuHasSSSE3)) {
3019
0
    BlendPlaneRow = BlendPlaneRow_Any_SSSE3;
3020
0
    if (IS_ALIGNED(halfwidth, 8)) {
3021
0
      BlendPlaneRow = BlendPlaneRow_SSSE3;
3022
0
    }
3023
0
  }
3024
0
#endif
3025
0
#if defined(HAS_BLENDPLANEROW_AVX2)
3026
0
  if (TestCpuFlag(kCpuHasAVX2)) {
3027
0
    BlendPlaneRow = BlendPlaneRow_Any_AVX2;
3028
0
    if (IS_ALIGNED(halfwidth, 32)) {
3029
0
      BlendPlaneRow = BlendPlaneRow_AVX2;
3030
0
    }
3031
0
  }
3032
0
#endif
3033
#if defined(HAS_BLENDPLANEROW_RVV)
3034
  if (TestCpuFlag(kCpuHasRVV)) {
3035
    BlendPlaneRow = BlendPlaneRow_RVV;
3036
  }
3037
#endif
3038
0
  if (!IS_ALIGNED(width, 2)) {
3039
0
    ScaleRowDown2 = ScaleRowDown2Box_Odd_C;
3040
0
  }
3041
#if defined(HAS_SCALEROWDOWN2_NEON)
3042
  if (TestCpuFlag(kCpuHasNEON)) {
3043
    ScaleRowDown2 = ScaleRowDown2Box_Odd_NEON;
3044
    if (IS_ALIGNED(width, 2)) {
3045
      ScaleRowDown2 = ScaleRowDown2Box_Any_NEON;
3046
      if (IS_ALIGNED(halfwidth, 16)) {
3047
        ScaleRowDown2 = ScaleRowDown2Box_NEON;
3048
      }
3049
    }
3050
  }
3051
#endif
3052
0
#if defined(HAS_SCALEROWDOWN2_SSSE3)
3053
0
  if (TestCpuFlag(kCpuHasSSSE3)) {
3054
0
    ScaleRowDown2 = ScaleRowDown2Box_Odd_SSSE3;
3055
0
    if (IS_ALIGNED(width, 2)) {
3056
0
      ScaleRowDown2 = ScaleRowDown2Box_Any_SSSE3;
3057
0
      if (IS_ALIGNED(halfwidth, 16)) {
3058
0
        ScaleRowDown2 = ScaleRowDown2Box_SSSE3;
3059
0
      }
3060
0
    }
3061
0
  }
3062
0
#endif
3063
0
#if defined(HAS_SCALEROWDOWN2_AVX2)
3064
0
  if (TestCpuFlag(kCpuHasAVX2)) {
3065
0
    ScaleRowDown2 = ScaleRowDown2Box_Odd_AVX2;
3066
0
    if (IS_ALIGNED(width, 2)) {
3067
0
      ScaleRowDown2 = ScaleRowDown2Box_Any_AVX2;
3068
0
      if (IS_ALIGNED(halfwidth, 32)) {
3069
0
        ScaleRowDown2 = ScaleRowDown2Box_AVX2;
3070
0
      }
3071
0
    }
3072
0
  }
3073
0
#endif
3074
#if defined(HAS_SCALEROWDOWN2_RVV)
3075
  if (TestCpuFlag(kCpuHasRVV)) {
3076
    ScaleRowDown2 = ScaleRowDown2Box_RVV;
3077
  }
3078
#endif
3079
3080
  // Row buffer for intermediate alpha pixels.
3081
0
  align_buffer_64(halfalpha, halfwidth);
3082
0
  if (!halfalpha)
3083
0
    return 1;
3084
0
  for (y = 0; y < height; y += 2) {
3085
    // last row of odd height image use 1 row of alpha instead of 2.
3086
0
    if (y == (height - 1)) {
3087
0
      alpha_stride = 0;
3088
0
    }
3089
    // Subsample 2 rows of UV to half width and half height.
3090
0
    ScaleRowDown2(alpha, alpha_stride, halfalpha, halfwidth);
3091
0
    alpha += alpha_stride * 2;
3092
0
    BlendPlaneRow(src_u0, src_u1, halfalpha, dst_u, halfwidth);
3093
0
    BlendPlaneRow(src_v0, src_v1, halfalpha, dst_v, halfwidth);
3094
0
    src_u0 += src_stride_u0;
3095
0
    src_u1 += src_stride_u1;
3096
0
    dst_u += dst_stride_u;
3097
0
    src_v0 += src_stride_v0;
3098
0
    src_v1 += src_stride_v1;
3099
0
    dst_v += dst_stride_v;
3100
0
  }
3101
0
  free_aligned_buffer_64(halfalpha);
3102
0
  return 0;
3103
0
}
3104
3105
// Multiply 2 ARGB images and store to destination.
3106
LIBYUV_API
3107
int ARGBMultiply(const uint8_t* src_argb0,
3108
                 int src_stride_argb0,
3109
                 const uint8_t* src_argb1,
3110
                 int src_stride_argb1,
3111
                 uint8_t* dst_argb,
3112
                 int dst_stride_argb,
3113
                 int width,
3114
0
                 int height) {
3115
0
  int y;
3116
0
  void (*ARGBMultiplyRow)(const uint8_t* src0, const uint8_t* src1,
3117
0
                          uint8_t* dst, int width) = ARGBMultiplyRow_C;
3118
0
  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
3119
0
    return -1;
3120
0
  }
3121
  // Negative height means invert the image.
3122
0
  if (height < 0) {
3123
0
    height = -height;
3124
0
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
3125
0
    dst_stride_argb = -dst_stride_argb;
3126
0
  }
3127
  // Coalesce rows.
3128
0
  if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 &&
3129
0
      dst_stride_argb == width * 4) {
3130
0
    width *= height;
3131
0
    height = 1;
3132
0
    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
3133
0
  }
3134
#if defined(HAS_ARGBMULTIPLYROW_SSE2)
3135
  if (TestCpuFlag(kCpuHasSSE2)) {
3136
    ARGBMultiplyRow = ARGBMultiplyRow_Any_SSE2;
3137
    if (IS_ALIGNED(width, 4)) {
3138
      ARGBMultiplyRow = ARGBMultiplyRow_SSE2;
3139
    }
3140
  }
3141
#endif
3142
#if defined(HAS_ARGBMULTIPLYROW_AVX2)
3143
  if (TestCpuFlag(kCpuHasAVX2)) {
3144
    ARGBMultiplyRow = ARGBMultiplyRow_Any_AVX2;
3145
    if (IS_ALIGNED(width, 8)) {
3146
      ARGBMultiplyRow = ARGBMultiplyRow_AVX2;
3147
    }
3148
  }
3149
#endif
3150
#if defined(HAS_ARGBMULTIPLYROW_NEON)
3151
  if (TestCpuFlag(kCpuHasNEON)) {
3152
    ARGBMultiplyRow = ARGBMultiplyRow_Any_NEON;
3153
    if (IS_ALIGNED(width, 8)) {
3154
      ARGBMultiplyRow = ARGBMultiplyRow_NEON;
3155
    }
3156
  }
3157
#endif
3158
#if defined(HAS_ARGBMULTIPLYROW_SME)
3159
  if (TestCpuFlag(kCpuHasSME)) {
3160
    ARGBMultiplyRow = ARGBMultiplyRow_SME;
3161
  }
3162
#endif
3163
#if defined(HAS_ARGBMULTIPLYROW_LSX)
3164
  if (TestCpuFlag(kCpuHasLSX)) {
3165
    ARGBMultiplyRow = ARGBMultiplyRow_Any_LSX;
3166
    if (IS_ALIGNED(width, 4)) {
3167
      ARGBMultiplyRow = ARGBMultiplyRow_LSX;
3168
    }
3169
  }
3170
#endif
3171
#if defined(HAS_ARGBMULTIPLYROW_LASX)
3172
  if (TestCpuFlag(kCpuHasLASX)) {
3173
    ARGBMultiplyRow = ARGBMultiplyRow_Any_LASX;
3174
    if (IS_ALIGNED(width, 8)) {
3175
      ARGBMultiplyRow = ARGBMultiplyRow_LASX;
3176
    }
3177
  }
3178
#endif
3179
3180
  // Multiply plane
3181
0
  for (y = 0; y < height; ++y) {
3182
0
    ARGBMultiplyRow(src_argb0, src_argb1, dst_argb, width);
3183
0
    src_argb0 += src_stride_argb0;
3184
0
    src_argb1 += src_stride_argb1;
3185
0
    dst_argb += dst_stride_argb;
3186
0
  }
3187
0
  return 0;
3188
0
}
3189
3190
// Add 2 ARGB images and store to destination.
3191
LIBYUV_API
3192
int ARGBAdd(const uint8_t* src_argb0,
3193
            int src_stride_argb0,
3194
            const uint8_t* src_argb1,
3195
            int src_stride_argb1,
3196
            uint8_t* dst_argb,
3197
            int dst_stride_argb,
3198
            int width,
3199
0
            int height) {
3200
0
  int y;
3201
0
  void (*ARGBAddRow)(const uint8_t* src0, const uint8_t* src1, uint8_t* dst,
3202
0
                     int width) = ARGBAddRow_C;
3203
0
  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
3204
0
    return -1;
3205
0
  }
3206
  // Negative height means invert the image.
3207
0
  if (height < 0) {
3208
0
    height = -height;
3209
0
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
3210
0
    dst_stride_argb = -dst_stride_argb;
3211
0
  }
3212
  // Coalesce rows.
3213
0
  if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 &&
3214
0
      dst_stride_argb == width * 4) {
3215
0
    width *= height;
3216
0
    height = 1;
3217
0
    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
3218
0
  }
3219
0
#if defined(HAS_ARGBADDROW_SSE2)
3220
0
  if (TestCpuFlag(kCpuHasSSE2)) {
3221
0
    ARGBAddRow = ARGBAddRow_SSE2;
3222
0
  }
3223
0
#endif
3224
0
#if defined(HAS_ARGBADDROW_SSE2)
3225
0
  if (TestCpuFlag(kCpuHasSSE2)) {
3226
0
    ARGBAddRow = ARGBAddRow_Any_SSE2;
3227
0
    if (IS_ALIGNED(width, 4)) {
3228
0
      ARGBAddRow = ARGBAddRow_SSE2;
3229
0
    }
3230
0
  }
3231
0
#endif
3232
0
#if defined(HAS_ARGBADDROW_AVX2)
3233
0
  if (TestCpuFlag(kCpuHasAVX2)) {
3234
0
    ARGBAddRow = ARGBAddRow_Any_AVX2;
3235
0
    if (IS_ALIGNED(width, 8)) {
3236
0
      ARGBAddRow = ARGBAddRow_AVX2;
3237
0
    }
3238
0
  }
3239
0
#endif
3240
#if defined(HAS_ARGBADDROW_NEON)
3241
  if (TestCpuFlag(kCpuHasNEON)) {
3242
    ARGBAddRow = ARGBAddRow_Any_NEON;
3243
    if (IS_ALIGNED(width, 8)) {
3244
      ARGBAddRow = ARGBAddRow_NEON;
3245
    }
3246
  }
3247
#endif
3248
#if defined(HAS_ARGBADDROW_LSX)
3249
  if (TestCpuFlag(kCpuHasLSX)) {
3250
    ARGBAddRow = ARGBAddRow_Any_LSX;
3251
    if (IS_ALIGNED(width, 4)) {
3252
      ARGBAddRow = ARGBAddRow_LSX;
3253
    }
3254
  }
3255
#endif
3256
#if defined(HAS_ARGBADDROW_LASX)
3257
  if (TestCpuFlag(kCpuHasLASX)) {
3258
    ARGBAddRow = ARGBAddRow_Any_LASX;
3259
    if (IS_ALIGNED(width, 8)) {
3260
      ARGBAddRow = ARGBAddRow_LASX;
3261
    }
3262
  }
3263
#endif
3264
3265
  // Add plane
3266
0
  for (y = 0; y < height; ++y) {
3267
0
    ARGBAddRow(src_argb0, src_argb1, dst_argb, width);
3268
0
    src_argb0 += src_stride_argb0;
3269
0
    src_argb1 += src_stride_argb1;
3270
0
    dst_argb += dst_stride_argb;
3271
0
  }
3272
0
  return 0;
3273
0
}
3274
3275
// Subtract 2 ARGB images and store to destination.
3276
LIBYUV_API
3277
int ARGBSubtract(const uint8_t* src_argb0,
3278
                 int src_stride_argb0,
3279
                 const uint8_t* src_argb1,
3280
                 int src_stride_argb1,
3281
                 uint8_t* dst_argb,
3282
                 int dst_stride_argb,
3283
                 int width,
3284
0
                 int height) {
3285
0
  int y;
3286
0
  void (*ARGBSubtractRow)(const uint8_t* src0, const uint8_t* src1,
3287
0
                          uint8_t* dst, int width) = ARGBSubtractRow_C;
3288
0
  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
3289
0
    return -1;
3290
0
  }
3291
  // Negative height means invert the image.
3292
0
  if (height < 0) {
3293
0
    height = -height;
3294
0
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
3295
0
    dst_stride_argb = -dst_stride_argb;
3296
0
  }
3297
  // Coalesce rows.
3298
0
  if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 &&
3299
0
      dst_stride_argb == width * 4) {
3300
0
    width *= height;
3301
0
    height = 1;
3302
0
    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
3303
0
  }
3304
0
#if defined(HAS_ARGBSUBTRACTROW_SSE2)
3305
0
  if (TestCpuFlag(kCpuHasSSE2)) {
3306
0
    ARGBSubtractRow = ARGBSubtractRow_Any_SSE2;
3307
0
    if (IS_ALIGNED(width, 4)) {
3308
0
      ARGBSubtractRow = ARGBSubtractRow_SSE2;
3309
0
    }
3310
0
  }
3311
0
#endif
3312
0
#if defined(HAS_ARGBSUBTRACTROW_AVX2)
3313
0
  if (TestCpuFlag(kCpuHasAVX2)) {
3314
0
    ARGBSubtractRow = ARGBSubtractRow_Any_AVX2;
3315
0
    if (IS_ALIGNED(width, 8)) {
3316
0
      ARGBSubtractRow = ARGBSubtractRow_AVX2;
3317
0
    }
3318
0
  }
3319
0
#endif
3320
#if defined(HAS_ARGBSUBTRACTROW_NEON)
3321
  if (TestCpuFlag(kCpuHasNEON)) {
3322
    ARGBSubtractRow = ARGBSubtractRow_Any_NEON;
3323
    if (IS_ALIGNED(width, 8)) {
3324
      ARGBSubtractRow = ARGBSubtractRow_NEON;
3325
    }
3326
  }
3327
#endif
3328
#if defined(HAS_ARGBSUBTRACTROW_LSX)
3329
  if (TestCpuFlag(kCpuHasLSX)) {
3330
    ARGBSubtractRow = ARGBSubtractRow_Any_LSX;
3331
    if (IS_ALIGNED(width, 4)) {
3332
      ARGBSubtractRow = ARGBSubtractRow_LSX;
3333
    }
3334
  }
3335
#endif
3336
#if defined(HAS_ARGBSUBTRACTROW_LASX)
3337
  if (TestCpuFlag(kCpuHasLASX)) {
3338
    ARGBSubtractRow = ARGBSubtractRow_Any_LASX;
3339
    if (IS_ALIGNED(width, 8)) {
3340
      ARGBSubtractRow = ARGBSubtractRow_LASX;
3341
    }
3342
  }
3343
#endif
3344
3345
  // Subtract plane
3346
0
  for (y = 0; y < height; ++y) {
3347
0
    ARGBSubtractRow(src_argb0, src_argb1, dst_argb, width);
3348
0
    src_argb0 += src_stride_argb0;
3349
0
    src_argb1 += src_stride_argb1;
3350
0
    dst_argb += dst_stride_argb;
3351
0
  }
3352
0
  return 0;
3353
0
}
3354
3355
// Convert RAW to RGB24.
3356
LIBYUV_API
3357
int RAWToRGB24(const uint8_t* src_raw,
3358
               int src_stride_raw,
3359
               uint8_t* dst_rgb24,
3360
               int dst_stride_rgb24,
3361
               int width,
3362
0
               int height) {
3363
0
  int y;
3364
0
  void (*RAWToRGB24Row)(const uint8_t* src_rgb, uint8_t* dst_rgb24, int width) =
3365
0
      RAWToRGB24Row_C;
3366
0
  if (!src_raw || !dst_rgb24 || width <= 0 || height == 0) {
3367
0
    return -1;
3368
0
  }
3369
  // Negative height means invert the image.
3370
0
  if (height < 0) {
3371
0
    height = -height;
3372
0
    src_raw = src_raw + (height - 1) * src_stride_raw;
3373
0
    src_stride_raw = -src_stride_raw;
3374
0
  }
3375
  // Coalesce rows.
3376
0
  if (src_stride_raw == width * 3 && dst_stride_rgb24 == width * 3) {
3377
0
    width *= height;
3378
0
    height = 1;
3379
0
    src_stride_raw = dst_stride_rgb24 = 0;
3380
0
  }
3381
0
#if defined(HAS_RAWTORGB24ROW_SSSE3)
3382
0
  if (TestCpuFlag(kCpuHasSSSE3)) {
3383
0
    RAWToRGB24Row = RAWToRGB24Row_Any_SSSE3;
3384
0
    if (IS_ALIGNED(width, 8)) {
3385
0
      RAWToRGB24Row = RAWToRGB24Row_SSSE3;
3386
0
    }
3387
0
  }
3388
0
#endif
3389
#if defined(HAS_RAWTORGB24ROW_NEON)
3390
  if (TestCpuFlag(kCpuHasNEON)) {
3391
    RAWToRGB24Row = RAWToRGB24Row_Any_NEON;
3392
    if (IS_ALIGNED(width, 8)) {
3393
      RAWToRGB24Row = RAWToRGB24Row_NEON;
3394
    }
3395
  }
3396
#endif
3397
#if defined(HAS_RAWTORGB24ROW_SVE2)
3398
  if (TestCpuFlag(kCpuHasSVE2)) {
3399
    RAWToRGB24Row = RAWToRGB24Row_SVE2;
3400
  }
3401
#endif
3402
#if defined(HAS_RAWTORGB24ROW_LSX)
3403
  if (TestCpuFlag(kCpuHasLSX)) {
3404
    RAWToRGB24Row = RAWToRGB24Row_Any_LSX;
3405
    if (IS_ALIGNED(width, 16)) {
3406
      RAWToRGB24Row = RAWToRGB24Row_LSX;
3407
    }
3408
  }
3409
#endif
3410
#if defined(HAS_RAWTORGB24ROW_RVV)
3411
  if (TestCpuFlag(kCpuHasRVV)) {
3412
    RAWToRGB24Row = RAWToRGB24Row_RVV;
3413
  }
3414
#endif
3415
3416
0
  for (y = 0; y < height; ++y) {
3417
0
    RAWToRGB24Row(src_raw, dst_rgb24, width);
3418
0
    src_raw += src_stride_raw;
3419
0
    dst_rgb24 += dst_stride_rgb24;
3420
0
  }
3421
0
  return 0;
3422
0
}
3423
3424
// TODO(fbarchard): Consider uint8_t value
3425
LIBYUV_API
3426
void SetPlane(uint8_t* dst_y,
3427
              int dst_stride_y,
3428
              int width,
3429
              int height,
3430
0
              uint32_t value) {
3431
0
  int y;
3432
0
  void (*SetRow)(uint8_t* dst, uint8_t value, int width) = SetRow_C;
3433
3434
0
  if (width <= 0 || height == 0) {
3435
0
    return;
3436
0
  }
3437
0
  if (height < 0) {
3438
0
    height = -height;
3439
0
    dst_y = dst_y + (height - 1) * dst_stride_y;
3440
0
    dst_stride_y = -dst_stride_y;
3441
0
  }
3442
  // Coalesce rows.
3443
0
  if (dst_stride_y == width) {
3444
0
    width *= height;
3445
0
    height = 1;
3446
0
    dst_stride_y = 0;
3447
0
  }
3448
#if defined(HAS_SETROW_NEON)
3449
  if (TestCpuFlag(kCpuHasNEON)) {
3450
    SetRow = SetRow_Any_NEON;
3451
    if (IS_ALIGNED(width, 16)) {
3452
      SetRow = SetRow_NEON;
3453
    }
3454
  }
3455
#endif
3456
0
#if defined(HAS_SETROW_X86)
3457
0
  if (TestCpuFlag(kCpuHasX86)) {
3458
0
    SetRow = SetRow_Any_X86;
3459
0
    if (IS_ALIGNED(width, 4)) {
3460
0
      SetRow = SetRow_X86;
3461
0
    }
3462
0
  }
3463
0
#endif
3464
0
#if defined(HAS_SETROW_ERMS)
3465
0
  if (TestCpuFlag(kCpuHasERMS)) {
3466
0
    SetRow = SetRow_ERMS;
3467
0
  }
3468
0
#endif
3469
#if defined(HAS_SETROW_LSX)
3470
  if (TestCpuFlag(kCpuHasLSX)) {
3471
    SetRow = SetRow_Any_LSX;
3472
    if (IS_ALIGNED(width, 16)) {
3473
      SetRow = SetRow_LSX;
3474
    }
3475
  }
3476
#endif
3477
3478
  // Set plane
3479
0
  for (y = 0; y < height; ++y) {
3480
0
    SetRow(dst_y, (uint8_t)value, width);
3481
0
    dst_y += dst_stride_y;
3482
0
  }
3483
0
}
3484
3485
// Draw a rectangle into I420
3486
LIBYUV_API
3487
int I420Rect(uint8_t* dst_y,
3488
             int dst_stride_y,
3489
             uint8_t* dst_u,
3490
             int dst_stride_u,
3491
             uint8_t* dst_v,
3492
             int dst_stride_v,
3493
             int x,
3494
             int y,
3495
             int width,
3496
             int height,
3497
             int value_y,
3498
             int value_u,
3499
0
             int value_v) {
3500
0
  int halfwidth = (width + 1) >> 1;
3501
0
  int halfheight = (height + 1) >> 1;
3502
0
  uint8_t* start_y = dst_y + y * dst_stride_y + x;
3503
0
  uint8_t* start_u = dst_u + (y / 2) * dst_stride_u + (x / 2);
3504
0
  uint8_t* start_v = dst_v + (y / 2) * dst_stride_v + (x / 2);
3505
3506
0
  if (!dst_y || !dst_u || !dst_v || width <= 0 || height == 0 || x < 0 ||
3507
0
      y < 0 || value_y < 0 || value_y > 255 || value_u < 0 || value_u > 255 ||
3508
0
      value_v < 0 || value_v > 255) {
3509
0
    return -1;
3510
0
  }
3511
3512
0
  SetPlane(start_y, dst_stride_y, width, height, value_y);
3513
0
  SetPlane(start_u, dst_stride_u, halfwidth, halfheight, value_u);
3514
0
  SetPlane(start_v, dst_stride_v, halfwidth, halfheight, value_v);
3515
0
  return 0;
3516
0
}
3517
3518
// Draw a rectangle into ARGB
3519
LIBYUV_API
3520
int ARGBRect(uint8_t* dst_argb,
3521
             int dst_stride_argb,
3522
             int dst_x,
3523
             int dst_y,
3524
             int width,
3525
             int height,
3526
0
             uint32_t value) {
3527
0
  int y;
3528
0
  void (*ARGBSetRow)(uint8_t* dst_argb, uint32_t value, int width) =
3529
0
      ARGBSetRow_C;
3530
0
  if (!dst_argb || width <= 0 || height == 0 || dst_x < 0 || dst_y < 0) {
3531
0
    return -1;
3532
0
  }
3533
0
  if (height < 0) {
3534
0
    height = -height;
3535
0
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
3536
0
    dst_stride_argb = -dst_stride_argb;
3537
0
  }
3538
0
  dst_argb += dst_y * dst_stride_argb + dst_x * 4;
3539
  // Coalesce rows.
3540
0
  if (dst_stride_argb == width * 4) {
3541
0
    width *= height;
3542
0
    height = 1;
3543
0
    dst_stride_argb = 0;
3544
0
  }
3545
3546
#if defined(HAS_ARGBSETROW_NEON)
3547
  if (TestCpuFlag(kCpuHasNEON)) {
3548
    ARGBSetRow = ARGBSetRow_Any_NEON;
3549
    if (IS_ALIGNED(width, 4)) {
3550
      ARGBSetRow = ARGBSetRow_NEON;
3551
    }
3552
  }
3553
#endif
3554
0
#if defined(HAS_ARGBSETROW_X86)
3555
0
  if (TestCpuFlag(kCpuHasX86)) {
3556
0
    ARGBSetRow = ARGBSetRow_X86;
3557
0
  }
3558
0
#endif
3559
#if defined(HAS_ARGBSETROW_LSX)
3560
  if (TestCpuFlag(kCpuHasLSX)) {
3561
    ARGBSetRow = ARGBSetRow_Any_LSX;
3562
    if (IS_ALIGNED(width, 4)) {
3563
      ARGBSetRow = ARGBSetRow_LSX;
3564
    }
3565
  }
3566
#endif
3567
3568
  // Set plane
3569
0
  for (y = 0; y < height; ++y) {
3570
0
    ARGBSetRow(dst_argb, value, width);
3571
0
    dst_argb += dst_stride_argb;
3572
0
  }
3573
0
  return 0;
3574
0
}
3575
3576
// Convert unattentuated ARGB to preattenuated ARGB.
3577
// An unattenutated ARGB alpha blend uses the formula
3578
// p = a * f + (1 - a) * b
3579
// where
3580
//   p is output pixel
3581
//   f is foreground pixel
3582
//   b is background pixel
3583
//   a is alpha value from foreground pixel
3584
// An preattenutated ARGB alpha blend uses the formula
3585
// p = f + (1 - a) * b
3586
// where
3587
//   f is foreground pixel premultiplied by alpha
3588
3589
LIBYUV_API
3590
int ARGBAttenuate(const uint8_t* src_argb,
3591
                  int src_stride_argb,
3592
                  uint8_t* dst_argb,
3593
                  int dst_stride_argb,
3594
                  int width,
3595
776
                  int height) {
3596
776
  int y;
3597
776
  void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb,
3598
776
                           int width) = ARGBAttenuateRow_C;
3599
776
  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
3600
0
    return -1;
3601
0
  }
3602
776
  if (height < 0) {
3603
0
    height = -height;
3604
0
    src_argb = src_argb + (height - 1) * src_stride_argb;
3605
0
    src_stride_argb = -src_stride_argb;
3606
0
  }
3607
  // Coalesce rows.
3608
776
  if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
3609
776
    width *= height;
3610
776
    height = 1;
3611
776
    src_stride_argb = dst_stride_argb = 0;
3612
776
  }
3613
776
#if defined(HAS_ARGBATTENUATEROW_SSSE3)
3614
776
  if (TestCpuFlag(kCpuHasSSSE3)) {
3615
776
    ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3;
3616
776
    if (IS_ALIGNED(width, 4)) {
3617
551
      ARGBAttenuateRow = ARGBAttenuateRow_SSSE3;
3618
551
    }
3619
776
  }
3620
776
#endif
3621
776
#if defined(HAS_ARGBATTENUATEROW_AVX2)
3622
776
  if (TestCpuFlag(kCpuHasAVX2)) {
3623
776
    ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2;
3624
776
    if (IS_ALIGNED(width, 8)) {
3625
428
      ARGBAttenuateRow = ARGBAttenuateRow_AVX2;
3626
428
    }
3627
776
  }
3628
776
#endif
3629
#if defined(HAS_ARGBATTENUATEROW_NEON)
3630
  if (TestCpuFlag(kCpuHasNEON)) {
3631
    ARGBAttenuateRow = ARGBAttenuateRow_Any_NEON;
3632
    if (IS_ALIGNED(width, 8)) {
3633
      ARGBAttenuateRow = ARGBAttenuateRow_NEON;
3634
    }
3635
  }
3636
#endif
3637
#if defined(HAS_ARGBATTENUATEROW_LSX)
3638
  if (TestCpuFlag(kCpuHasLSX)) {
3639
    ARGBAttenuateRow = ARGBAttenuateRow_Any_LSX;
3640
    if (IS_ALIGNED(width, 8)) {
3641
      ARGBAttenuateRow = ARGBAttenuateRow_LSX;
3642
    }
3643
  }
3644
#endif
3645
#if defined(HAS_ARGBATTENUATEROW_LASX)
3646
  if (TestCpuFlag(kCpuHasLASX)) {
3647
    ARGBAttenuateRow = ARGBAttenuateRow_Any_LASX;
3648
    if (IS_ALIGNED(width, 16)) {
3649
      ARGBAttenuateRow = ARGBAttenuateRow_LASX;
3650
    }
3651
  }
3652
#endif
3653
#if defined(HAS_ARGBATTENUATEROW_RVV)
3654
  if (TestCpuFlag(kCpuHasRVV)) {
3655
    ARGBAttenuateRow = ARGBAttenuateRow_RVV;
3656
  }
3657
#endif
3658
3659
1.55k
  for (y = 0; y < height; ++y) {
3660
776
    ARGBAttenuateRow(src_argb, dst_argb, width);
3661
776
    src_argb += src_stride_argb;
3662
776
    dst_argb += dst_stride_argb;
3663
776
  }
3664
776
  return 0;
3665
776
}
3666
3667
// Convert preattentuated ARGB to unattenuated ARGB.
3668
LIBYUV_API
3669
int ARGBUnattenuate(const uint8_t* src_argb,
3670
                    int src_stride_argb,
3671
                    uint8_t* dst_argb,
3672
                    int dst_stride_argb,
3673
                    int width,
3674
0
                    int height) {
3675
0
  int y;
3676
0
  void (*ARGBUnattenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb,
3677
0
                             int width) = ARGBUnattenuateRow_C;
3678
0
  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
3679
0
    return -1;
3680
0
  }
3681
0
  if (height < 0) {
3682
0
    height = -height;
3683
0
    src_argb = src_argb + (height - 1) * src_stride_argb;
3684
0
    src_stride_argb = -src_stride_argb;
3685
0
  }
3686
  // Coalesce rows.
3687
0
  if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
3688
0
    width *= height;
3689
0
    height = 1;
3690
0
    src_stride_argb = dst_stride_argb = 0;
3691
0
  }
3692
0
#if defined(HAS_ARGBUNATTENUATEROW_SSE2)
3693
0
  if (TestCpuFlag(kCpuHasSSE2)) {
3694
0
    ARGBUnattenuateRow = ARGBUnattenuateRow_Any_SSE2;
3695
0
    if (IS_ALIGNED(width, 4)) {
3696
0
      ARGBUnattenuateRow = ARGBUnattenuateRow_SSE2;
3697
0
    }
3698
0
  }
3699
0
#endif
3700
0
#if defined(HAS_ARGBUNATTENUATEROW_AVX2)
3701
0
  if (TestCpuFlag(kCpuHasAVX2)) {
3702
0
    ARGBUnattenuateRow = ARGBUnattenuateRow_Any_AVX2;
3703
0
    if (IS_ALIGNED(width, 8)) {
3704
0
      ARGBUnattenuateRow = ARGBUnattenuateRow_AVX2;
3705
0
    }
3706
0
  }
3707
0
#endif
3708
  // TODO(fbarchard): Neon version.
3709
3710
0
  for (y = 0; y < height; ++y) {
3711
0
    ARGBUnattenuateRow(src_argb, dst_argb, width);
3712
0
    src_argb += src_stride_argb;
3713
0
    dst_argb += dst_stride_argb;
3714
0
  }
3715
0
  return 0;
3716
0
}
3717
3718
// Convert ARGB to Grayed ARGB.
3719
LIBYUV_API
3720
int ARGBGrayTo(const uint8_t* src_argb,
3721
               int src_stride_argb,
3722
               uint8_t* dst_argb,
3723
               int dst_stride_argb,
3724
               int width,
3725
0
               int height) {
3726
0
  int y;
3727
0
  void (*ARGBGrayRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) =
3728
0
      ARGBGrayRow_C;
3729
0
  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
3730
0
    return -1;
3731
0
  }
3732
0
  if (height < 0) {
3733
0
    height = -height;
3734
0
    src_argb = src_argb + (height - 1) * src_stride_argb;
3735
0
    src_stride_argb = -src_stride_argb;
3736
0
  }
3737
  // Coalesce rows.
3738
0
  if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
3739
0
    width *= height;
3740
0
    height = 1;
3741
0
    src_stride_argb = dst_stride_argb = 0;
3742
0
  }
3743
0
#if defined(HAS_ARGBGRAYROW_SSSE3)
3744
0
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
3745
0
    ARGBGrayRow = ARGBGrayRow_SSSE3;
3746
0
  }
3747
0
#endif
3748
#if defined(HAS_ARGBGRAYROW_NEON)
3749
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
3750
    ARGBGrayRow = ARGBGrayRow_NEON;
3751
  }
3752
#endif
3753
#if defined(HAS_ARGBGRAYROW_NEON_DOTPROD)
3754
  if (TestCpuFlag(kCpuHasNeonDotProd) && IS_ALIGNED(width, 8)) {
3755
    ARGBGrayRow = ARGBGrayRow_NEON_DotProd;
3756
  }
3757
#endif
3758
#if defined(HAS_ARGBGRAYROW_LSX)
3759
  if (TestCpuFlag(kCpuHasLSX) && IS_ALIGNED(width, 8)) {
3760
    ARGBGrayRow = ARGBGrayRow_LSX;
3761
  }
3762
#endif
3763
#if defined(HAS_ARGBGRAYROW_LASX)
3764
  if (TestCpuFlag(kCpuHasLASX) && IS_ALIGNED(width, 16)) {
3765
    ARGBGrayRow = ARGBGrayRow_LASX;
3766
  }
3767
#endif
3768
3769
0
  for (y = 0; y < height; ++y) {
3770
0
    ARGBGrayRow(src_argb, dst_argb, width);
3771
0
    src_argb += src_stride_argb;
3772
0
    dst_argb += dst_stride_argb;
3773
0
  }
3774
0
  return 0;
3775
0
}
3776
3777
// Make a rectangle of ARGB gray scale.
3778
LIBYUV_API
3779
int ARGBGray(uint8_t* dst_argb,
3780
             int dst_stride_argb,
3781
             int dst_x,
3782
             int dst_y,
3783
             int width,
3784
0
             int height) {
3785
0
  int y;
3786
0
  void (*ARGBGrayRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) =
3787
0
      ARGBGrayRow_C;
3788
0
  uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
3789
0
  if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
3790
0
    return -1;
3791
0
  }
3792
  // Coalesce rows.
3793
0
  if (dst_stride_argb == width * 4) {
3794
0
    width *= height;
3795
0
    height = 1;
3796
0
    dst_stride_argb = 0;
3797
0
  }
3798
0
#if defined(HAS_ARGBGRAYROW_SSSE3)
3799
0
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
3800
0
    ARGBGrayRow = ARGBGrayRow_SSSE3;
3801
0
  }
3802
0
#endif
3803
#if defined(HAS_ARGBGRAYROW_NEON)
3804
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
3805
    ARGBGrayRow = ARGBGrayRow_NEON;
3806
  }
3807
#endif
3808
#if defined(HAS_ARGBGRAYROW_NEON_DOTPROD)
3809
  if (TestCpuFlag(kCpuHasNeonDotProd) && IS_ALIGNED(width, 8)) {
3810
    ARGBGrayRow = ARGBGrayRow_NEON_DotProd;
3811
  }
3812
#endif
3813
#if defined(HAS_ARGBGRAYROW_LSX)
3814
  if (TestCpuFlag(kCpuHasLSX) && IS_ALIGNED(width, 8)) {
3815
    ARGBGrayRow = ARGBGrayRow_LSX;
3816
  }
3817
#endif
3818
#if defined(HAS_ARGBGRAYROW_LASX)
3819
  if (TestCpuFlag(kCpuHasLASX) && IS_ALIGNED(width, 16)) {
3820
    ARGBGrayRow = ARGBGrayRow_LASX;
3821
  }
3822
#endif
3823
3824
0
  for (y = 0; y < height; ++y) {
3825
0
    ARGBGrayRow(dst, dst, width);
3826
0
    dst += dst_stride_argb;
3827
0
  }
3828
0
  return 0;
3829
0
}
3830
3831
// Make a rectangle of ARGB Sepia tone.
3832
LIBYUV_API
3833
int ARGBSepia(uint8_t* dst_argb,
3834
              int dst_stride_argb,
3835
              int dst_x,
3836
              int dst_y,
3837
              int width,
3838
0
              int height) {
3839
0
  int y;
3840
0
  void (*ARGBSepiaRow)(uint8_t* dst_argb, int width) = ARGBSepiaRow_C;
3841
0
  uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
3842
0
  if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
3843
0
    return -1;
3844
0
  }
3845
  // Coalesce rows.
3846
0
  if (dst_stride_argb == width * 4) {
3847
0
    width *= height;
3848
0
    height = 1;
3849
0
    dst_stride_argb = 0;
3850
0
  }
3851
0
#if defined(HAS_ARGBSEPIAROW_SSSE3)
3852
0
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
3853
0
    ARGBSepiaRow = ARGBSepiaRow_SSSE3;
3854
0
  }
3855
0
#endif
3856
#if defined(HAS_ARGBSEPIAROW_NEON)
3857
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
3858
    ARGBSepiaRow = ARGBSepiaRow_NEON;
3859
  }
3860
#endif
3861
#if defined(HAS_ARGBSEPIAROW_NEON_DOTPROD)
3862
  if (TestCpuFlag(kCpuHasNeonDotProd) && IS_ALIGNED(width, 8)) {
3863
    ARGBSepiaRow = ARGBSepiaRow_NEON_DotProd;
3864
  }
3865
#endif
3866
#if defined(HAS_ARGBSEPIAROW_LSX)
3867
  if (TestCpuFlag(kCpuHasLSX) && IS_ALIGNED(width, 8)) {
3868
    ARGBSepiaRow = ARGBSepiaRow_LSX;
3869
  }
3870
#endif
3871
#if defined(HAS_ARGBSEPIAROW_LASX)
3872
  if (TestCpuFlag(kCpuHasLASX) && IS_ALIGNED(width, 16)) {
3873
    ARGBSepiaRow = ARGBSepiaRow_LASX;
3874
  }
3875
#endif
3876
3877
0
  for (y = 0; y < height; ++y) {
3878
0
    ARGBSepiaRow(dst, width);
3879
0
    dst += dst_stride_argb;
3880
0
  }
3881
0
  return 0;
3882
0
}
3883
3884
// Apply a 4x4 matrix to each ARGB pixel.
3885
// Note: Normally for shading, but can be used to swizzle or invert.
3886
LIBYUV_API
3887
int ARGBColorMatrix(const uint8_t* src_argb,
3888
                    int src_stride_argb,
3889
                    uint8_t* dst_argb,
3890
                    int dst_stride_argb,
3891
                    const int8_t* matrix_argb,
3892
                    int width,
3893
0
                    int height) {
3894
0
  int y;
3895
0
  void (*ARGBColorMatrixRow)(const uint8_t* src_argb, uint8_t* dst_argb,
3896
0
                             const int8_t* matrix_argb, int width) =
3897
0
      ARGBColorMatrixRow_C;
3898
0
  if (!src_argb || !dst_argb || !matrix_argb || width <= 0 || height == 0) {
3899
0
    return -1;
3900
0
  }
3901
0
  if (height < 0) {
3902
0
    height = -height;
3903
0
    src_argb = src_argb + (height - 1) * src_stride_argb;
3904
0
    src_stride_argb = -src_stride_argb;
3905
0
  }
3906
  // Coalesce rows.
3907
0
  if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
3908
0
    width *= height;
3909
0
    height = 1;
3910
0
    src_stride_argb = dst_stride_argb = 0;
3911
0
  }
3912
0
#if defined(HAS_ARGBCOLORMATRIXROW_SSSE3)
3913
0
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
3914
0
    ARGBColorMatrixRow = ARGBColorMatrixRow_SSSE3;
3915
0
  }
3916
0
#endif
3917
#if defined(HAS_ARGBCOLORMATRIXROW_NEON)
3918
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
3919
    ARGBColorMatrixRow = ARGBColorMatrixRow_NEON;
3920
  }
3921
#endif
3922
#if defined(HAS_ARGBCOLORMATRIXROW_NEON_I8MM)
3923
  if (TestCpuFlag(kCpuHasNeonI8MM) && IS_ALIGNED(width, 8)) {
3924
    ARGBColorMatrixRow = ARGBColorMatrixRow_NEON_I8MM;
3925
  }
3926
#endif
3927
#if defined(HAS_ARGBCOLORMATRIXROW_LSX)
3928
  if (TestCpuFlag(kCpuHasLSX) && IS_ALIGNED(width, 8)) {
3929
    ARGBColorMatrixRow = ARGBColorMatrixRow_LSX;
3930
  }
3931
#endif
3932
0
  for (y = 0; y < height; ++y) {
3933
0
    ARGBColorMatrixRow(src_argb, dst_argb, matrix_argb, width);
3934
0
    src_argb += src_stride_argb;
3935
0
    dst_argb += dst_stride_argb;
3936
0
  }
3937
0
  return 0;
3938
0
}
3939
3940
// Apply a 4x3 matrix to each ARGB pixel.
3941
// Deprecated.
3942
LIBYUV_API
3943
int RGBColorMatrix(uint8_t* dst_argb,
3944
                   int dst_stride_argb,
3945
                   const int8_t* matrix_rgb,
3946
                   int dst_x,
3947
                   int dst_y,
3948
                   int width,
3949
0
                   int height) {
3950
0
  SIMD_ALIGNED(int8_t matrix_argb[16]);
3951
0
  uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
3952
0
  if (!dst_argb || !matrix_rgb || width <= 0 || height <= 0 || dst_x < 0 ||
3953
0
      dst_y < 0) {
3954
0
    return -1;
3955
0
  }
3956
3957
  // Convert 4x3 7 bit matrix to 4x4 6 bit matrix.
3958
0
  matrix_argb[0] = matrix_rgb[0] / 2;
3959
0
  matrix_argb[1] = matrix_rgb[1] / 2;
3960
0
  matrix_argb[2] = matrix_rgb[2] / 2;
3961
0
  matrix_argb[3] = matrix_rgb[3] / 2;
3962
0
  matrix_argb[4] = matrix_rgb[4] / 2;
3963
0
  matrix_argb[5] = matrix_rgb[5] / 2;
3964
0
  matrix_argb[6] = matrix_rgb[6] / 2;
3965
0
  matrix_argb[7] = matrix_rgb[7] / 2;
3966
0
  matrix_argb[8] = matrix_rgb[8] / 2;
3967
0
  matrix_argb[9] = matrix_rgb[9] / 2;
3968
0
  matrix_argb[10] = matrix_rgb[10] / 2;
3969
0
  matrix_argb[11] = matrix_rgb[11] / 2;
3970
0
  matrix_argb[14] = matrix_argb[13] = matrix_argb[12] = 0;
3971
0
  matrix_argb[15] = 64;  // 1.0
3972
3973
0
  return ARGBColorMatrix((const uint8_t*)(dst), dst_stride_argb, dst,
3974
0
                         dst_stride_argb, &matrix_argb[0], width, height);
3975
0
}
3976
3977
// Apply a color table each ARGB pixel.
3978
// Table contains 256 ARGB values.
3979
LIBYUV_API
3980
int ARGBColorTable(uint8_t* dst_argb,
3981
                   int dst_stride_argb,
3982
                   const uint8_t* table_argb,
3983
                   int dst_x,
3984
                   int dst_y,
3985
                   int width,
3986
0
                   int height) {
3987
0
  int y;
3988
0
  void (*ARGBColorTableRow)(uint8_t* dst_argb, const uint8_t* table_argb,
3989
0
                            int width) = ARGBColorTableRow_C;
3990
0
  uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
3991
0
  if (!dst_argb || !table_argb || width <= 0 || height <= 0 || dst_x < 0 ||
3992
0
      dst_y < 0) {
3993
0
    return -1;
3994
0
  }
3995
  // Coalesce rows.
3996
0
  if (dst_stride_argb == width * 4) {
3997
0
    width *= height;
3998
0
    height = 1;
3999
0
    dst_stride_argb = 0;
4000
0
  }
4001
0
#if defined(HAS_ARGBCOLORTABLEROW_X86)
4002
0
  if (TestCpuFlag(kCpuHasX86)) {
4003
0
    ARGBColorTableRow = ARGBColorTableRow_X86;
4004
0
  }
4005
0
#endif
4006
0
  for (y = 0; y < height; ++y) {
4007
0
    ARGBColorTableRow(dst, table_argb, width);
4008
0
    dst += dst_stride_argb;
4009
0
  }
4010
0
  return 0;
4011
0
}
4012
4013
// Apply a color table each ARGB pixel but preserve destination alpha.
4014
// Table contains 256 ARGB values.
4015
LIBYUV_API
4016
int RGBColorTable(uint8_t* dst_argb,
4017
                  int dst_stride_argb,
4018
                  const uint8_t* table_argb,
4019
                  int dst_x,
4020
                  int dst_y,
4021
                  int width,
4022
0
                  int height) {
4023
0
  int y;
4024
0
  void (*RGBColorTableRow)(uint8_t* dst_argb, const uint8_t* table_argb,
4025
0
                           int width) = RGBColorTableRow_C;
4026
0
  uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
4027
0
  if (!dst_argb || !table_argb || width <= 0 || height <= 0 || dst_x < 0 ||
4028
0
      dst_y < 0) {
4029
0
    return -1;
4030
0
  }
4031
  // Coalesce rows.
4032
0
  if (dst_stride_argb == width * 4) {
4033
0
    width *= height;
4034
0
    height = 1;
4035
0
    dst_stride_argb = 0;
4036
0
  }
4037
0
#if defined(HAS_RGBCOLORTABLEROW_X86)
4038
0
  if (TestCpuFlag(kCpuHasX86)) {
4039
0
    RGBColorTableRow = RGBColorTableRow_X86;
4040
0
  }
4041
0
#endif
4042
0
  for (y = 0; y < height; ++y) {
4043
0
    RGBColorTableRow(dst, table_argb, width);
4044
0
    dst += dst_stride_argb;
4045
0
  }
4046
0
  return 0;
4047
0
}
4048
4049
// ARGBQuantize is used to posterize art.
4050
// e.g. rgb / qvalue * qvalue + qvalue / 2
4051
// But the low levels implement efficiently with 3 parameters, and could be
4052
// used for other high level operations.
4053
// dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset;
4054
// where scale is 1 / interval_size as a fixed point value.
4055
// The divide is replaces with a multiply by reciprocal fixed point multiply.
4056
// Caveat - although SSE2 saturates, the C function does not and should be used
4057
// with care if doing anything but quantization.
4058
LIBYUV_API
4059
int ARGBQuantize(uint8_t* dst_argb,
4060
                 int dst_stride_argb,
4061
                 int scale,
4062
                 int interval_size,
4063
                 int interval_offset,
4064
                 int dst_x,
4065
                 int dst_y,
4066
                 int width,
4067
0
                 int height) {
4068
0
  int y;
4069
0
  void (*ARGBQuantizeRow)(uint8_t* dst_argb, int scale, int interval_size,
4070
0
                          int interval_offset, int width) = ARGBQuantizeRow_C;
4071
0
  uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
4072
0
  if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0 ||
4073
0
      interval_size < 1 || interval_size > 255) {
4074
0
    return -1;
4075
0
  }
4076
  // Coalesce rows.
4077
0
  if (dst_stride_argb == width * 4) {
4078
0
    width *= height;
4079
0
    height = 1;
4080
0
    dst_stride_argb = 0;
4081
0
  }
4082
0
#if defined(HAS_ARGBQUANTIZEROW_SSE2)
4083
0
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4)) {
4084
0
    ARGBQuantizeRow = ARGBQuantizeRow_SSE2;
4085
0
  }
4086
0
#endif
4087
#if defined(HAS_ARGBQUANTIZEROW_NEON)
4088
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
4089
    ARGBQuantizeRow = ARGBQuantizeRow_NEON;
4090
  }
4091
#endif
4092
#if defined(HAS_ARGBQUANTIZEROW_LSX)
4093
  if (TestCpuFlag(kCpuHasLSX) && IS_ALIGNED(width, 8)) {
4094
    ARGBQuantizeRow = ARGBQuantizeRow_LSX;
4095
  }
4096
#endif
4097
0
  for (y = 0; y < height; ++y) {
4098
0
    ARGBQuantizeRow(dst, scale, interval_size, interval_offset, width);
4099
0
    dst += dst_stride_argb;
4100
0
  }
4101
0
  return 0;
4102
0
}
4103
4104
// Computes table of cumulative sum for image where the value is the sum
4105
// of all values above and to the left of the entry. Used by ARGBBlur.
4106
LIBYUV_API
4107
int ARGBComputeCumulativeSum(const uint8_t* src_argb,
4108
                             int src_stride_argb,
4109
                             int32_t* dst_cumsum,
4110
                             int dst_stride32_cumsum,
4111
                             int width,
4112
0
                             int height) {
4113
0
  int y;
4114
0
  void (*ComputeCumulativeSumRow)(const uint8_t* row, int32_t* cumsum,
4115
0
                                  const int32_t* previous_cumsum, int width) =
4116
0
      ComputeCumulativeSumRow_C;
4117
0
  int32_t* previous_cumsum = dst_cumsum;
4118
0
  if (!dst_cumsum || !src_argb || width <= 0 || height <= 0) {
4119
0
    return -1;
4120
0
  }
4121
0
#if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
4122
0
  if (TestCpuFlag(kCpuHasSSE2)) {
4123
0
    ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
4124
0
  }
4125
0
#endif
4126
4127
0
  memset(dst_cumsum, 0, width * sizeof(dst_cumsum[0]) * 4);  // 4 int per pixel.
4128
0
  for (y = 0; y < height; ++y) {
4129
0
    ComputeCumulativeSumRow(src_argb, dst_cumsum, previous_cumsum, width);
4130
0
    previous_cumsum = dst_cumsum;
4131
0
    dst_cumsum += dst_stride32_cumsum;
4132
0
    src_argb += src_stride_argb;
4133
0
  }
4134
0
  return 0;
4135
0
}
4136
4137
// Blur ARGB image.
4138
// Caller should allocate CumulativeSum table of width * height * 16 bytes
4139
// aligned to 16 byte boundary. height can be radius * 2 + 2 to save memory
4140
// as the buffer is treated as circular.
4141
LIBYUV_API
4142
int ARGBBlur(const uint8_t* src_argb,
4143
             int src_stride_argb,
4144
             uint8_t* dst_argb,
4145
             int dst_stride_argb,
4146
             int32_t* dst_cumsum,
4147
             int dst_stride32_cumsum,
4148
             int width,
4149
             int height,
4150
0
             int radius) {
4151
0
  int y;
4152
0
  void (*ComputeCumulativeSumRow)(const uint8_t* row, int32_t* cumsum,
4153
0
                                  const int32_t* previous_cumsum, int width) =
4154
0
      ComputeCumulativeSumRow_C;
4155
0
  void (*CumulativeSumToAverageRow)(
4156
0
      const int32_t* topleft, const int32_t* botleft, int width, int area,
4157
0
      uint8_t* dst, int count) = CumulativeSumToAverageRow_C;
4158
0
  int32_t* cumsum_bot_row;
4159
0
  int32_t* max_cumsum_bot_row;
4160
0
  int32_t* cumsum_top_row;
4161
4162
0
  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
4163
0
    return -1;
4164
0
  }
4165
0
  if (height < 0) {
4166
0
    height = -height;
4167
0
    src_argb = src_argb + (height - 1) * src_stride_argb;
4168
0
    src_stride_argb = -src_stride_argb;
4169
0
  }
4170
0
  if (radius > height) {
4171
0
    radius = height;
4172
0
  }
4173
0
  if (radius > (width / 2 - 1)) {
4174
0
    radius = width / 2 - 1;
4175
0
  }
4176
0
  if (radius <= 0 || height <= 1) {
4177
0
    return -1;
4178
0
  }
4179
0
#if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
4180
0
  if (TestCpuFlag(kCpuHasSSE2)) {
4181
0
    ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
4182
0
    CumulativeSumToAverageRow = CumulativeSumToAverageRow_SSE2;
4183
0
  }
4184
0
#endif
4185
  // Compute enough CumulativeSum for first row to be blurred. After this
4186
  // one row of CumulativeSum is updated at a time.
4187
0
  ARGBComputeCumulativeSum(src_argb, src_stride_argb, dst_cumsum,
4188
0
                           dst_stride32_cumsum, width, radius);
4189
4190
0
  src_argb = src_argb + radius * src_stride_argb;
4191
0
  cumsum_bot_row = &dst_cumsum[(radius - 1) * dst_stride32_cumsum];
4192
4193
0
  max_cumsum_bot_row = &dst_cumsum[(radius * 2 + 2) * dst_stride32_cumsum];
4194
0
  cumsum_top_row = &dst_cumsum[0];
4195
4196
0
  for (y = 0; y < height; ++y) {
4197
0
    int top_y = ((y - radius - 1) >= 0) ? (y - radius - 1) : 0;
4198
0
    int bot_y = ((y + radius) < height) ? (y + radius) : (height - 1);
4199
0
    int area = radius * (bot_y - top_y);
4200
0
    int boxwidth = radius * 4;
4201
0
    int x;
4202
0
    int n;
4203
4204
    // Increment cumsum_top_row pointer with circular buffer wrap around.
4205
0
    if (top_y) {
4206
0
      cumsum_top_row += dst_stride32_cumsum;
4207
0
      if (cumsum_top_row >= max_cumsum_bot_row) {
4208
0
        cumsum_top_row = dst_cumsum;
4209
0
      }
4210
0
    }
4211
    // Increment cumsum_bot_row pointer with circular buffer wrap around and
4212
    // then fill in a row of CumulativeSum.
4213
0
    if ((y + radius) < height) {
4214
0
      const int32_t* prev_cumsum_bot_row = cumsum_bot_row;
4215
0
      cumsum_bot_row += dst_stride32_cumsum;
4216
0
      if (cumsum_bot_row >= max_cumsum_bot_row) {
4217
0
        cumsum_bot_row = dst_cumsum;
4218
0
      }
4219
0
      ComputeCumulativeSumRow(src_argb, cumsum_bot_row, prev_cumsum_bot_row,
4220
0
                              width);
4221
0
      src_argb += src_stride_argb;
4222
0
    }
4223
4224
    // Left clipped.
4225
0
    for (x = 0; x < radius + 1; ++x) {
4226
0
      CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row, boxwidth, area,
4227
0
                                &dst_argb[x * 4], 1);
4228
0
      area += (bot_y - top_y);
4229
0
      boxwidth += 4;
4230
0
    }
4231
4232
    // Middle unclipped.
4233
0
    n = (width - 1) - radius - x + 1;
4234
0
    CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row, boxwidth, area,
4235
0
                              &dst_argb[x * 4], n);
4236
4237
    // Right clipped.
4238
0
    for (x += n; x <= width - 1; ++x) {
4239
0
      area -= (bot_y - top_y);
4240
0
      boxwidth -= 4;
4241
0
      CumulativeSumToAverageRow(cumsum_top_row + (x - radius - 1) * 4,
4242
0
                                cumsum_bot_row + (x - radius - 1) * 4, boxwidth,
4243
0
                                area, &dst_argb[x * 4], 1);
4244
0
    }
4245
0
    dst_argb += dst_stride_argb;
4246
0
  }
4247
0
  return 0;
4248
0
}
4249
4250
// Multiply ARGB image by a specified ARGB value.
4251
LIBYUV_API
4252
int ARGBShade(const uint8_t* src_argb,
4253
              int src_stride_argb,
4254
              uint8_t* dst_argb,
4255
              int dst_stride_argb,
4256
              int width,
4257
              int height,
4258
0
              uint32_t value) {
4259
0
  int y;
4260
0
  void (*ARGBShadeRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width,
4261
0
                       uint32_t value) = ARGBShadeRow_C;
4262
0
  if (!src_argb || !dst_argb || width <= 0 || height == 0 || value == 0u) {
4263
0
    return -1;
4264
0
  }
4265
0
  if (height < 0) {
4266
0
    height = -height;
4267
0
    src_argb = src_argb + (height - 1) * src_stride_argb;
4268
0
    src_stride_argb = -src_stride_argb;
4269
0
  }
4270
  // Coalesce rows.
4271
0
  if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
4272
0
    width *= height;
4273
0
    height = 1;
4274
0
    src_stride_argb = dst_stride_argb = 0;
4275
0
  }
4276
0
#if defined(HAS_ARGBSHADEROW_SSE2)
4277
0
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4)) {
4278
0
    ARGBShadeRow = ARGBShadeRow_SSE2;
4279
0
  }
4280
0
#endif
4281
#if defined(HAS_ARGBSHADEROW_NEON)
4282
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
4283
    ARGBShadeRow = ARGBShadeRow_NEON;
4284
  }
4285
#endif
4286
#if defined(HAS_ARGBSHADEROW_LSX)
4287
  if (TestCpuFlag(kCpuHasLSX) && IS_ALIGNED(width, 4)) {
4288
    ARGBShadeRow = ARGBShadeRow_LSX;
4289
  }
4290
#endif
4291
#if defined(HAS_ARGBSHADEROW_LASX)
4292
  if (TestCpuFlag(kCpuHasLASX) && IS_ALIGNED(width, 8)) {
4293
    ARGBShadeRow = ARGBShadeRow_LASX;
4294
  }
4295
#endif
4296
4297
0
  for (y = 0; y < height; ++y) {
4298
0
    ARGBShadeRow(src_argb, dst_argb, width, value);
4299
0
    src_argb += src_stride_argb;
4300
0
    dst_argb += dst_stride_argb;
4301
0
  }
4302
0
  return 0;
4303
0
}
4304
4305
// Interpolate 2 planes by specified amount (0 to 255).
4306
LIBYUV_API
4307
int InterpolatePlane(const uint8_t* src0,
4308
                     int src_stride0,
4309
                     const uint8_t* src1,
4310
                     int src_stride1,
4311
                     uint8_t* dst,
4312
                     int dst_stride,
4313
                     int width,
4314
                     int height,
4315
0
                     int interpolation) {
4316
0
  int y;
4317
0
  void (*InterpolateRow)(uint8_t* dst_ptr, const uint8_t* src_ptr,
4318
0
                         ptrdiff_t src_stride, int dst_width,
4319
0
                         int source_y_fraction) = InterpolateRow_C;
4320
0
  if (!src0 || !src1 || !dst || width <= 0 || height == 0) {
4321
0
    return -1;
4322
0
  }
4323
  // Negative height means invert the image.
4324
0
  if (height < 0) {
4325
0
    height = -height;
4326
0
    dst = dst + (height - 1) * dst_stride;
4327
0
    dst_stride = -dst_stride;
4328
0
  }
4329
  // Coalesce rows.
4330
0
  if (src_stride0 == width && src_stride1 == width && dst_stride == width) {
4331
0
    width *= height;
4332
0
    height = 1;
4333
0
    src_stride0 = src_stride1 = dst_stride = 0;
4334
0
  }
4335
0
#if defined(HAS_INTERPOLATEROW_SSSE3)
4336
0
  if (TestCpuFlag(kCpuHasSSSE3)) {
4337
0
    InterpolateRow = InterpolateRow_Any_SSSE3;
4338
0
    if (IS_ALIGNED(width, 16)) {
4339
0
      InterpolateRow = InterpolateRow_SSSE3;
4340
0
    }
4341
0
  }
4342
0
#endif
4343
0
#if defined(HAS_INTERPOLATEROW_AVX2)
4344
0
  if (TestCpuFlag(kCpuHasAVX2)) {
4345
0
    InterpolateRow = InterpolateRow_Any_AVX2;
4346
0
    if (IS_ALIGNED(width, 32)) {
4347
0
      InterpolateRow = InterpolateRow_AVX2;
4348
0
    }
4349
0
  }
4350
0
#endif
4351
#if defined(HAS_INTERPOLATEROW_NEON)
4352
  if (TestCpuFlag(kCpuHasNEON)) {
4353
    InterpolateRow = InterpolateRow_Any_NEON;
4354
    if (IS_ALIGNED(width, 16)) {
4355
      InterpolateRow = InterpolateRow_NEON;
4356
    }
4357
  }
4358
#endif
4359
#if defined(HAS_INTERPOLATEROW_SME)
4360
  if (TestCpuFlag(kCpuHasSME)) {
4361
    InterpolateRow = InterpolateRow_SME;
4362
  }
4363
#endif
4364
#if defined(HAS_INTERPOLATEROW_LSX)
4365
  if (TestCpuFlag(kCpuHasLSX)) {
4366
    InterpolateRow = InterpolateRow_Any_LSX;
4367
    if (IS_ALIGNED(width, 32)) {
4368
      InterpolateRow = InterpolateRow_LSX;
4369
    }
4370
  }
4371
#endif
4372
#if defined(HAS_INTERPOLATEROW_RVV)
4373
  if (TestCpuFlag(kCpuHasRVV)) {
4374
    InterpolateRow = InterpolateRow_RVV;
4375
  }
4376
#endif
4377
4378
0
  for (y = 0; y < height; ++y) {
4379
0
    InterpolateRow(dst, src0, src1 - src0, width, interpolation);
4380
0
    src0 += src_stride0;
4381
0
    src1 += src_stride1;
4382
0
    dst += dst_stride;
4383
0
  }
4384
0
  return 0;
4385
0
}
4386
4387
// Interpolate 2 planes by specified amount (0 to 255).
4388
LIBYUV_API
4389
int InterpolatePlane_16(const uint16_t* src0,
4390
                        int src_stride0,
4391
                        const uint16_t* src1,
4392
                        int src_stride1,
4393
                        uint16_t* dst,
4394
                        int dst_stride,
4395
                        int width,
4396
                        int height,
4397
0
                        int interpolation) {
4398
0
  int y;
4399
0
  void (*InterpolateRow_16)(uint16_t* dst_ptr, const uint16_t* src_ptr,
4400
0
                            ptrdiff_t src_stride, int dst_width,
4401
0
                            int source_y_fraction) = InterpolateRow_16_C;
4402
0
  if (!src0 || !src1 || !dst || width <= 0 || height == 0) {
4403
0
    return -1;
4404
0
  }
4405
  // Negative height means invert the image.
4406
0
  if (height < 0) {
4407
0
    height = -height;
4408
0
    dst = dst + (height - 1) * dst_stride;
4409
0
    dst_stride = -dst_stride;
4410
0
  }
4411
  // Coalesce rows.
4412
0
  if (src_stride0 == width && src_stride1 == width && dst_stride == width) {
4413
0
    width *= height;
4414
0
    height = 1;
4415
0
    src_stride0 = src_stride1 = dst_stride = 0;
4416
0
  }
4417
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
4418
  if (TestCpuFlag(kCpuHasSSSE3)) {
4419
    InterpolateRow_16 = InterpolateRow_16_Any_SSSE3;
4420
    if (IS_ALIGNED(width, 16)) {
4421
      InterpolateRow_16 = InterpolateRow_16_SSSE3;
4422
    }
4423
  }
4424
#endif
4425
#if defined(HAS_INTERPOLATEROW_16_AVX2)
4426
  if (TestCpuFlag(kCpuHasAVX2)) {
4427
    InterpolateRow_16 = InterpolateRow_16_Any_AVX2;
4428
    if (IS_ALIGNED(width, 32)) {
4429
      InterpolateRow_16 = InterpolateRow_16_AVX2;
4430
    }
4431
  }
4432
#endif
4433
#if defined(HAS_INTERPOLATEROW_16_NEON)
4434
  if (TestCpuFlag(kCpuHasNEON)) {
4435
    InterpolateRow_16 = InterpolateRow_16_Any_NEON;
4436
    if (IS_ALIGNED(width, 8)) {
4437
      InterpolateRow_16 = InterpolateRow_16_NEON;
4438
    }
4439
  }
4440
#endif
4441
#if defined(HAS_INTERPOLATEROW_16_SME)
4442
  if (TestCpuFlag(kCpuHasSME)) {
4443
    InterpolateRow_16 = InterpolateRow_16_SME;
4444
  }
4445
#endif
4446
#if defined(HAS_INTERPOLATEROW_16_LSX)
4447
  if (TestCpuFlag(kCpuHasLSX)) {
4448
    InterpolateRow_16 = InterpolateRow_16_Any_LSX;
4449
    if (IS_ALIGNED(width, 32)) {
4450
      InterpolateRow_16 = InterpolateRow_16_LSX;
4451
    }
4452
  }
4453
#endif
4454
4455
0
  for (y = 0; y < height; ++y) {
4456
0
    InterpolateRow_16(dst, src0, src1 - src0, width, interpolation);
4457
0
    src0 += src_stride0;
4458
0
    src1 += src_stride1;
4459
0
    dst += dst_stride;
4460
0
  }
4461
0
  return 0;
4462
0
}
4463
4464
// Interpolate 2 ARGB images by specified amount (0 to 255).
4465
LIBYUV_API
4466
int ARGBInterpolate(const uint8_t* src_argb0,
4467
                    int src_stride_argb0,
4468
                    const uint8_t* src_argb1,
4469
                    int src_stride_argb1,
4470
                    uint8_t* dst_argb,
4471
                    int dst_stride_argb,
4472
                    int width,
4473
                    int height,
4474
0
                    int interpolation) {
4475
0
  return InterpolatePlane(src_argb0, src_stride_argb0, src_argb1,
4476
0
                          src_stride_argb1, dst_argb, dst_stride_argb,
4477
0
                          width * 4, height, interpolation);
4478
0
}
4479
4480
// Interpolate 2 YUV images by specified amount (0 to 255).
4481
LIBYUV_API
4482
int I420Interpolate(const uint8_t* src0_y,
4483
                    int src0_stride_y,
4484
                    const uint8_t* src0_u,
4485
                    int src0_stride_u,
4486
                    const uint8_t* src0_v,
4487
                    int src0_stride_v,
4488
                    const uint8_t* src1_y,
4489
                    int src1_stride_y,
4490
                    const uint8_t* src1_u,
4491
                    int src1_stride_u,
4492
                    const uint8_t* src1_v,
4493
                    int src1_stride_v,
4494
                    uint8_t* dst_y,
4495
                    int dst_stride_y,
4496
                    uint8_t* dst_u,
4497
                    int dst_stride_u,
4498
                    uint8_t* dst_v,
4499
                    int dst_stride_v,
4500
                    int width,
4501
                    int height,
4502
0
                    int interpolation) {
4503
0
  int halfwidth = (width + 1) >> 1;
4504
0
  int halfheight = (height + 1) >> 1;
4505
4506
0
  if (!src0_y || !src0_u || !src0_v || !src1_y || !src1_u || !src1_v ||
4507
0
      !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
4508
0
    return -1;
4509
0
  }
4510
4511
0
  InterpolatePlane(src0_y, src0_stride_y, src1_y, src1_stride_y, dst_y,
4512
0
                   dst_stride_y, width, height, interpolation);
4513
0
  InterpolatePlane(src0_u, src0_stride_u, src1_u, src1_stride_u, dst_u,
4514
0
                   dst_stride_u, halfwidth, halfheight, interpolation);
4515
0
  InterpolatePlane(src0_v, src0_stride_v, src1_v, src1_stride_v, dst_v,
4516
0
                   dst_stride_v, halfwidth, halfheight, interpolation);
4517
0
  return 0;
4518
0
}
4519
4520
// Shuffle ARGB channel order.  e.g. BGRA to ARGB.
4521
LIBYUV_API
4522
// Reorder the 4 channels of each 32-bit pixel according to |shuffler|
// (a 16-byte shuffle control, e.g. kShuffleMaskBGRAToARGB).
// Returns 0 on success, -1 on invalid parameters.
int ARGBShuffle(const uint8_t* src_bgra,
                int src_stride_bgra,
                uint8_t* dst_argb,
                int dst_stride_argb,
                const uint8_t* shuffler,
                int width,
                int height) {
  int y;
  // Row worker; defaults to the portable C version and is replaced below by
  // the best SIMD variant available at runtime.
  void (*ARGBShuffleRow)(const uint8_t* src_bgra, uint8_t* dst_argb,
                         const uint8_t* shuffler, int width) = ARGBShuffleRow_C;
  if (!src_bgra || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_bgra = src_bgra + (height - 1) * src_stride_bgra;
    src_stride_bgra = -src_stride_bgra;
  }
  // Coalesce rows: if both images are contiguous (stride == row bytes),
  // process the whole plane as one long row.
  if (src_stride_bgra == width * 4 && dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_bgra = dst_stride_argb = 0;
  }
  // CPU dispatch: later blocks override earlier ones, so the strongest
  // supported instruction set wins.  The "Any" variants handle widths not
  // a multiple of the vector lane count.
#if defined(HAS_ARGBSHUFFLEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ARGBShuffleRow = ARGBShuffleRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      ARGBShuffleRow = ARGBShuffleRow_SSSE3;
    }
  }
#endif
#if defined(HAS_ARGBSHUFFLEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBShuffleRow = ARGBShuffleRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      ARGBShuffleRow = ARGBShuffleRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBSHUFFLEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBShuffleRow = ARGBShuffleRow_Any_NEON;
    if (IS_ALIGNED(width, 4)) {
      ARGBShuffleRow = ARGBShuffleRow_NEON;
    }
  }
#endif
#if defined(HAS_ARGBSHUFFLEROW_LSX)
  if (TestCpuFlag(kCpuHasLSX)) {
    ARGBShuffleRow = ARGBShuffleRow_Any_LSX;
    if (IS_ALIGNED(width, 8)) {
      ARGBShuffleRow = ARGBShuffleRow_LSX;
    }
  }
#endif
#if defined(HAS_ARGBSHUFFLEROW_LASX)
  if (TestCpuFlag(kCpuHasLASX)) {
    ARGBShuffleRow = ARGBShuffleRow_Any_LASX;
    if (IS_ALIGNED(width, 16)) {
      ARGBShuffleRow = ARGBShuffleRow_LASX;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    ARGBShuffleRow(src_bgra, dst_argb, shuffler, width);
    src_bgra += src_stride_bgra;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
4595
4596
// Shuffle AR64 channel order.  e.g. AR64 to AB64.
// Pixels are 4 channels of 16 bits each; strides are in uint16_t units
// (the row loop advances uint16_t pointers by the stride directly).
// Returns 0 on success, -1 on invalid parameters.
LIBYUV_API
int AR64Shuffle(const uint16_t* src_ar64,
                int src_stride_ar64,
                uint16_t* dst_ar64,
                int dst_stride_ar64,
                const uint8_t* shuffler,
                int width,
                int height) {
  int y;
  // Row worker operates on bytes, so pointers are cast and width doubled
  // at the call site below (one AR64 pixel == two 4-byte groups).
  void (*AR64ShuffleRow)(const uint8_t* src_ar64, uint8_t* dst_ar64,
                         const uint8_t* shuffler, int width) = AR64ShuffleRow_C;
  if (!src_ar64 || !dst_ar64 || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_ar64 = src_ar64 + (height - 1) * src_stride_ar64;
    src_stride_ar64 = -src_stride_ar64;
  }
  // Coalesce rows: stride of width * 4 uint16_t elements means contiguous.
  if (src_stride_ar64 == width * 4 && dst_stride_ar64 == width * 4) {
    width *= height;
    height = 1;
    src_stride_ar64 = dst_stride_ar64 = 0;
  }
  // Assembly versions can be reused if it's implemented with shuffle.
  // (The ARGB 4-byte shuffle kernels are byte-shuffles, so they apply to
  //  AR64 data too when given a doubled width and a suitable shuffler.)
#if defined(HAS_ARGBSHUFFLEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    AR64ShuffleRow = ARGBShuffleRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      AR64ShuffleRow = ARGBShuffleRow_SSSE3;
    }
  }
#endif
#if defined(HAS_ARGBSHUFFLEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    AR64ShuffleRow = ARGBShuffleRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      AR64ShuffleRow = ARGBShuffleRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBSHUFFLEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    AR64ShuffleRow = ARGBShuffleRow_Any_NEON;
    if (IS_ALIGNED(width, 4)) {
      AR64ShuffleRow = ARGBShuffleRow_NEON;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    // width * 2: each 8-byte AR64 pixel is processed as two 4-byte units.
    AR64ShuffleRow((uint8_t*)(src_ar64), (uint8_t*)(dst_ar64), shuffler,
                   width * 2);
    src_ar64 += src_stride_ar64;
    dst_ar64 += dst_stride_ar64;
  }
  return 0;
}
4657
4658
// Gauss blur a float plane using Gaussian 5x5 filter with
// coefficients of 1, 4, 6, 4, 1.
// Each destination pixel is a blur of the 5x5
// pixels from the source.
// Source edges are clamped.
// Edge is 2 pixels on each side, and interior is multiple of 4.
// Strides are in float units.  Returns 0 on success, -1 on invalid
// parameters, 1 on allocation failure.
LIBYUV_API
int GaussPlane_F32(const float* src,
                   int src_stride,
                   float* dst,
                   int dst_stride,
                   int width,
                   int height) {
  int y;
  // Vertical pass: weighted sum of 5 source rows into a temp row.
  void (*GaussCol_F32)(const float* src0, const float* src1, const float* src2,
                       const float* src3, const float* src4, float* dst,
                       int width) = GaussCol_F32_C;
  // Horizontal pass: 1-4-6-4-1 filter across the temp row.
  void (*GaussRow_F32)(const float* src, float* dst, int width) =
      GaussRow_F32_C;
  if (!src || !dst || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src = src + (height - 1) * src_stride;
    src_stride = -src_stride;
  }

#if defined(HAS_GAUSSCOL_F32_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
    GaussCol_F32 = GaussCol_F32_NEON;
  }
#endif
#if defined(HAS_GAUSSROW_F32_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
    GaussRow_F32 = GaussRow_F32_NEON;
  }
#endif
  {
    // 2 pixels on each side, but aligned out to 16 bytes.
    // Layout: [4 floats pad][width floats][4 floats pad].
    align_buffer_64(rowbuf, (4 + width + 4) * 4);
    if (!rowbuf)
      return 1;
    // Zero both pads so reads past the extruded edge are deterministic.
    memset(rowbuf, 0, 16);
    memset(rowbuf + (4 + width) * 4, 0, 16);
    float* row = (float*)(rowbuf + 16);
    // Top edge clamp: the first output row uses the first source row for
    // the three "above" taps.
    const float* src0 = src;
    const float* src1 = src;
    const float* src2 = src;
    const float* src3 = src2 + ((height > 1) ? src_stride : 0);
    const float* src4 = src3 + ((height > 2) ? src_stride : 0);

    for (y = 0; y < height; ++y) {
      GaussCol_F32(src0, src1, src2, src3, src4, row, width);

      // Extrude edge by 2 floats
      row[-2] = row[-1] = row[0];
      row[width + 1] = row[width] = row[width - 1];

      GaussRow_F32(row - 2, dst, width);

      // Slide the 5-row window down by one; src4 is clamped at the last
      // source row (bottom edge clamp).
      src0 = src1;
      src1 = src2;
      src2 = src3;
      src3 = src4;
      if ((y + 2) < (height - 1)) {
        src4 += src_stride;
      }
      dst += dst_stride;
    }
    free_aligned_buffer_64(rowbuf);
  }
  return 0;
}
4733
4734
// Sobel ARGB effect.
// Shared implementation for the Sobel variants below: converts ARGB to
// luma (YJ), computes Sobel X and Y per row, then hands both gradients to
// the caller-supplied |SobelRow| which formats the output (ARGB, plane,
// or XY).  Returns 0 on success, -1 on invalid parameters, 1 on
// allocation failure.
static int ARGBSobelize(const uint8_t* src_argb,
                        int src_stride_argb,
                        uint8_t* dst_argb,
                        int dst_stride_argb,
                        int width,
                        int height,
                        void (*SobelRow)(const uint8_t* src_sobelx,
                                         const uint8_t* src_sobely,
                                         uint8_t* dst,
                                         int width)) {
  int y;
  void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_g, int width) =
      ARGBToYJRow_C;
  void (*SobelYRow)(const uint8_t* src_y0, const uint8_t* src_y1,
                    uint8_t* dst_sobely, int width) = SobelYRow_C;
  void (*SobelXRow)(const uint8_t* src_y0, const uint8_t* src_y1,
                    const uint8_t* src_y2, uint8_t* dst_sobely, int width) =
      SobelXRow_C;
  const int kEdge = 16;  // Extra pixels at start of row for extrude/align.
  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }

  // CPU dispatch for the ARGB->luma row; later blocks override earlier.
#if defined(HAS_ARGBTOYJROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      ARGBToYJRow = ARGBToYJRow_SSSE3;
    }
  }
#endif
#if defined(HAS_ARGBTOYJROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBToYJRow = ARGBToYJRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      ARGBToYJRow = ARGBToYJRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBTOYJROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBToYJRow = ARGBToYJRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      ARGBToYJRow = ARGBToYJRow_NEON;
    }
  }
#endif
#if defined(HAS_ARGBTOYJROW_LSX)
  if (TestCpuFlag(kCpuHasLSX)) {
    ARGBToYJRow = ARGBToYJRow_Any_LSX;
    if (IS_ALIGNED(width, 16)) {
      ARGBToYJRow = ARGBToYJRow_LSX;
    }
  }
#endif
#if defined(HAS_ARGBTOYJROW_LASX)
  if (TestCpuFlag(kCpuHasLASX)) {
    ARGBToYJRow = ARGBToYJRow_Any_LASX;
    if (IS_ALIGNED(width, 32)) {
      ARGBToYJRow = ARGBToYJRow_LASX;
    }
  }
#endif
#if defined(HAS_ARGBTOYJROW_RVV)
  if (TestCpuFlag(kCpuHasRVV)) {
    ARGBToYJRow = ARGBToYJRow_RVV;
  }
#endif

#if defined(HAS_SOBELYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    SobelYRow = SobelYRow_SSE2;
  }
#endif
#if defined(HAS_SOBELYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SobelYRow = SobelYRow_NEON;
  }
#endif
#if defined(HAS_SOBELXROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    SobelXRow = SobelXRow_SSE2;
  }
#endif
#if defined(HAS_SOBELXROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SobelXRow = SobelXRow_NEON;
  }
#endif
  {
    // 3 rows with edges before/after.
    // Buffer holds the two gradient rows plus a circular queue of 3 luma
    // rows; each row is padded to a 32-byte multiple.
    const int row_size = (width + kEdge + 31) & ~31;
    align_buffer_64(rows, row_size * 2 + (kEdge + row_size * 3 + kEdge));
    uint8_t* row_sobelx = rows;
    uint8_t* row_sobely = rows + row_size;
    uint8_t* row_y = rows + row_size * 2;

    // Convert first row.
    uint8_t* row_y0 = row_y + kEdge;
    uint8_t* row_y1 = row_y0 + row_size;
    uint8_t* row_y2 = row_y1 + row_size;
    if (!rows)
      return 1;
    // Top edge clamp: row 0 is loaded twice (as "above" and "current").
    ARGBToYJRow(src_argb, row_y0, width);
    row_y0[-1] = row_y0[0];
    memset(row_y0 + width, row_y0[width - 1], 16);  // Extrude 16 for valgrind.
    ARGBToYJRow(src_argb, row_y1, width);
    row_y1[-1] = row_y1[0];
    memset(row_y1 + width, row_y1[width - 1], 16);
    memset(row_y2 + width, 0, 16);

    for (y = 0; y < height; ++y) {
      // Convert next row of ARGB to G.
      // On the last iteration src_argb is not advanced, so the final source
      // row is reused (bottom edge clamp).
      if (y < (height - 1)) {
        src_argb += src_stride_argb;
      }
      ARGBToYJRow(src_argb, row_y2, width);
      row_y2[-1] = row_y2[0];
      row_y2[width] = row_y2[width - 1];

      // Gradients read one pixel left of each row (the extruded edge).
      SobelXRow(row_y0 - 1, row_y1 - 1, row_y2 - 1, row_sobelx, width);
      SobelYRow(row_y0 - 1, row_y2 - 1, row_sobely, width);
      SobelRow(row_sobelx, row_sobely, dst_argb, width);

      // Cycle thru circular queue of 3 row_y buffers.
      {
        uint8_t* row_yt = row_y0;
        row_y0 = row_y1;
        row_y1 = row_y2;
        row_y2 = row_yt;
      }

      dst_argb += dst_stride_argb;
    }
    free_aligned_buffer_64(rows);
  }
  return 0;
}
4879
4880
// Sobel ARGB effect.
// Writes the combined Sobel magnitude to all of B, G and R (alpha opaque)
// via SobelRow.  Thin dispatcher: picks the best SobelRow variant, then
// delegates the full pipeline to ARGBSobelize.
LIBYUV_API
int ARGBSobel(const uint8_t* src_argb,
              int src_stride_argb,
              uint8_t* dst_argb,
              int dst_stride_argb,
              int width,
              int height) {
  void (*SobelRow)(const uint8_t* src_sobelx, const uint8_t* src_sobely,
                   uint8_t* dst_argb, int width) = SobelRow_C;
#if defined(HAS_SOBELROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    SobelRow = SobelRow_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      SobelRow = SobelRow_SSE2;
    }
  }
#endif
#if defined(HAS_SOBELROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SobelRow = SobelRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      SobelRow = SobelRow_NEON;
    }
  }
#endif
#if defined(HAS_SOBELROW_LSX)
  if (TestCpuFlag(kCpuHasLSX)) {
    SobelRow = SobelRow_Any_LSX;
    if (IS_ALIGNED(width, 16)) {
      SobelRow = SobelRow_LSX;
    }
  }
#endif
  return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
                      width, height, SobelRow);
}
4917
4918
// Sobel ARGB effect with planar output.
// Same pipeline as ARGBSobel but the row formatter writes a single 8-bit
// plane (one byte per pixel) instead of ARGB.
LIBYUV_API
int ARGBSobelToPlane(const uint8_t* src_argb,
                     int src_stride_argb,
                     uint8_t* dst_y,
                     int dst_stride_y,
                     int width,
                     int height) {
  void (*SobelToPlaneRow)(const uint8_t* src_sobelx, const uint8_t* src_sobely,
                          uint8_t* dst_, int width) = SobelToPlaneRow_C;
#if defined(HAS_SOBELTOPLANEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    SobelToPlaneRow = SobelToPlaneRow_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      SobelToPlaneRow = SobelToPlaneRow_SSE2;
    }
  }
#endif
#if defined(HAS_SOBELTOPLANEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SobelToPlaneRow = SobelToPlaneRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      SobelToPlaneRow = SobelToPlaneRow_NEON;
    }
  }
#endif
#if defined(HAS_SOBELTOPLANEROW_LSX)
  if (TestCpuFlag(kCpuHasLSX)) {
    SobelToPlaneRow = SobelToPlaneRow_Any_LSX;
    if (IS_ALIGNED(width, 32)) {
      SobelToPlaneRow = SobelToPlaneRow_LSX;
    }
  }
#endif
  return ARGBSobelize(src_argb, src_stride_argb, dst_y, dst_stride_y, width,
                      height, SobelToPlaneRow);
}
4955
4956
// SobelXY ARGB effect.
// Similar to Sobel, but also stores Sobel X in R and Sobel Y in B.  G = Sobel.
LIBYUV_API
int ARGBSobelXY(const uint8_t* src_argb,
                int src_stride_argb,
                uint8_t* dst_argb,
                int dst_stride_argb,
                int width,
                int height) {
  void (*SobelXYRow)(const uint8_t* src_sobelx, const uint8_t* src_sobely,
                     uint8_t* dst_argb, int width) = SobelXYRow_C;
#if defined(HAS_SOBELXYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    SobelXYRow = SobelXYRow_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      SobelXYRow = SobelXYRow_SSE2;
    }
  }
#endif
#if defined(HAS_SOBELXYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SobelXYRow = SobelXYRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      SobelXYRow = SobelXYRow_NEON;
    }
  }
#endif
#if defined(HAS_SOBELXYROW_LSX)
  if (TestCpuFlag(kCpuHasLSX)) {
    SobelXYRow = SobelXYRow_Any_LSX;
    if (IS_ALIGNED(width, 16)) {
      SobelXYRow = SobelXYRow_LSX;
    }
  }
#endif
  // All edge handling / buffering is shared with the other Sobel variants.
  return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
                      width, height, SobelXYRow);
}
4994
4995
// Apply a 4x4 polynomial to each ARGB pixel.
// |poly| holds the per-channel polynomial coefficients used by the row
// functions.  Returns 0 on success, -1 on invalid parameters.
LIBYUV_API
int ARGBPolynomial(const uint8_t* src_argb,
                   int src_stride_argb,
                   uint8_t* dst_argb,
                   int dst_stride_argb,
                   const float* poly,
                   int width,
                   int height) {
  int y;
  void (*ARGBPolynomialRow)(const uint8_t* src_argb, uint8_t* dst_argb,
                            const float* poly, int width) = ARGBPolynomialRow_C;
  if (!src_argb || !dst_argb || !poly || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Coalesce rows.
  if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb = dst_stride_argb = 0;
  }
  // SIMD variants require an even width; no "Any" fallback exists here,
  // so odd widths use the C version.
#if defined(HAS_ARGBPOLYNOMIALROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 2)) {
    ARGBPolynomialRow = ARGBPolynomialRow_SSE2;
  }
#endif
#if defined(HAS_ARGBPOLYNOMIALROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && TestCpuFlag(kCpuHasFMA3) &&
      IS_ALIGNED(width, 2)) {
    ARGBPolynomialRow = ARGBPolynomialRow_AVX2;
  }
#endif

  for (y = 0; y < height; ++y) {
    ARGBPolynomialRow(src_argb, dst_argb, poly, width);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
5041
5042
// Convert plane of 16 bit shorts to half floats.
// Source values are multiplied by scale before storing as half float.
// Strides are passed in bytes and converted to uint16_t units below.
// Returns 0 on success, -1 on invalid parameters.
LIBYUV_API
int HalfFloatPlane(const uint16_t* src_y,
                   int src_stride_y,
                   uint16_t* dst_y,
                   int dst_stride_y,
                   float scale,
                   int width,
                   int height) {
  int y;
  void (*HalfFloatRow)(const uint16_t* src, uint16_t* dst, float scale,
                       int width) = HalfFloatRow_C;
  if (!src_y || !dst_y || width <= 0 || height == 0) {
    return -1;
  }
  // Byte strides -> uint16_t element strides for the pointer arithmetic
  // below.
  src_stride_y >>= 1;
  dst_stride_y >>= 1;
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_y = src_y + (height - 1) * src_stride_y;
    src_stride_y = -src_stride_y;
  }
  // Coalesce rows.
  if (src_stride_y == width && dst_stride_y == width) {
    width *= height;
    height = 1;
    src_stride_y = dst_stride_y = 0;
  }
  // CPU dispatch; later blocks override earlier ones.
#if defined(HAS_HALFFLOATROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    HalfFloatRow = HalfFloatRow_Any_SSE2;
    if (IS_ALIGNED(width, 8)) {
      HalfFloatRow = HalfFloatRow_SSE2;
    }
  }
#endif
#if defined(HAS_HALFFLOATROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    HalfFloatRow = HalfFloatRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      HalfFloatRow = HalfFloatRow_AVX2;
    }
  }
#endif
#if defined(HAS_HALFFLOATROW_F16C)
  if (TestCpuFlag(kCpuHasAVX2) && TestCpuFlag(kCpuHasF16C)) {
    // scale == 1.0f has a dedicated kernel that skips the multiply.
    HalfFloatRow =
        (scale == 1.0f) ? HalfFloat1Row_Any_F16C : HalfFloatRow_Any_F16C;
    if (IS_ALIGNED(width, 16)) {
      HalfFloatRow = (scale == 1.0f) ? HalfFloat1Row_F16C : HalfFloatRow_F16C;
    }
  }
#endif
#if defined(HAS_HALFFLOATROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)
#if defined(__arm__)
      // When scale is 1/65535 the scale * 2^-112 used to convert is a denormal.
      // But when Neon vmul is asked to multiply a normal float by that
      // denormal scale, even though the result would have been normal, it
      // flushes to zero.  The scalar version of vmul supports denormals.
      && scale >= 1.0f / 4096.0f
#endif
  ) {
    HalfFloatRow = HalfFloatRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      HalfFloatRow = HalfFloatRow_NEON;
    }
  }
#endif
#if defined(HAS_HALFFLOATROW_SVE2)
  if (TestCpuFlag(kCpuHasSVE2)) {
    HalfFloatRow = scale == 1.0f ? HalfFloat1Row_SVE2 : HalfFloatRow_SVE2;
  }
#endif
#if defined(HAS_HALFFLOATROW_LSX)
  if (TestCpuFlag(kCpuHasLSX)) {
    HalfFloatRow = HalfFloatRow_Any_LSX;
    if (IS_ALIGNED(width, 32)) {
      HalfFloatRow = HalfFloatRow_LSX;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    HalfFloatRow(src_y, dst_y, scale, width);
    src_y += src_stride_y;
    dst_y += dst_stride_y;
  }
  return 0;
}
5134
5135
// Convert a buffer of bytes to floats, scale the values and store as floats.
// Single-row (1-D) operation: no stride or height parameters.
// Returns 0 on success, -1 on invalid parameters.
LIBYUV_API
int ByteToFloat(const uint8_t* src_y, float* dst_y, float scale, int width) {
  void (*ByteToFloatRow)(const uint8_t* src, float* dst, float scale,
                         int width) = ByteToFloatRow_C;
  if (!src_y || !dst_y || width <= 0) {
    return -1;
  }
#if defined(HAS_BYTETOFLOATROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ByteToFloatRow = ByteToFloatRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ByteToFloatRow = ByteToFloatRow_NEON;
    }
  }
#endif

  ByteToFloatRow(src_y, dst_y, scale, width);
  return 0;
}
5155
5156
// Apply a lumacolortable to each ARGB pixel.
// |luma| is a lookup table indexed by the pixel's computed luma; the
// constant 0x00264b0f passed below packs the luma coefficients used by
// the row function.  Returns 0 on success, -1 on invalid parameters.
LIBYUV_API
int ARGBLumaColorTable(const uint8_t* src_argb,
                       int src_stride_argb,
                       uint8_t* dst_argb,
                       int dst_stride_argb,
                       const uint8_t* luma,
                       int width,
                       int height) {
  int y;
  void (*ARGBLumaColorTableRow)(
      const uint8_t* src_argb, uint8_t* dst_argb, int width,
      const uint8_t* luma, const uint32_t lumacoeff) = ARGBLumaColorTableRow_C;
  if (!src_argb || !dst_argb || !luma || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Coalesce rows.
  if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb = dst_stride_argb = 0;
  }
  // SSSE3 variant requires width to be a multiple of 4; no "Any" fallback.
#if defined(HAS_ARGBLUMACOLORTABLEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4)) {
    ARGBLumaColorTableRow = ARGBLumaColorTableRow_SSSE3;
  }
#endif

  for (y = 0; y < height; ++y) {
    ARGBLumaColorTableRow(src_argb, dst_argb, width, luma, 0x00264b0f);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
5197
5198
// Copy Alpha from one ARGB image to another.
// Only the alpha channel of the destination is modified; B, G and R are
// preserved.  Returns 0 on success, -1 on invalid parameters.
LIBYUV_API
int ARGBCopyAlpha(const uint8_t* src_argb,
                  int src_stride_argb,
                  uint8_t* dst_argb,
                  int dst_stride_argb,
                  int width,
                  int height) {
  int y;
  void (*ARGBCopyAlphaRow)(const uint8_t* src_argb, uint8_t* dst_argb,
                           int width) = ARGBCopyAlphaRow_C;
  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Coalesce rows.
  if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb = dst_stride_argb = 0;
  }
#if defined(HAS_ARGBCOPYALPHAROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ARGBCopyAlphaRow = ARGBCopyAlphaRow_Any_SSE2;
    if (IS_ALIGNED(width, 8)) {
      ARGBCopyAlphaRow = ARGBCopyAlphaRow_SSE2;
    }
  }
#endif
#if defined(HAS_ARGBCOPYALPHAROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBCopyAlphaRow = ARGBCopyAlphaRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      ARGBCopyAlphaRow = ARGBCopyAlphaRow_AVX2;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    ARGBCopyAlphaRow(src_argb, dst_argb, width);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
5248
5249
// Extract just the alpha channel from ARGB.
// Writes one byte per pixel (the A channel) to |dst_a|.
// Returns 0 on success, -1 on invalid parameters.
LIBYUV_API
int ARGBExtractAlpha(const uint8_t* src_argb,
                     int src_stride_argb,
                     uint8_t* dst_a,
                     int dst_stride_a,
                     int width,
                     int height) {
  if (!src_argb || !dst_a || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb += (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Coalesce rows.
  if (src_stride_argb == width * 4 && dst_stride_a == width) {
    width *= height;
    height = 1;
    src_stride_argb = dst_stride_a = 0;
  }
  // CPU dispatch; later blocks override earlier ones.
  void (*ARGBExtractAlphaRow)(const uint8_t* src_argb, uint8_t* dst_a,
                              int width) = ARGBExtractAlphaRow_C;
#if defined(HAS_ARGBEXTRACTALPHAROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ARGBExtractAlphaRow = IS_ALIGNED(width, 8) ? ARGBExtractAlphaRow_SSE2
                                               : ARGBExtractAlphaRow_Any_SSE2;
  }
#endif
#if defined(HAS_ARGBEXTRACTALPHAROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBExtractAlphaRow = IS_ALIGNED(width, 32) ? ARGBExtractAlphaRow_AVX2
                                                : ARGBExtractAlphaRow_Any_AVX2;
  }
#endif
#if defined(HAS_ARGBEXTRACTALPHAROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBExtractAlphaRow = IS_ALIGNED(width, 16) ? ARGBExtractAlphaRow_NEON
                                                : ARGBExtractAlphaRow_Any_NEON;
  }
#endif
#if defined(HAS_ARGBEXTRACTALPHAROW_LSX)
  if (TestCpuFlag(kCpuHasLSX)) {
    ARGBExtractAlphaRow = IS_ALIGNED(width, 16) ? ARGBExtractAlphaRow_LSX
                                                : ARGBExtractAlphaRow_Any_LSX;
  }
#endif
#if defined(HAS_ARGBEXTRACTALPHAROW_RVV)
  if (TestCpuFlag(kCpuHasRVV)) {
    ARGBExtractAlphaRow = ARGBExtractAlphaRow_RVV;
  }
#endif

  for (int y = 0; y < height; ++y) {
    ARGBExtractAlphaRow(src_argb, dst_a, width);
    src_argb += src_stride_argb;
    dst_a += dst_stride_a;
  }
  return 0;
}
5311
5312
// Copy a planar Y channel to the alpha channel of a destination ARGB image.
// B, G and R of the destination are preserved; only A is overwritten.
// Returns 0 on success, -1 on invalid parameters.
LIBYUV_API
int ARGBCopyYToAlpha(const uint8_t* src_y,
                     int src_stride_y,
                     uint8_t* dst_argb,
                     int dst_stride_argb,
                     int width,
                     int height) {
  int y;
  void (*ARGBCopyYToAlphaRow)(const uint8_t* src_y, uint8_t* dst_argb,
                              int width) = ARGBCopyYToAlphaRow_C;
  if (!src_y || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_y = src_y + (height - 1) * src_stride_y;
    src_stride_y = -src_stride_y;
  }
  // Coalesce rows (note: source is 1 byte/pixel, destination 4 bytes/pixel).
  if (src_stride_y == width && dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_y = dst_stride_argb = 0;
  }
#if defined(HAS_ARGBCOPYYTOALPHAROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_Any_SSE2;
    if (IS_ALIGNED(width, 8)) {
      ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_SSE2;
    }
  }
#endif
#if defined(HAS_ARGBCOPYYTOALPHAROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBCOPYYTOALPHAROW_RVV)
  if (TestCpuFlag(kCpuHasRVV)) {
    ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_RVV;
  }
#endif

  for (y = 0; y < height; ++y) {
    ARGBCopyYToAlphaRow(src_y, dst_argb, width);
    src_y += src_stride_y;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
5367
5368
// Convert packed YUY2 (4:2:2) to NV12 (planar Y + interleaved UV, 4:2:0).
// Y is copied per row; UV is averaged over row pairs by YUY2ToNVUVRow.
// Returns 0 on success, -1 on invalid parameters.
LIBYUV_API
int YUY2ToNV12(const uint8_t* src_yuy2,
               int src_stride_yuy2,
               uint8_t* dst_y,
               int dst_stride_y,
               uint8_t* dst_uv,
               int dst_stride_uv,
               int width,
               int height) {
  int y;
  void (*YUY2ToYRow)(const uint8_t* src_yuy2, uint8_t* dst_y, int width) =
      YUY2ToYRow_C;
  // Reads two YUY2 rows (|stride_yuy2| apart) and writes one NV12 UV row.
  void (*YUY2ToNVUVRow)(const uint8_t* src_yuy2, int stride_yuy2,
                        uint8_t* dst_uv, int width) = YUY2ToNVUVRow_C;
  if (!src_yuy2 || !dst_y || !dst_uv || width <= 0 || height == 0) {
    return -1;
  }

  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
    src_stride_yuy2 = -src_stride_yuy2;
  }
  // CPU dispatch for the Y extraction row; later blocks override earlier.
#if defined(HAS_YUY2TOYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    YUY2ToYRow = YUY2ToYRow_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      YUY2ToYRow = YUY2ToYRow_SSE2;
    }
  }
#endif
#if defined(HAS_YUY2TOYROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    YUY2ToYRow = YUY2ToYRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      YUY2ToYRow = YUY2ToYRow_AVX2;
    }
  }
#endif
#if defined(HAS_YUY2TOYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    YUY2ToYRow = YUY2ToYRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      YUY2ToYRow = YUY2ToYRow_NEON;
    }
  }
#endif
#if defined(HAS_YUY2TOYROW_LSX) && defined(HAS_YUY2TOUV422ROW_LSX)
  if (TestCpuFlag(kCpuHasLSX)) {
    YUY2ToYRow = YUY2ToYRow_Any_LSX;
    if (IS_ALIGNED(width, 16)) {
      YUY2ToYRow = YUY2ToYRow_LSX;
    }
  }
#endif
#if defined(HAS_YUY2TOYROW_LASX) && defined(HAS_YUY2TOUV422ROW_LASX)
  if (TestCpuFlag(kCpuHasLASX)) {
    YUY2ToYRow = YUY2ToYRow_Any_LASX;
    if (IS_ALIGNED(width, 32)) {
      YUY2ToYRow = YUY2ToYRow_LASX;
    }
  }
#endif

  // CPU dispatch for the UV averaging row.
#if defined(HAS_YUY2TONVUVROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    YUY2ToNVUVRow = YUY2ToNVUVRow_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      YUY2ToNVUVRow = YUY2ToNVUVRow_SSE2;
    }
  }
#endif
#if defined(HAS_YUY2TONVUVROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    YUY2ToNVUVRow = YUY2ToNVUVRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      YUY2ToNVUVRow = YUY2ToNVUVRow_AVX2;
    }
  }
#endif
#if defined(HAS_YUY2TONVUVROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    YUY2ToNVUVRow = YUY2ToNVUVRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      YUY2ToNVUVRow = YUY2ToNVUVRow_NEON;
    }
  }
#endif

  // Process two source rows per iteration: two Y rows out, one UV row out.
  for (y = 0; y < height - 1; y += 2) {
    YUY2ToYRow(src_yuy2, dst_y, width);
    YUY2ToYRow(src_yuy2 + src_stride_yuy2, dst_y + dst_stride_y, width);
    YUY2ToNVUVRow(src_yuy2, src_stride_yuy2, dst_uv, width);
    src_yuy2 += src_stride_yuy2 * 2;
    dst_y += dst_stride_y * 2;
    dst_uv += dst_stride_uv;
  }
  // Odd height: last row has no pair, so UV uses stride 0 (same row twice).
  if (height & 1) {
    YUY2ToYRow(src_yuy2, dst_y, width);
    YUY2ToNVUVRow(src_yuy2, 0, dst_uv, width);
  }
  return 0;
}
5472
5473
// Converts packed UYVY (U0 Y0 V0 Y1 per two pixels) to NV12: a full-size Y
// plane plus one interleaved UV plane at half vertical resolution.
// Returns 0 on success, -1 on invalid arguments, 1 on allocation failure.
LIBYUV_API
int UYVYToNV12(const uint8_t* src_uyvy,
               int src_stride_uyvy,
               uint8_t* dst_y,
               int dst_stride_y,
               uint8_t* dst_uv,
               int dst_stride_uv,
               int width,
               int height) {
  int y;
  // Number of UV sample pairs per row (rounded up for odd widths).
  int halfwidth = (width + 1) >> 1;
  // Row workers default to the portable C implementations; faster SIMD
  // variants are substituted below based on runtime CPU detection.
  void (*SplitUVRow)(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v,
                     int width) = SplitUVRow_C;
  void (*InterpolateRow)(uint8_t* dst_ptr, const uint8_t* src_ptr,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_C;

  if (!src_uyvy || !dst_y || !dst_uv || width <= 0 || height == 0) {
    return -1;
  }

  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
    src_stride_uyvy = -src_stride_uyvy;
  }
  // Select SplitUVRow. Note that when several CPU features are available, a
  // later #if block deliberately overrides an earlier one.
#if defined(HAS_SPLITUVROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    SplitUVRow = SplitUVRow_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      SplitUVRow = SplitUVRow_SSE2;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    SplitUVRow = SplitUVRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      SplitUVRow = SplitUVRow_AVX2;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SplitUVRow = SplitUVRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      SplitUVRow = SplitUVRow_NEON;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_LSX)
  if (TestCpuFlag(kCpuHasLSX)) {
    SplitUVRow = SplitUVRow_Any_LSX;
    if (IS_ALIGNED(width, 32)) {
      SplitUVRow = SplitUVRow_LSX;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_RVV)
  if (TestCpuFlag(kCpuHasRVV)) {
    SplitUVRow = SplitUVRow_RVV;
  }
#endif

  // Select InterpolateRow with the same later-overrides-earlier convention.
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_SME)
  if (TestCpuFlag(kCpuHasSME)) {
    InterpolateRow = InterpolateRow_SME;
  }
#endif
#if defined(HAS_INTERPOLATEROW_LSX)
  if (TestCpuFlag(kCpuHasLSX)) {
    InterpolateRow = InterpolateRow_Any_LSX;
    if (IS_ALIGNED(width, 32)) {
      InterpolateRow = InterpolateRow_LSX;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_RVV)
  if (TestCpuFlag(kCpuHasRVV)) {
    InterpolateRow = InterpolateRow_RVV;
  }
#endif

  {
    // Each UYVY row contains awidth luma bytes and awidth chroma bytes.
    int awidth = halfwidth * 2;
    // row of y and 2 rows of uv
    align_buffer_64(rows, awidth * 3);
    if (!rows)
      return 1;

    // Process two source rows per iteration: copy out both Y rows and blend
    // the two UV rows into the single NV12 UV row for this pair.
    for (y = 0; y < height - 1; y += 2) {
      // Split Y from UV.
      // In UYVY the first byte of each pair is chroma, so SplitUVRow's
      // "dst_u" argument (rows + awidth) receives the interleaved UV bytes
      // and its "dst_v" argument (rows) receives the luma bytes.
      SplitUVRow(src_uyvy, rows + awidth, rows, awidth);
      memcpy(dst_y, rows, width);
      SplitUVRow(src_uyvy + src_stride_uyvy, rows + awidth * 2, rows, awidth);
      memcpy(dst_y + dst_stride_y, rows, width);
      // source_y_fraction of 128 (out of 256) blends the two buffered UV
      // rows equally to produce the vertically subsampled UV output row.
      InterpolateRow(dst_uv, rows + awidth, awidth, awidth, 128);
      src_uyvy += src_stride_uyvy * 2;
      dst_y += dst_stride_y * 2;
      dst_uv += dst_stride_uv;
    }
    // Odd height: the final source row has no partner, so its UV bytes are
    // written to dst_uv directly, with no blending.
    if (height & 1) {
      // Split Y from UV.
      SplitUVRow(src_uyvy, dst_uv, rows, awidth);
      memcpy(dst_y, rows, width);
    }
    free_aligned_buffer_64(rows);
  }
  return 0;
}
5608
5609
// width and height are src size allowing odd size handling.
5610
LIBYUV_API
5611
void HalfMergeUVPlane(const uint8_t* src_u,
5612
                      int src_stride_u,
5613
                      const uint8_t* src_v,
5614
                      int src_stride_v,
5615
                      uint8_t* dst_uv,
5616
                      int dst_stride_uv,
5617
                      int width,
5618
0
                      int height) {
5619
0
  int y;
5620
0
  void (*HalfMergeUVRow)(const uint8_t* src_u, int src_stride_u,
5621
0
                         const uint8_t* src_v, int src_stride_v,
5622
0
                         uint8_t* dst_uv, int width) = HalfMergeUVRow_C;
5623
5624
  // Negative height means invert the image.
5625
0
  if (height < 0) {
5626
0
    height = -height;
5627
0
    src_u = src_u + (height - 1) * src_stride_u;
5628
0
    src_v = src_v + (height - 1) * src_stride_v;
5629
0
    src_stride_u = -src_stride_u;
5630
0
    src_stride_v = -src_stride_v;
5631
0
  }
5632
#if defined(HAS_HALFMERGEUVROW_NEON)
5633
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
5634
    HalfMergeUVRow = HalfMergeUVRow_NEON;
5635
  }
5636
#endif
5637
0
#if defined(HAS_HALFMERGEUVROW_SSSE3)
5638
0
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16)) {
5639
0
    HalfMergeUVRow = HalfMergeUVRow_SSSE3;
5640
0
  }
5641
0
#endif
5642
0
#if defined(HAS_HALFMERGEUVROW_AVX2)
5643
0
  if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 32)) {
5644
0
    HalfMergeUVRow = HalfMergeUVRow_AVX2;
5645
0
  }
5646
0
#endif
5647
5648
0
  for (y = 0; y < height - 1; y += 2) {
5649
    // Merge a row of U and V into a row of UV.
5650
0
    HalfMergeUVRow(src_u, src_stride_u, src_v, src_stride_v, dst_uv, width);
5651
0
    src_u += src_stride_u * 2;
5652
0
    src_v += src_stride_v * 2;
5653
0
    dst_uv += dst_stride_uv;
5654
0
  }
5655
0
  if (height & 1) {
5656
0
    HalfMergeUVRow(src_u, 0, src_v, 0, dst_uv, width);
5657
0
  }
5658
0
}
5659
5660
#ifdef __cplusplus
5661
}  // extern "C"
5662
}  // namespace libyuv
5663
#endif