Coverage Report

Created: 2025-10-28 07:26

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libavif/ext/libyuv/source/row_common.cc
Line
Count
Source
1
/*
2
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5
 *  that can be found in the LICENSE file in the root of the source
6
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS. All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
9
 */
10
11
#include "libyuv/row.h"
12
13
#include <assert.h>
14
#include <string.h>  // For memcpy and memset.
15
16
#include "libyuv/basic_types.h"
17
#include "libyuv/convert_argb.h"  // For kYuvI601Constants
18
19
#ifdef __cplusplus
20
namespace libyuv {
21
extern "C" {
22
#endif
23
24
#ifdef __cplusplus
25
4.96G
#define STATIC_CAST(type, expr) static_cast<type>(expr)
26
#else
27
#define STATIC_CAST(type, expr) (type)(expr)
28
#endif
29
30
// This macro controls YUV to RGB using unsigned math to extend range of
31
// YUV to RGB coefficients to 0 to 4 instead of 0 to 2 for more accuracy on B:
32
// LIBYUV_UNLIMITED_DATA
33
34
// Macros to enable unlimited data for each colorspace
35
// LIBYUV_UNLIMITED_BT601
36
// LIBYUV_UNLIMITED_BT709
37
// LIBYUV_UNLIMITED_BT2020
38
39
#if !defined(LIBYUV_BIT_EXACT) && (defined(__x86_64__) || defined(_M_X64) || \
40
                                   defined(__i386__) || defined(_M_IX86))
41
#define LIBYUV_ARGBTOUV_PAVGB 1
42
#endif
43
#if defined(LIBYUV_BIT_EXACT)
44
#define LIBYUV_UNATTENUATE_DUP 1
45
#endif
46
47
// llvm x86 is poor at ternary operator, so use branchless min/max.
48
49
#define USE_BRANCHLESS 1
50
#if defined(USE_BRANCHLESS)
51
0
static __inline int32_t clamp0(int32_t v) {
52
0
  return -(v >= 0) & v;
53
0
}
54
// TODO(fbarchard): make clamp255 preserve negative values.
55
0
static __inline int32_t clamp255(int32_t v) {
56
0
  return (-(v >= 255) | v) & 255;
57
0
}
58
59
0
static __inline int32_t clamp1023(int32_t v) {
60
0
  return (-(v >= 1023) | v) & 1023;
61
0
}
62
63
// clamp to max
64
0
static __inline int32_t ClampMax(int32_t v, int32_t max) {
65
0
  return (-(v >= max) | v) & max;
66
0
}
67
68
0
static __inline uint32_t Abs(int32_t v) {
69
0
  int m = -(v < 0);
70
0
  return (v + m) ^ m;
71
0
}
72
#else   // USE_BRANCHLESS
73
static __inline int32_t clamp0(int32_t v) {
74
  return (v < 0) ? 0 : v;
75
}
76
77
static __inline int32_t clamp255(int32_t v) {
78
  return (v > 255) ? 255 : v;
79
}
80
81
static __inline int32_t clamp1023(int32_t v) {
82
  return (v > 1023) ? 1023 : v;
83
}
84
85
static __inline int32_t ClampMax(int32_t v, int32_t max) {
86
  return (v > max) ? max : v;
87
}
88
89
static __inline uint32_t Abs(int32_t v) {
90
  return (v < 0) ? -v : v;
91
}
92
#endif  // USE_BRANCHLESS
93
0
static __inline uint32_t Clamp(int32_t val) {
94
0
  int v = clamp0(val);
95
0
  return (uint32_t)(clamp255(v));
96
0
}
97
98
0
static __inline uint32_t Clamp10(int32_t val) {
99
0
  int v = clamp0(val);
100
0
  return (uint32_t)(clamp1023(v));
101
0
}
102
103
// Little Endian
104
#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
105
    defined(_M_IX86) || defined(__arm__) || defined(_M_ARM) ||     \
106
    (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
107
0
#define WRITEWORD(p, v) *(uint32_t*)(p) = v
108
#else
109
static inline void WRITEWORD(uint8_t* p, uint32_t v) {
110
  p[0] = (uint8_t)(v & 255);
111
  p[1] = (uint8_t)((v >> 8) & 255);
112
  p[2] = (uint8_t)((v >> 16) & 255);
113
  p[3] = (uint8_t)((v >> 24) & 255);
114
}
115
#endif
116
117
0
void RGB24ToARGBRow_C(const uint8_t* src_rgb24, uint8_t* dst_argb, int width) {
118
0
  int x;
119
0
  for (x = 0; x < width; ++x) {
120
0
    uint8_t b = src_rgb24[0];
121
0
    uint8_t g = src_rgb24[1];
122
0
    uint8_t r = src_rgb24[2];
123
0
    dst_argb[0] = b;
124
0
    dst_argb[1] = g;
125
0
    dst_argb[2] = r;
126
0
    dst_argb[3] = 255u;
127
0
    dst_argb += 4;
128
0
    src_rgb24 += 3;
129
0
  }
130
0
}
131
132
0
void RAWToARGBRow_C(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
133
0
  int x;
134
0
  for (x = 0; x < width; ++x) {
135
0
    uint8_t r = src_raw[0];
136
0
    uint8_t g = src_raw[1];
137
0
    uint8_t b = src_raw[2];
138
0
    dst_argb[0] = b;
139
0
    dst_argb[1] = g;
140
0
    dst_argb[2] = r;
141
0
    dst_argb[3] = 255u;
142
0
    dst_argb += 4;
143
0
    src_raw += 3;
144
0
  }
145
0
}
146
147
0
void RAWToRGBARow_C(const uint8_t* src_raw, uint8_t* dst_rgba, int width) {
148
0
  int x;
149
0
  for (x = 0; x < width; ++x) {
150
0
    uint8_t r = src_raw[0];
151
0
    uint8_t g = src_raw[1];
152
0
    uint8_t b = src_raw[2];
153
0
    dst_rgba[0] = 255u;
154
0
    dst_rgba[1] = b;
155
0
    dst_rgba[2] = g;
156
0
    dst_rgba[3] = r;
157
0
    dst_rgba += 4;
158
0
    src_raw += 3;
159
0
  }
160
0
}
161
162
0
void RAWToRGB24Row_C(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
163
0
  int x;
164
0
  for (x = 0; x < width; ++x) {
165
0
    uint8_t r = src_raw[0];
166
0
    uint8_t g = src_raw[1];
167
0
    uint8_t b = src_raw[2];
168
0
    dst_rgb24[0] = b;
169
0
    dst_rgb24[1] = g;
170
0
    dst_rgb24[2] = r;
171
0
    dst_rgb24 += 3;
172
0
    src_raw += 3;
173
0
  }
174
0
}
175
176
void RGB565ToARGBRow_C(const uint8_t* src_rgb565,
177
                       uint8_t* dst_argb,
178
0
                       int width) {
179
0
  int x;
180
0
  for (x = 0; x < width; ++x) {
181
0
    uint8_t b = STATIC_CAST(uint8_t, src_rgb565[0] & 0x1f);
182
0
    uint8_t g = STATIC_CAST(
183
0
        uint8_t, (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3));
184
0
    uint8_t r = STATIC_CAST(uint8_t, src_rgb565[1] >> 3);
185
0
    dst_argb[0] = STATIC_CAST(uint8_t, (b << 3) | (b >> 2));
186
0
    dst_argb[1] = STATIC_CAST(uint8_t, (g << 2) | (g >> 4));
187
0
    dst_argb[2] = STATIC_CAST(uint8_t, (r << 3) | (r >> 2));
188
0
    dst_argb[3] = 255u;
189
0
    dst_argb += 4;
190
0
    src_rgb565 += 2;
191
0
  }
192
0
}
193
194
void ARGB1555ToARGBRow_C(const uint8_t* src_argb1555,
195
                         uint8_t* dst_argb,
196
0
                         int width) {
197
0
  int x;
198
0
  for (x = 0; x < width; ++x) {
199
0
    uint8_t b = STATIC_CAST(uint8_t, src_argb1555[0] & 0x1f);
200
0
    uint8_t g = STATIC_CAST(
201
0
        uint8_t, (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3));
202
0
    uint8_t r = STATIC_CAST(uint8_t, (src_argb1555[1] & 0x7c) >> 2);
203
0
    uint8_t a = STATIC_CAST(uint8_t, src_argb1555[1] >> 7);
204
0
    dst_argb[0] = STATIC_CAST(uint8_t, (b << 3) | (b >> 2));
205
0
    dst_argb[1] = STATIC_CAST(uint8_t, (g << 3) | (g >> 2));
206
0
    dst_argb[2] = STATIC_CAST(uint8_t, (r << 3) | (r >> 2));
207
0
    dst_argb[3] = -a;
208
0
    dst_argb += 4;
209
0
    src_argb1555 += 2;
210
0
  }
211
0
}
212
213
void ARGB4444ToARGBRow_C(const uint8_t* src_argb4444,
214
                         uint8_t* dst_argb,
215
0
                         int width) {
216
0
  int x;
217
0
  for (x = 0; x < width; ++x) {
218
0
    uint8_t b = STATIC_CAST(uint8_t, src_argb4444[0] & 0x0f);
219
0
    uint8_t g = STATIC_CAST(uint8_t, src_argb4444[0] >> 4);
220
0
    uint8_t r = STATIC_CAST(uint8_t, src_argb4444[1] & 0x0f);
221
0
    uint8_t a = STATIC_CAST(uint8_t, src_argb4444[1] >> 4);
222
0
    dst_argb[0] = STATIC_CAST(uint8_t, (b << 4) | b);
223
0
    dst_argb[1] = STATIC_CAST(uint8_t, (g << 4) | g);
224
0
    dst_argb[2] = STATIC_CAST(uint8_t, (r << 4) | r);
225
0
    dst_argb[3] = STATIC_CAST(uint8_t, (a << 4) | a);
226
0
    dst_argb += 4;
227
0
    src_argb4444 += 2;
228
0
  }
229
0
}
230
231
0
void AR30ToARGBRow_C(const uint8_t* src_ar30, uint8_t* dst_argb, int width) {
232
0
  int x;
233
0
  for (x = 0; x < width; ++x) {
234
0
    uint32_t ar30;
235
0
    memcpy(&ar30, src_ar30, sizeof ar30);
236
0
    uint32_t b = (ar30 >> 2) & 0xff;
237
0
    uint32_t g = (ar30 >> 12) & 0xff;
238
0
    uint32_t r = (ar30 >> 22) & 0xff;
239
0
    uint32_t a = (ar30 >> 30) * 0x55;  // Replicate 2 bits to 8 bits.
240
0
    *(uint32_t*)(dst_argb) = b | (g << 8) | (r << 16) | (a << 24);
241
0
    dst_argb += 4;
242
0
    src_ar30 += 4;
243
0
  }
244
0
}
245
246
0
void AR30ToABGRRow_C(const uint8_t* src_ar30, uint8_t* dst_abgr, int width) {
247
0
  int x;
248
0
  for (x = 0; x < width; ++x) {
249
0
    uint32_t ar30;
250
0
    memcpy(&ar30, src_ar30, sizeof ar30);
251
0
    uint32_t b = (ar30 >> 2) & 0xff;
252
0
    uint32_t g = (ar30 >> 12) & 0xff;
253
0
    uint32_t r = (ar30 >> 22) & 0xff;
254
0
    uint32_t a = (ar30 >> 30) * 0x55;  // Replicate 2 bits to 8 bits.
255
0
    *(uint32_t*)(dst_abgr) = r | (g << 8) | (b << 16) | (a << 24);
256
0
    dst_abgr += 4;
257
0
    src_ar30 += 4;
258
0
  }
259
0
}
260
261
0
void AR30ToAB30Row_C(const uint8_t* src_ar30, uint8_t* dst_ab30, int width) {
262
0
  int x;
263
0
  for (x = 0; x < width; ++x) {
264
0
    uint32_t ar30;
265
0
    memcpy(&ar30, src_ar30, sizeof ar30);
266
0
    uint32_t b = ar30 & 0x3ff;
267
0
    uint32_t ga = ar30 & 0xc00ffc00;
268
0
    uint32_t r = (ar30 >> 20) & 0x3ff;
269
0
    *(uint32_t*)(dst_ab30) = r | ga | (b << 20);
270
0
    dst_ab30 += 4;
271
0
    src_ar30 += 4;
272
0
  }
273
0
}
274
275
0
void ARGBToABGRRow_C(const uint8_t* src_argb, uint8_t* dst_abgr, int width) {
276
0
  int x;
277
0
  for (x = 0; x < width; ++x) {
278
0
    uint8_t b = src_argb[0];
279
0
    uint8_t g = src_argb[1];
280
0
    uint8_t r = src_argb[2];
281
0
    uint8_t a = src_argb[3];
282
0
    dst_abgr[0] = r;
283
0
    dst_abgr[1] = g;
284
0
    dst_abgr[2] = b;
285
0
    dst_abgr[3] = a;
286
0
    dst_abgr += 4;
287
0
    src_argb += 4;
288
0
  }
289
0
}
290
291
0
void ARGBToBGRARow_C(const uint8_t* src_argb, uint8_t* dst_bgra, int width) {
292
0
  int x;
293
0
  for (x = 0; x < width; ++x) {
294
0
    uint8_t b = src_argb[0];
295
0
    uint8_t g = src_argb[1];
296
0
    uint8_t r = src_argb[2];
297
0
    uint8_t a = src_argb[3];
298
0
    dst_bgra[0] = a;
299
0
    dst_bgra[1] = r;
300
0
    dst_bgra[2] = g;
301
0
    dst_bgra[3] = b;
302
0
    dst_bgra += 4;
303
0
    src_argb += 4;
304
0
  }
305
0
}
306
307
0
void ARGBToRGBARow_C(const uint8_t* src_argb, uint8_t* dst_rgba, int width) {
308
0
  int x;
309
0
  for (x = 0; x < width; ++x) {
310
0
    uint8_t b = src_argb[0];
311
0
    uint8_t g = src_argb[1];
312
0
    uint8_t r = src_argb[2];
313
0
    uint8_t a = src_argb[3];
314
0
    dst_rgba[0] = a;
315
0
    dst_rgba[1] = b;
316
0
    dst_rgba[2] = g;
317
0
    dst_rgba[3] = r;
318
0
    dst_rgba += 4;
319
0
    src_argb += 4;
320
0
  }
321
0
}
322
323
0
void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
324
0
  int x;
325
0
  for (x = 0; x < width; ++x) {
326
0
    uint8_t b = src_argb[0];
327
0
    uint8_t g = src_argb[1];
328
0
    uint8_t r = src_argb[2];
329
0
    dst_rgb[0] = b;
330
0
    dst_rgb[1] = g;
331
0
    dst_rgb[2] = r;
332
0
    dst_rgb += 3;
333
0
    src_argb += 4;
334
0
  }
335
0
}
336
337
0
void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
338
0
  int x;
339
0
  for (x = 0; x < width; ++x) {
340
0
    uint8_t b = src_argb[0];
341
0
    uint8_t g = src_argb[1];
342
0
    uint8_t r = src_argb[2];
343
0
    dst_rgb[0] = r;
344
0
    dst_rgb[1] = g;
345
0
    dst_rgb[2] = b;
346
0
    dst_rgb += 3;
347
0
    src_argb += 4;
348
0
  }
349
0
}
350
351
0
void RGBAToARGBRow_C(const uint8_t* src_rgba, uint8_t* dst_argb, int width) {
352
0
  int x;
353
0
  for (x = 0; x < width; ++x) {
354
0
    uint8_t a = src_rgba[0];
355
0
    uint8_t b = src_rgba[1];
356
0
    uint8_t g = src_rgba[2];
357
0
    uint8_t r = src_rgba[3];
358
0
    dst_argb[0] = b;
359
0
    dst_argb[1] = g;
360
0
    dst_argb[2] = r;
361
0
    dst_argb[3] = a;
362
0
    dst_argb += 4;
363
0
    src_rgba += 4;
364
0
  }
365
0
}
366
367
0
void ARGBToRGB565Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
368
0
  int x;
369
0
  for (x = 0; x < width - 1; x += 2) {
370
0
    uint8_t b0 = src_argb[0] >> 3;
371
0
    uint8_t g0 = src_argb[1] >> 2;
372
0
    uint8_t r0 = src_argb[2] >> 3;
373
0
    uint8_t b1 = src_argb[4] >> 3;
374
0
    uint8_t g1 = src_argb[5] >> 2;
375
0
    uint8_t r1 = src_argb[6] >> 3;
376
0
    WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) |
377
0
                           (r1 << 27));
378
0
    dst_rgb += 4;
379
0
    src_argb += 8;
380
0
  }
381
0
  if (width & 1) {
382
0
    uint8_t b0 = src_argb[0] >> 3;
383
0
    uint8_t g0 = src_argb[1] >> 2;
384
0
    uint8_t r0 = src_argb[2] >> 3;
385
0
    *(uint16_t*)(dst_rgb) = STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 11));
386
0
  }
387
0
}
388
389
// dither4 is a row of 4 values from 4x4 dither matrix.
390
// The 4x4 matrix contains values to increase RGB.  When converting to
391
// fewer bits (565) this provides an ordered dither.
392
// The order in the 4x4 matrix in first byte is upper left.
393
// The 4 values are passed as an int, then referenced as an array, so
394
// endian will not affect order of the original matrix.  But the dither4
395
// will containing the first pixel in the lower byte for little endian
396
// or the upper byte for big endian.
397
void ARGBToRGB565DitherRow_C(const uint8_t* src_argb,
398
                             uint8_t* dst_rgb,
399
                             uint32_t dither4,
400
0
                             int width) {
401
0
  int x;
402
0
  for (x = 0; x < width - 1; x += 2) {
403
0
    int dither0 = ((const unsigned char*)(&dither4))[x & 3];
404
0
    int dither1 = ((const unsigned char*)(&dither4))[(x + 1) & 3];
405
0
    uint8_t b0 = STATIC_CAST(uint8_t, clamp255(src_argb[0] + dither0) >> 3);
406
0
    uint8_t g0 = STATIC_CAST(uint8_t, clamp255(src_argb[1] + dither0) >> 2);
407
0
    uint8_t r0 = STATIC_CAST(uint8_t, clamp255(src_argb[2] + dither0) >> 3);
408
0
    uint8_t b1 = STATIC_CAST(uint8_t, clamp255(src_argb[4] + dither1) >> 3);
409
0
    uint8_t g1 = STATIC_CAST(uint8_t, clamp255(src_argb[5] + dither1) >> 2);
410
0
    uint8_t r1 = STATIC_CAST(uint8_t, clamp255(src_argb[6] + dither1) >> 3);
411
0
    *(uint16_t*)(dst_rgb + 0) =
412
0
        STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 11));
413
0
    *(uint16_t*)(dst_rgb + 2) =
414
0
        STATIC_CAST(uint16_t, b1 | (g1 << 5) | (r1 << 11));
415
0
    dst_rgb += 4;
416
0
    src_argb += 8;
417
0
  }
418
0
  if (width & 1) {
419
0
    int dither0 = ((const unsigned char*)(&dither4))[(width - 1) & 3];
420
0
    uint8_t b0 = STATIC_CAST(uint8_t, clamp255(src_argb[0] + dither0) >> 3);
421
0
    uint8_t g0 = STATIC_CAST(uint8_t, clamp255(src_argb[1] + dither0) >> 2);
422
0
    uint8_t r0 = STATIC_CAST(uint8_t, clamp255(src_argb[2] + dither0) >> 3);
423
0
    *(uint16_t*)(dst_rgb) = STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 11));
424
0
  }
425
0
}
426
427
0
void ARGBToARGB1555Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
428
0
  int x;
429
0
  for (x = 0; x < width - 1; x += 2) {
430
0
    uint8_t b0 = src_argb[0] >> 3;
431
0
    uint8_t g0 = src_argb[1] >> 3;
432
0
    uint8_t r0 = src_argb[2] >> 3;
433
0
    uint8_t a0 = src_argb[3] >> 7;
434
0
    uint8_t b1 = src_argb[4] >> 3;
435
0
    uint8_t g1 = src_argb[5] >> 3;
436
0
    uint8_t r1 = src_argb[6] >> 3;
437
0
    uint8_t a1 = src_argb[7] >> 7;
438
0
    *(uint16_t*)(dst_rgb + 0) =
439
0
        STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 10) | (a0 << 15));
440
0
    *(uint16_t*)(dst_rgb + 2) =
441
0
        STATIC_CAST(uint16_t, b1 | (g1 << 5) | (r1 << 10) | (a1 << 15));
442
0
    dst_rgb += 4;
443
0
    src_argb += 8;
444
0
  }
445
0
  if (width & 1) {
446
0
    uint8_t b0 = src_argb[0] >> 3;
447
0
    uint8_t g0 = src_argb[1] >> 3;
448
0
    uint8_t r0 = src_argb[2] >> 3;
449
0
    uint8_t a0 = src_argb[3] >> 7;
450
0
    *(uint16_t*)(dst_rgb) =
451
0
        STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 10) | (a0 << 15));
452
0
  }
453
0
}
454
455
0
void ARGBToARGB4444Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
456
0
  int x;
457
0
  for (x = 0; x < width - 1; x += 2) {
458
0
    uint8_t b0 = src_argb[0] >> 4;
459
0
    uint8_t g0 = src_argb[1] >> 4;
460
0
    uint8_t r0 = src_argb[2] >> 4;
461
0
    uint8_t a0 = src_argb[3] >> 4;
462
0
    uint8_t b1 = src_argb[4] >> 4;
463
0
    uint8_t g1 = src_argb[5] >> 4;
464
0
    uint8_t r1 = src_argb[6] >> 4;
465
0
    uint8_t a1 = src_argb[7] >> 4;
466
0
    *(uint16_t*)(dst_rgb + 0) =
467
0
        STATIC_CAST(uint16_t, b0 | (g0 << 4) | (r0 << 8) | (a0 << 12));
468
0
    *(uint16_t*)(dst_rgb + 2) =
469
0
        STATIC_CAST(uint16_t, b1 | (g1 << 4) | (r1 << 8) | (a1 << 12));
470
0
    dst_rgb += 4;
471
0
    src_argb += 8;
472
0
  }
473
0
  if (width & 1) {
474
0
    uint8_t b0 = src_argb[0] >> 4;
475
0
    uint8_t g0 = src_argb[1] >> 4;
476
0
    uint8_t r0 = src_argb[2] >> 4;
477
0
    uint8_t a0 = src_argb[3] >> 4;
478
0
    *(uint16_t*)(dst_rgb) =
479
0
        STATIC_CAST(uint16_t, b0 | (g0 << 4) | (r0 << 8) | (a0 << 12));
480
0
  }
481
0
}
482
483
0
void ABGRToAR30Row_C(const uint8_t* src_abgr, uint8_t* dst_ar30, int width) {
484
0
  int x;
485
0
  for (x = 0; x < width; ++x) {
486
0
    uint32_t r0 = (src_abgr[0] >> 6) | ((uint32_t)(src_abgr[0]) << 2);
487
0
    uint32_t g0 = (src_abgr[1] >> 6) | ((uint32_t)(src_abgr[1]) << 2);
488
0
    uint32_t b0 = (src_abgr[2] >> 6) | ((uint32_t)(src_abgr[2]) << 2);
489
0
    uint32_t a0 = (src_abgr[3] >> 6);
490
0
    *(uint32_t*)(dst_ar30) =
491
0
        STATIC_CAST(uint32_t, b0 | (g0 << 10) | (r0 << 20) | (a0 << 30));
492
0
    dst_ar30 += 4;
493
0
    src_abgr += 4;
494
0
  }
495
0
}
496
497
0
void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width) {
498
0
  int x;
499
0
  for (x = 0; x < width; ++x) {
500
0
    uint32_t b0 = (src_argb[0] >> 6) | ((uint32_t)(src_argb[0]) << 2);
501
0
    uint32_t g0 = (src_argb[1] >> 6) | ((uint32_t)(src_argb[1]) << 2);
502
0
    uint32_t r0 = (src_argb[2] >> 6) | ((uint32_t)(src_argb[2]) << 2);
503
0
    uint32_t a0 = (src_argb[3] >> 6);
504
0
    *(uint32_t*)(dst_ar30) =
505
0
        STATIC_CAST(uint32_t, b0 | (g0 << 10) | (r0 << 20) | (a0 << 30));
506
0
    dst_ar30 += 4;
507
0
    src_argb += 4;
508
0
  }
509
0
}
510
511
0
void ARGBToAR64Row_C(const uint8_t* src_argb, uint16_t* dst_ar64, int width) {
512
0
  int x;
513
0
  for (x = 0; x < width; ++x) {
514
0
    uint16_t b = src_argb[0] * 0x0101;
515
0
    uint16_t g = src_argb[1] * 0x0101;
516
0
    uint16_t r = src_argb[2] * 0x0101;
517
0
    uint16_t a = src_argb[3] * 0x0101;
518
0
    dst_ar64[0] = b;
519
0
    dst_ar64[1] = g;
520
0
    dst_ar64[2] = r;
521
0
    dst_ar64[3] = a;
522
0
    dst_ar64 += 4;
523
0
    src_argb += 4;
524
0
  }
525
0
}
526
527
0
void ARGBToAB64Row_C(const uint8_t* src_argb, uint16_t* dst_ab64, int width) {
528
0
  int x;
529
0
  for (x = 0; x < width; ++x) {
530
0
    uint16_t b = src_argb[0] * 0x0101;
531
0
    uint16_t g = src_argb[1] * 0x0101;
532
0
    uint16_t r = src_argb[2] * 0x0101;
533
0
    uint16_t a = src_argb[3] * 0x0101;
534
0
    dst_ab64[0] = r;
535
0
    dst_ab64[1] = g;
536
0
    dst_ab64[2] = b;
537
0
    dst_ab64[3] = a;
538
0
    dst_ab64 += 4;
539
0
    src_argb += 4;
540
0
  }
541
0
}
542
543
0
void AR64ToARGBRow_C(const uint16_t* src_ar64, uint8_t* dst_argb, int width) {
544
0
  int x;
545
0
  for (x = 0; x < width; ++x) {
546
0
    uint8_t b = src_ar64[0] >> 8;
547
0
    uint8_t g = src_ar64[1] >> 8;
548
0
    uint8_t r = src_ar64[2] >> 8;
549
0
    uint8_t a = src_ar64[3] >> 8;
550
0
    dst_argb[0] = b;
551
0
    dst_argb[1] = g;
552
0
    dst_argb[2] = r;
553
0
    dst_argb[3] = a;
554
0
    dst_argb += 4;
555
0
    src_ar64 += 4;
556
0
  }
557
0
}
558
559
0
void AB64ToARGBRow_C(const uint16_t* src_ab64, uint8_t* dst_argb, int width) {
560
0
  int x;
561
0
  for (x = 0; x < width; ++x) {
562
0
    uint8_t r = src_ab64[0] >> 8;
563
0
    uint8_t g = src_ab64[1] >> 8;
564
0
    uint8_t b = src_ab64[2] >> 8;
565
0
    uint8_t a = src_ab64[3] >> 8;
566
0
    dst_argb[0] = b;
567
0
    dst_argb[1] = g;
568
0
    dst_argb[2] = r;
569
0
    dst_argb[3] = a;
570
0
    dst_argb += 4;
571
0
    src_ab64 += 4;
572
0
  }
573
0
}
574
575
0
void AR64ToAB64Row_C(const uint16_t* src_ar64, uint16_t* dst_ab64, int width) {
576
0
  int x;
577
0
  for (x = 0; x < width; ++x) {
578
0
    uint16_t b = src_ar64[0];
579
0
    uint16_t g = src_ar64[1];
580
0
    uint16_t r = src_ar64[2];
581
0
    uint16_t a = src_ar64[3];
582
0
    dst_ab64[0] = r;
583
0
    dst_ab64[1] = g;
584
0
    dst_ab64[2] = b;
585
0
    dst_ab64[3] = a;
586
0
    dst_ab64 += 4;
587
0
    src_ar64 += 4;
588
0
  }
589
0
}
590
591
// TODO(fbarchard): Make shuffle compatible with SIMD versions
592
void AR64ShuffleRow_C(const uint8_t* src_ar64,
593
                      uint8_t* dst_ar64,
594
                      const uint8_t* shuffler,
595
0
                      int width) {
596
0
  const uint16_t* src_ar64_16 = (const uint16_t*)src_ar64;
597
0
  uint16_t* dst_ar64_16 = (uint16_t*)dst_ar64;
598
0
  int index0 = shuffler[0] / 2;
599
0
  int index1 = shuffler[2] / 2;
600
0
  int index2 = shuffler[4] / 2;
601
0
  int index3 = shuffler[6] / 2;
602
  // Shuffle a row of AR64.
603
0
  int x;
604
0
  for (x = 0; x < width / 2; ++x) {
605
    // To support in-place conversion.
606
0
    uint16_t b = src_ar64_16[index0];
607
0
    uint16_t g = src_ar64_16[index1];
608
0
    uint16_t r = src_ar64_16[index2];
609
0
    uint16_t a = src_ar64_16[index3];
610
0
    dst_ar64_16[0] = b;
611
0
    dst_ar64_16[1] = g;
612
0
    dst_ar64_16[2] = r;
613
0
    dst_ar64_16[3] = a;
614
0
    src_ar64_16 += 4;
615
0
    dst_ar64_16 += 4;
616
0
  }
617
0
}
618
// BT601 8 bit Y:
619
// b 0.114 * 219 = 24.966  = 25
620
// g 0.587 * 219 = 128.553 = 129
621
// r 0.299 * 219 = 65.481  = 66
622
// BT601 8 bit U:
623
// b  0.875  * 128 = 112.0    = 112
624
// g -0.5781 * 128 = −73.9968 = -74
625
// r -0.2969 * 128 = −38.0032 = -38
626
// BT601 8 bit V:
627
// b -0.1406 * 128 = −17.9968 = -18
628
// g -0.7344 * 128 = −94.0032 = -94
629
// r  0.875  * 128 = 112.0    = 112
630
631
0
static __inline uint8_t RGBToY(uint8_t r, uint8_t g, uint8_t b) {
632
0
  return STATIC_CAST(uint8_t, (66 * r + 129 * g + 25 * b + 0x1080) >> 8);
633
0
}
634
0
static __inline uint8_t RGBToU(uint8_t r, uint8_t g, uint8_t b) {
635
0
  return STATIC_CAST(uint8_t, (112 * b - 74 * g - 38 * r + 0x8000) >> 8);
636
0
}
637
0
static __inline uint8_t RGBToV(uint8_t r, uint8_t g, uint8_t b) {
638
0
  return STATIC_CAST(uint8_t, (112 * r - 94 * g - 18 * b + 0x8000) >> 8);
639
0
}
640
0
#define AVGB(a, b) (((a) + (b) + 1) >> 1)
641
642
// ARGBToY_C and ARGBToUV_C
643
// Intel version of UV mimic SSE/AVX which does 2 pavgb
644
#if defined(LIBYUV_ARGBTOUV_PAVGB)
645
#define MAKEROWY(NAME, R, G, B, BPP)                                       \
646
0
  void NAME##ToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
647
0
    int x;                                                                 \
648
0
    for (x = 0; x < width; ++x) {                                          \
649
0
      dst_y[0] = RGBToY(src_rgb[R], src_rgb[G], src_rgb[B]);               \
650
0
      src_rgb += BPP;                                                      \
651
0
      dst_y += 1;                                                          \
652
0
    }                                                                      \
653
0
  }                                                                        \
Unexecuted instantiation: ARGBToYRow_C
Unexecuted instantiation: BGRAToYRow_C
Unexecuted instantiation: ABGRToYRow_C
Unexecuted instantiation: RGBAToYRow_C
Unexecuted instantiation: RGB24ToYRow_C
Unexecuted instantiation: RAWToYRow_C
654
  void NAME##ToUVRow_C(const uint8_t* src_rgb, int src_stride_rgb,         \
655
0
                       uint8_t* dst_u, uint8_t* dst_v, int width) {        \
656
0
    const uint8_t* src_rgb1 = src_rgb + src_stride_rgb;                    \
657
0
    int x;                                                                 \
658
0
    for (x = 0; x < width - 1; x += 2) {                                   \
659
0
      uint8_t ab = AVGB(AVGB(src_rgb[B], src_rgb1[B]),                     \
660
0
                        AVGB(src_rgb[B + BPP], src_rgb1[B + BPP]));        \
661
0
      uint8_t ag = AVGB(AVGB(src_rgb[G], src_rgb1[G]),                     \
662
0
                        AVGB(src_rgb[G + BPP], src_rgb1[G + BPP]));        \
663
0
      uint8_t ar = AVGB(AVGB(src_rgb[R], src_rgb1[R]),                     \
664
0
                        AVGB(src_rgb[R + BPP], src_rgb1[R + BPP]));        \
665
0
      dst_u[0] = RGBToU(ar, ag, ab);                                       \
666
0
      dst_v[0] = RGBToV(ar, ag, ab);                                       \
667
0
      src_rgb += BPP * 2;                                                  \
668
0
      src_rgb1 += BPP * 2;                                                 \
669
0
      dst_u += 1;                                                          \
670
0
      dst_v += 1;                                                          \
671
0
    }                                                                      \
672
0
    if (width & 1) {                                                       \
673
0
      uint8_t ab = AVGB(src_rgb[B], src_rgb1[B]);                          \
674
0
      uint8_t ag = AVGB(src_rgb[G], src_rgb1[G]);                          \
675
0
      uint8_t ar = AVGB(src_rgb[R], src_rgb1[R]);                          \
676
0
      dst_u[0] = RGBToU(ar, ag, ab);                                       \
677
0
      dst_v[0] = RGBToV(ar, ag, ab);                                       \
678
0
    }                                                                      \
679
0
  }
Unexecuted instantiation: ARGBToUVRow_C
Unexecuted instantiation: BGRAToUVRow_C
Unexecuted instantiation: ABGRToUVRow_C
Unexecuted instantiation: RGBAToUVRow_C
Unexecuted instantiation: RGB24ToUVRow_C
Unexecuted instantiation: RAWToUVRow_C
680
#else
681
// ARM version does average of 4 pixels with rounding
682
#define MAKEROWY(NAME, R, G, B, BPP)                                       \
683
  void NAME##ToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
684
    int x;                                                                 \
685
    for (x = 0; x < width; ++x) {                                          \
686
      dst_y[0] = RGBToY(src_rgb[R], src_rgb[G], src_rgb[B]);               \
687
      src_rgb += BPP;                                                      \
688
      dst_y += 1;                                                          \
689
    }                                                                      \
690
  }                                                                        \
691
  void NAME##ToUVRow_C(const uint8_t* src_rgb, int src_stride_rgb,         \
692
                       uint8_t* dst_u, uint8_t* dst_v, int width) {        \
693
    const uint8_t* src_rgb1 = src_rgb + src_stride_rgb;                    \
694
    int x;                                                                 \
695
    for (x = 0; x < width - 1; x += 2) {                                   \
696
      uint8_t ab = (src_rgb[B] + src_rgb[B + BPP] + src_rgb1[B] +          \
697
                    src_rgb1[B + BPP] + 2) >>                              \
698
                   2;                                                      \
699
      uint8_t ag = (src_rgb[G] + src_rgb[G + BPP] + src_rgb1[G] +          \
700
                    src_rgb1[G + BPP] + 2) >>                              \
701
                   2;                                                      \
702
      uint8_t ar = (src_rgb[R] + src_rgb[R + BPP] + src_rgb1[R] +          \
703
                    src_rgb1[R + BPP] + 2) >>                              \
704
                   2;                                                      \
705
      dst_u[0] = RGBToU(ar, ag, ab);                                       \
706
      dst_v[0] = RGBToV(ar, ag, ab);                                       \
707
      src_rgb += BPP * 2;                                                  \
708
      src_rgb1 += BPP * 2;                                                 \
709
      dst_u += 1;                                                          \
710
      dst_v += 1;                                                          \
711
    }                                                                      \
712
    if (width & 1) {                                                       \
713
      uint8_t ab = (src_rgb[B] + src_rgb1[B] + 1) >> 1;                    \
714
      uint8_t ag = (src_rgb[G] + src_rgb1[G] + 1) >> 1;                    \
715
      uint8_t ar = (src_rgb[R] + src_rgb1[R] + 1) >> 1;                    \
716
      dst_u[0] = RGBToU(ar, ag, ab);                                       \
717
      dst_v[0] = RGBToV(ar, ag, ab);                                       \
718
    }                                                                      \
719
  }
720
#endif
721
722
MAKEROWY(ARGB, 2, 1, 0, 4)
723
MAKEROWY(BGRA, 1, 2, 3, 4)
724
MAKEROWY(ABGR, 0, 1, 2, 4)
725
MAKEROWY(RGBA, 3, 2, 1, 4)
726
MAKEROWY(RGB24, 2, 1, 0, 3)
727
MAKEROWY(RAW, 0, 1, 2, 3)
728
#undef MAKEROWY
729
730
// JPeg uses BT.601-1 full range
731
// y =  0.29900 * r + 0.58700 * g + 0.11400 * b
732
// u = -0.16874 * r - 0.33126 * g + 0.50000 * b  + center
733
// v =  0.50000 * r - 0.41869 * g - 0.08131 * b  + center
734
// JPeg 8 bit Y:
735
// b 0.11400 * 256 = 29.184 = 29
736
// g 0.58700 * 256 = 150.272 = 150
737
// r 0.29900 * 256 = 76.544 = 77
738
// JPeg 8 bit U:
739
// b  0.50000 * 256 = 128.0 = 128
740
// g -0.33126 * 256 = −84.80256 = -85
741
// r -0.16874 * 256 = −43.19744 = -43
742
// JPeg 8 bit V:
743
// b -0.08131 * 256 = −20.81536 = -21
744
// g -0.41869 * 256 = −107.18464 = -107
745
// r  0.50000 * 256 = 128.0 = 128
746
747
// 8 bit
748
0
static __inline uint8_t RGBToYJ(uint8_t r, uint8_t g, uint8_t b) {
749
0
  return (77 * r + 150 * g + 29 * b + 128) >> 8;
750
0
}
751
0
static __inline uint8_t RGBToUJ(uint8_t r, uint8_t g, uint8_t b) {
752
0
  return (128 * b - 85 * g - 43 * r + 0x8000) >> 8;
753
0
}
754
0
static __inline uint8_t RGBToVJ(uint8_t r, uint8_t g, uint8_t b) {
755
0
  return (128 * r - 107 * g - 21 * b + 0x8000) >> 8;
756
0
}
757
758
// ARGBToYJ_C and ARGBToUVJ_C
759
// Intel version mimic SSE/AVX which does 2 pavgb
760
#if defined(LIBYUV_ARGBTOUV_PAVGB)
761
#define MAKEROWYJ(NAME, R, G, B, BPP)                                       \
762
0
  void NAME##ToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
763
0
    int x;                                                                  \
764
0
    for (x = 0; x < width; ++x) {                                           \
765
0
      dst_y[0] = RGBToYJ(src_rgb[R], src_rgb[G], src_rgb[B]);               \
766
0
      src_rgb += BPP;                                                       \
767
0
      dst_y += 1;                                                           \
768
0
    }                                                                       \
769
0
  }                                                                         \
Unexecuted instantiation: ARGBToYJRow_C
Unexecuted instantiation: ABGRToYJRow_C
Unexecuted instantiation: RGBAToYJRow_C
Unexecuted instantiation: RGB24ToYJRow_C
Unexecuted instantiation: RAWToYJRow_C
770
  void NAME##ToUVJRow_C(const uint8_t* src_rgb, int src_stride_rgb,         \
771
0
                        uint8_t* dst_u, uint8_t* dst_v, int width) {        \
772
0
    const uint8_t* src_rgb1 = src_rgb + src_stride_rgb;                     \
773
0
    int x;                                                                  \
774
0
    for (x = 0; x < width - 1; x += 2) {                                    \
775
0
      uint8_t ab = AVGB(AVGB(src_rgb[B], src_rgb1[B]),                      \
776
0
                        AVGB(src_rgb[B + BPP], src_rgb1[B + BPP]));         \
777
0
      uint8_t ag = AVGB(AVGB(src_rgb[G], src_rgb1[G]),                      \
778
0
                        AVGB(src_rgb[G + BPP], src_rgb1[G + BPP]));         \
779
0
      uint8_t ar = AVGB(AVGB(src_rgb[R], src_rgb1[R]),                      \
780
0
                        AVGB(src_rgb[R + BPP], src_rgb1[R + BPP]));         \
781
0
      dst_u[0] = RGBToUJ(ar, ag, ab);                                       \
782
0
      dst_v[0] = RGBToVJ(ar, ag, ab);                                       \
783
0
      src_rgb += BPP * 2;                                                   \
784
0
      src_rgb1 += BPP * 2;                                                  \
785
0
      dst_u += 1;                                                           \
786
0
      dst_v += 1;                                                           \
787
0
    }                                                                       \
788
0
    if (width & 1) {                                                        \
789
0
      uint8_t ab = AVGB(src_rgb[B], src_rgb1[B]);                           \
790
0
      uint8_t ag = AVGB(src_rgb[G], src_rgb1[G]);                           \
791
0
      uint8_t ar = AVGB(src_rgb[R], src_rgb1[R]);                           \
792
0
      dst_u[0] = RGBToUJ(ar, ag, ab);                                       \
793
0
      dst_v[0] = RGBToVJ(ar, ag, ab);                                       \
794
0
    }                                                                       \
795
0
  }
Unexecuted instantiation: ARGBToUVJRow_C
Unexecuted instantiation: ABGRToUVJRow_C
Unexecuted instantiation: RGBAToUVJRow_C
Unexecuted instantiation: RGB24ToUVJRow_C
Unexecuted instantiation: RAWToUVJRow_C
796
#else
797
// ARM version does average of 4 pixels with rounding
798
#define MAKEROWYJ(NAME, R, G, B, BPP)                                       \
799
  void NAME##ToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
800
    int x;                                                                  \
801
    for (x = 0; x < width; ++x) {                                           \
802
      dst_y[0] = RGBToYJ(src_rgb[R], src_rgb[G], src_rgb[B]);               \
803
      src_rgb += BPP;                                                       \
804
      dst_y += 1;                                                           \
805
    }                                                                       \
806
  }                                                                         \
807
  void NAME##ToUVJRow_C(const uint8_t* src_rgb, int src_stride_rgb,         \
808
                        uint8_t* dst_u, uint8_t* dst_v, int width) {        \
809
    const uint8_t* src_rgb1 = src_rgb + src_stride_rgb;                     \
810
    int x;                                                                  \
811
    for (x = 0; x < width - 1; x += 2) {                                    \
812
      uint8_t ab = (src_rgb[B] + src_rgb[B + BPP] + src_rgb1[B] +           \
813
                    src_rgb1[B + BPP] + 2) >>                               \
814
                   2;                                                       \
815
      uint8_t ag = (src_rgb[G] + src_rgb[G + BPP] + src_rgb1[G] +           \
816
                    src_rgb1[G + BPP] + 2) >>                               \
817
                   2;                                                       \
818
      uint8_t ar = (src_rgb[R] + src_rgb[R + BPP] + src_rgb1[R] +           \
819
                    src_rgb1[R + BPP] + 2) >>                               \
820
                   2;                                                       \
821
      dst_u[0] = RGBToUJ(ar, ag, ab);                                       \
822
      dst_v[0] = RGBToVJ(ar, ag, ab);                                       \
823
      src_rgb += BPP * 2;                                                   \
824
      src_rgb1 += BPP * 2;                                                  \
825
      dst_u += 1;                                                           \
826
      dst_v += 1;                                                           \
827
    }                                                                       \
828
    if (width & 1) {                                                        \
829
      uint16_t ab = (src_rgb[B] + src_rgb1[B] + 1) >> 1;                    \
830
      uint16_t ag = (src_rgb[G] + src_rgb1[G] + 1) >> 1;                    \
831
      uint16_t ar = (src_rgb[R] + src_rgb1[R] + 1) >> 1;                    \
832
      dst_u[0] = RGBToUJ(ar, ag, ab);                                       \
833
      dst_v[0] = RGBToVJ(ar, ag, ab);                                       \
834
    }                                                                       \
835
  }
836
837
#endif
838
839
MAKEROWYJ(ARGB, 2, 1, 0, 4)
840
MAKEROWYJ(ABGR, 0, 1, 2, 4)
841
MAKEROWYJ(RGBA, 3, 2, 1, 4)
842
MAKEROWYJ(RGB24, 2, 1, 0, 3)
843
MAKEROWYJ(RAW, 0, 1, 2, 3)
844
#undef MAKEROWYJ
845
846
0
void RGB565ToYRow_C(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
847
0
  int x;
848
0
  for (x = 0; x < width; ++x) {
849
0
    uint8_t b = src_rgb565[0] & 0x1f;
850
0
    uint8_t g = STATIC_CAST(
851
0
        uint8_t, (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3));
852
0
    uint8_t r = src_rgb565[1] >> 3;
853
0
    b = STATIC_CAST(uint8_t, (b << 3) | (b >> 2));
854
0
    g = STATIC_CAST(uint8_t, (g << 2) | (g >> 4));
855
0
    r = STATIC_CAST(uint8_t, (r << 3) | (r >> 2));
856
0
    dst_y[0] = RGBToY(r, g, b);
857
0
    src_rgb565 += 2;
858
0
    dst_y += 1;
859
0
  }
860
0
}
861
862
0
void ARGB1555ToYRow_C(const uint8_t* src_argb1555, uint8_t* dst_y, int width) {
863
0
  int x;
864
0
  for (x = 0; x < width; ++x) {
865
0
    uint8_t b = src_argb1555[0] & 0x1f;
866
0
    uint8_t g = STATIC_CAST(
867
0
        uint8_t, (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3));
868
0
    uint8_t r = (src_argb1555[1] & 0x7c) >> 2;
869
0
    b = STATIC_CAST(uint8_t, (b << 3) | (b >> 2));
870
0
    g = STATIC_CAST(uint8_t, (g << 3) | (g >> 2));
871
0
    r = STATIC_CAST(uint8_t, (r << 3) | (r >> 2));
872
0
    dst_y[0] = RGBToY(r, g, b);
873
0
    src_argb1555 += 2;
874
0
    dst_y += 1;
875
0
  }
876
0
}
877
878
0
void ARGB4444ToYRow_C(const uint8_t* src_argb4444, uint8_t* dst_y, int width) {
879
0
  int x;
880
0
  for (x = 0; x < width; ++x) {
881
0
    uint8_t b = src_argb4444[0] & 0x0f;
882
0
    uint8_t g = src_argb4444[0] >> 4;
883
0
    uint8_t r = src_argb4444[1] & 0x0f;
884
0
    b = STATIC_CAST(uint8_t, (b << 4) | b);
885
0
    g = STATIC_CAST(uint8_t, (g << 4) | g);
886
0
    r = STATIC_CAST(uint8_t, (r << 4) | r);
887
0
    dst_y[0] = RGBToY(r, g, b);
888
0
    src_argb4444 += 2;
889
0
    dst_y += 1;
890
0
  }
891
0
}
892
893
void RGB565ToUVRow_C(const uint8_t* src_rgb565,
894
                     int src_stride_rgb565,
895
                     uint8_t* dst_u,
896
                     uint8_t* dst_v,
897
0
                     int width) {
898
0
  const uint8_t* next_rgb565 = src_rgb565 + src_stride_rgb565;
899
0
  int x;
900
0
  for (x = 0; x < width - 1; x += 2) {
901
0
    uint8_t b0 = STATIC_CAST(uint8_t, src_rgb565[0] & 0x1f);
902
0
    uint8_t g0 = STATIC_CAST(
903
0
        uint8_t, (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3));
904
0
    uint8_t r0 = STATIC_CAST(uint8_t, src_rgb565[1] >> 3);
905
0
    uint8_t b1 = STATIC_CAST(uint8_t, src_rgb565[2] & 0x1f);
906
0
    uint8_t g1 = STATIC_CAST(
907
0
        uint8_t, (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3));
908
0
    uint8_t r1 = STATIC_CAST(uint8_t, src_rgb565[3] >> 3);
909
0
    uint8_t b2 = STATIC_CAST(uint8_t, next_rgb565[0] & 0x1f);
910
0
    uint8_t g2 = STATIC_CAST(
911
0
        uint8_t, (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3));
912
0
    uint8_t r2 = STATIC_CAST(uint8_t, next_rgb565[1] >> 3);
913
0
    uint8_t b3 = STATIC_CAST(uint8_t, next_rgb565[2] & 0x1f);
914
0
    uint8_t g3 = STATIC_CAST(
915
0
        uint8_t, (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3));
916
0
    uint8_t r3 = STATIC_CAST(uint8_t, next_rgb565[3] >> 3);
917
918
0
    b0 = STATIC_CAST(uint8_t, (b0 << 3) | (b0 >> 2));
919
0
    g0 = STATIC_CAST(uint8_t, (g0 << 2) | (g0 >> 4));
920
0
    r0 = STATIC_CAST(uint8_t, (r0 << 3) | (r0 >> 2));
921
0
    b1 = STATIC_CAST(uint8_t, (b1 << 3) | (b1 >> 2));
922
0
    g1 = STATIC_CAST(uint8_t, (g1 << 2) | (g1 >> 4));
923
0
    r1 = STATIC_CAST(uint8_t, (r1 << 3) | (r1 >> 2));
924
0
    b2 = STATIC_CAST(uint8_t, (b2 << 3) | (b2 >> 2));
925
0
    g2 = STATIC_CAST(uint8_t, (g2 << 2) | (g2 >> 4));
926
0
    r2 = STATIC_CAST(uint8_t, (r2 << 3) | (r2 >> 2));
927
0
    b3 = STATIC_CAST(uint8_t, (b3 << 3) | (b3 >> 2));
928
0
    g3 = STATIC_CAST(uint8_t, (g3 << 2) | (g3 >> 4));
929
0
    r3 = STATIC_CAST(uint8_t, (r3 << 3) | (r3 >> 2));
930
931
0
#if defined(LIBYUV_ARGBTOUV_PAVGB)
932
0
    uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
933
0
    uint8_t ag = AVGB(AVGB(g0, g2), AVGB(g1, g3));
934
0
    uint8_t ar = AVGB(AVGB(r0, r2), AVGB(r1, r3));
935
0
    dst_u[0] = RGBToU(ar, ag, ab);
936
0
    dst_v[0] = RGBToV(ar, ag, ab);
937
#else
938
    uint8_t b = (b0 + b1 + b2 + b3 + 2) >> 2;
939
    uint8_t g = (g0 + g1 + g2 + g3 + 2) >> 2;
940
    uint8_t r = (r0 + r1 + r2 + r3 + 2) >> 2;
941
    dst_u[0] = RGBToU(r, g, b);
942
    dst_v[0] = RGBToV(r, g, b);
943
#endif
944
945
0
    src_rgb565 += 4;
946
0
    next_rgb565 += 4;
947
0
    dst_u += 1;
948
0
    dst_v += 1;
949
0
  }
950
0
  if (width & 1) {
951
0
    uint8_t b0 = STATIC_CAST(uint8_t, src_rgb565[0] & 0x1f);
952
0
    uint8_t g0 = STATIC_CAST(
953
0
        uint8_t, (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3));
954
0
    uint8_t r0 = STATIC_CAST(uint8_t, src_rgb565[1] >> 3);
955
0
    uint8_t b2 = STATIC_CAST(uint8_t, next_rgb565[0] & 0x1f);
956
0
    uint8_t g2 = STATIC_CAST(
957
0
        uint8_t, (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3));
958
0
    uint8_t r2 = STATIC_CAST(uint8_t, next_rgb565[1] >> 3);
959
0
    b0 = STATIC_CAST(uint8_t, (b0 << 3) | (b0 >> 2));
960
0
    g0 = STATIC_CAST(uint8_t, (g0 << 2) | (g0 >> 4));
961
0
    r0 = STATIC_CAST(uint8_t, (r0 << 3) | (r0 >> 2));
962
0
    b2 = STATIC_CAST(uint8_t, (b2 << 3) | (b2 >> 2));
963
0
    g2 = STATIC_CAST(uint8_t, (g2 << 2) | (g2 >> 4));
964
0
    r2 = STATIC_CAST(uint8_t, (r2 << 3) | (r2 >> 2));
965
966
0
    uint8_t ab = AVGB(b0, b2);
967
0
    uint8_t ag = AVGB(g0, g2);
968
0
    uint8_t ar = AVGB(r0, r2);
969
0
    dst_u[0] = RGBToU(ar, ag, ab);
970
0
    dst_v[0] = RGBToV(ar, ag, ab);
971
0
  }
972
0
}
973
974
void ARGB1555ToUVRow_C(const uint8_t* src_argb1555,
975
                       int src_stride_argb1555,
976
                       uint8_t* dst_u,
977
                       uint8_t* dst_v,
978
0
                       int width) {
979
0
  const uint8_t* next_argb1555 = src_argb1555 + src_stride_argb1555;
980
0
  int x;
981
0
  for (x = 0; x < width - 1; x += 2) {
982
0
    uint8_t b0 = STATIC_CAST(uint8_t, src_argb1555[0] & 0x1f);
983
0
    uint8_t g0 = STATIC_CAST(
984
0
        uint8_t, (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3));
985
0
    uint8_t r0 = STATIC_CAST(uint8_t, (src_argb1555[1] & 0x7c) >> 2);
986
0
    uint8_t b1 = STATIC_CAST(uint8_t, src_argb1555[2] & 0x1f);
987
0
    uint8_t g1 = STATIC_CAST(
988
0
        uint8_t, (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3));
989
0
    uint8_t r1 = STATIC_CAST(uint8_t, (src_argb1555[3] & 0x7c) >> 2);
990
0
    uint8_t b2 = STATIC_CAST(uint8_t, next_argb1555[0] & 0x1f);
991
0
    uint8_t g2 = STATIC_CAST(
992
0
        uint8_t, (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3));
993
0
    uint8_t r2 = STATIC_CAST(uint8_t, (next_argb1555[1] & 0x7c) >> 2);
994
0
    uint8_t b3 = STATIC_CAST(uint8_t, next_argb1555[2] & 0x1f);
995
0
    uint8_t g3 = STATIC_CAST(
996
0
        uint8_t, (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3));
997
0
    uint8_t r3 = STATIC_CAST(uint8_t, (next_argb1555[3] & 0x7c) >> 2);
998
999
0
    b0 = STATIC_CAST(uint8_t, (b0 << 3) | (b0 >> 2));
1000
0
    g0 = STATIC_CAST(uint8_t, (g0 << 3) | (g0 >> 2));
1001
0
    r0 = STATIC_CAST(uint8_t, (r0 << 3) | (r0 >> 2));
1002
0
    b1 = STATIC_CAST(uint8_t, (b1 << 3) | (b1 >> 2));
1003
0
    g1 = STATIC_CAST(uint8_t, (g1 << 3) | (g1 >> 2));
1004
0
    r1 = STATIC_CAST(uint8_t, (r1 << 3) | (r1 >> 2));
1005
0
    b2 = STATIC_CAST(uint8_t, (b2 << 3) | (b2 >> 2));
1006
0
    g2 = STATIC_CAST(uint8_t, (g2 << 3) | (g2 >> 2));
1007
0
    r2 = STATIC_CAST(uint8_t, (r2 << 3) | (r2 >> 2));
1008
0
    b3 = STATIC_CAST(uint8_t, (b3 << 3) | (b3 >> 2));
1009
0
    g3 = STATIC_CAST(uint8_t, (g3 << 3) | (g3 >> 2));
1010
0
    r3 = STATIC_CAST(uint8_t, (r3 << 3) | (r3 >> 2));
1011
1012
0
#if defined(LIBYUV_ARGBTOUV_PAVGB)
1013
0
    uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
1014
0
    uint8_t ag = AVGB(AVGB(g0, g2), AVGB(g1, g3));
1015
0
    uint8_t ar = AVGB(AVGB(r0, r2), AVGB(r1, r3));
1016
0
    dst_u[0] = RGBToU(ar, ag, ab);
1017
0
    dst_v[0] = RGBToV(ar, ag, ab);
1018
#else
1019
    uint8_t b = (b0 + b1 + b2 + b3 + 2) >> 2;
1020
    uint8_t g = (g0 + g1 + g2 + g3 + 2) >> 2;
1021
    uint8_t r = (r0 + r1 + r2 + r3 + 2) >> 2;
1022
    dst_u[0] = RGBToU(r, g, b);
1023
    dst_v[0] = RGBToV(r, g, b);
1024
#endif
1025
1026
0
    src_argb1555 += 4;
1027
0
    next_argb1555 += 4;
1028
0
    dst_u += 1;
1029
0
    dst_v += 1;
1030
0
  }
1031
0
  if (width & 1) {
1032
0
    uint8_t b0 = STATIC_CAST(uint8_t, src_argb1555[0] & 0x1f);
1033
0
    uint8_t g0 = STATIC_CAST(
1034
0
        uint8_t, (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3));
1035
0
    uint8_t r0 = STATIC_CAST(uint8_t, (src_argb1555[1] & 0x7c) >> 2);
1036
0
    uint8_t b2 = STATIC_CAST(uint8_t, next_argb1555[0] & 0x1f);
1037
0
    uint8_t g2 = STATIC_CAST(
1038
0
        uint8_t, (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3));
1039
0
    uint8_t r2 = STATIC_CAST(uint8_t, (next_argb1555[1] & 0x7c) >> 2);
1040
1041
0
    b0 = STATIC_CAST(uint8_t, (b0 << 3) | (b0 >> 2));
1042
0
    g0 = STATIC_CAST(uint8_t, (g0 << 3) | (g0 >> 2));
1043
0
    r0 = STATIC_CAST(uint8_t, (r0 << 3) | (r0 >> 2));
1044
0
    b2 = STATIC_CAST(uint8_t, (b2 << 3) | (b2 >> 2));
1045
0
    g2 = STATIC_CAST(uint8_t, (g2 << 3) | (g2 >> 2));
1046
0
    r2 = STATIC_CAST(uint8_t, (r2 << 3) | (r2 >> 2));
1047
1048
0
    uint8_t ab = AVGB(b0, b2);
1049
0
    uint8_t ag = AVGB(g0, g2);
1050
0
    uint8_t ar = AVGB(r0, r2);
1051
0
    dst_u[0] = RGBToU(ar, ag, ab);
1052
0
    dst_v[0] = RGBToV(ar, ag, ab);
1053
0
  }
1054
0
}
1055
1056
void ARGB4444ToUVRow_C(const uint8_t* src_argb4444,
1057
                       int src_stride_argb4444,
1058
                       uint8_t* dst_u,
1059
                       uint8_t* dst_v,
1060
0
                       int width) {
1061
0
  const uint8_t* next_argb4444 = src_argb4444 + src_stride_argb4444;
1062
0
  int x;
1063
0
  for (x = 0; x < width - 1; x += 2) {
1064
0
    uint8_t b0 = src_argb4444[0] & 0x0f;
1065
0
    uint8_t g0 = src_argb4444[0] >> 4;
1066
0
    uint8_t r0 = src_argb4444[1] & 0x0f;
1067
0
    uint8_t b1 = src_argb4444[2] & 0x0f;
1068
0
    uint8_t g1 = src_argb4444[2] >> 4;
1069
0
    uint8_t r1 = src_argb4444[3] & 0x0f;
1070
0
    uint8_t b2 = next_argb4444[0] & 0x0f;
1071
0
    uint8_t g2 = next_argb4444[0] >> 4;
1072
0
    uint8_t r2 = next_argb4444[1] & 0x0f;
1073
0
    uint8_t b3 = next_argb4444[2] & 0x0f;
1074
0
    uint8_t g3 = next_argb4444[2] >> 4;
1075
0
    uint8_t r3 = next_argb4444[3] & 0x0f;
1076
1077
0
    b0 = STATIC_CAST(uint8_t, (b0 << 4) | b0);
1078
0
    g0 = STATIC_CAST(uint8_t, (g0 << 4) | g0);
1079
0
    r0 = STATIC_CAST(uint8_t, (r0 << 4) | r0);
1080
0
    b1 = STATIC_CAST(uint8_t, (b1 << 4) | b1);
1081
0
    g1 = STATIC_CAST(uint8_t, (g1 << 4) | g1);
1082
0
    r1 = STATIC_CAST(uint8_t, (r1 << 4) | r1);
1083
0
    b2 = STATIC_CAST(uint8_t, (b2 << 4) | b2);
1084
0
    g2 = STATIC_CAST(uint8_t, (g2 << 4) | g2);
1085
0
    r2 = STATIC_CAST(uint8_t, (r2 << 4) | r2);
1086
0
    b3 = STATIC_CAST(uint8_t, (b3 << 4) | b3);
1087
0
    g3 = STATIC_CAST(uint8_t, (g3 << 4) | g3);
1088
0
    r3 = STATIC_CAST(uint8_t, (r3 << 4) | r3);
1089
1090
0
#if defined(LIBYUV_ARGBTOUV_PAVGB)
1091
0
    uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
1092
0
    uint8_t ag = AVGB(AVGB(g0, g2), AVGB(g1, g3));
1093
0
    uint8_t ar = AVGB(AVGB(r0, r2), AVGB(r1, r3));
1094
0
    dst_u[0] = RGBToU(ar, ag, ab);
1095
0
    dst_v[0] = RGBToV(ar, ag, ab);
1096
#else
1097
    uint8_t b = (b0 + b1 + b2 + b3 + 2) >> 2;
1098
    uint8_t g = (g0 + g1 + g2 + g3 + 2) >> 2;
1099
    uint8_t r = (r0 + r1 + r2 + r3 + 2) >> 2;
1100
    dst_u[0] = RGBToU(r, g, b);
1101
    dst_v[0] = RGBToV(r, g, b);
1102
#endif
1103
1104
0
    src_argb4444 += 4;
1105
0
    next_argb4444 += 4;
1106
0
    dst_u += 1;
1107
0
    dst_v += 1;
1108
0
  }
1109
0
  if (width & 1) {
1110
0
    uint8_t b0 = src_argb4444[0] & 0x0f;
1111
0
    uint8_t g0 = src_argb4444[0] >> 4;
1112
0
    uint8_t r0 = src_argb4444[1] & 0x0f;
1113
0
    uint8_t b2 = next_argb4444[0] & 0x0f;
1114
0
    uint8_t g2 = next_argb4444[0] >> 4;
1115
0
    uint8_t r2 = next_argb4444[1] & 0x0f;
1116
1117
0
    b0 = STATIC_CAST(uint8_t, (b0 << 4) | b0);
1118
0
    g0 = STATIC_CAST(uint8_t, (g0 << 4) | g0);
1119
0
    r0 = STATIC_CAST(uint8_t, (r0 << 4) | r0);
1120
0
    b2 = STATIC_CAST(uint8_t, (b2 << 4) | b2);
1121
0
    g2 = STATIC_CAST(uint8_t, (g2 << 4) | g2);
1122
0
    r2 = STATIC_CAST(uint8_t, (r2 << 4) | r2);
1123
1124
0
    uint8_t ab = AVGB(b0, b2);
1125
0
    uint8_t ag = AVGB(g0, g2);
1126
0
    uint8_t ar = AVGB(r0, r2);
1127
0
    dst_u[0] = RGBToU(ar, ag, ab);
1128
0
    dst_v[0] = RGBToV(ar, ag, ab);
1129
0
  }
1130
0
}
1131
1132
void ARGBToUV444Row_C(const uint8_t* src_argb,
1133
                      uint8_t* dst_u,
1134
                      uint8_t* dst_v,
1135
0
                      int width) {
1136
0
  int x;
1137
0
  for (x = 0; x < width; ++x) {
1138
0
    uint8_t ab = src_argb[0];
1139
0
    uint8_t ag = src_argb[1];
1140
0
    uint8_t ar = src_argb[2];
1141
0
    dst_u[0] = RGBToU(ar, ag, ab);
1142
0
    dst_v[0] = RGBToV(ar, ag, ab);
1143
0
    src_argb += 4;
1144
0
    dst_u += 1;
1145
0
    dst_v += 1;
1146
0
  }
1147
0
}
1148
1149
void ARGBToUVJ444Row_C(const uint8_t* src_argb,
1150
                       uint8_t* dst_u,
1151
                       uint8_t* dst_v,
1152
0
                       int width) {
1153
0
  int x;
1154
0
  for (x = 0; x < width; ++x) {
1155
0
    uint8_t ab = src_argb[0];
1156
0
    uint8_t ag = src_argb[1];
1157
0
    uint8_t ar = src_argb[2];
1158
0
    dst_u[0] = RGBToUJ(ar, ag, ab);
1159
0
    dst_v[0] = RGBToVJ(ar, ag, ab);
1160
0
    src_argb += 4;
1161
0
    dst_u += 1;
1162
0
    dst_v += 1;
1163
0
  }
1164
0
}
1165
1166
0
void ARGBGrayRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
1167
0
  int x;
1168
0
  for (x = 0; x < width; ++x) {
1169
0
    uint8_t y = RGBToYJ(src_argb[2], src_argb[1], src_argb[0]);
1170
0
    dst_argb[2] = dst_argb[1] = dst_argb[0] = y;
1171
0
    dst_argb[3] = src_argb[3];
1172
0
    dst_argb += 4;
1173
0
    src_argb += 4;
1174
0
  }
1175
0
}
1176
1177
// Convert a row of image to Sepia tone.
1178
0
void ARGBSepiaRow_C(uint8_t* dst_argb, int width) {
1179
0
  int x;
1180
0
  for (x = 0; x < width; ++x) {
1181
0
    int b = dst_argb[0];
1182
0
    int g = dst_argb[1];
1183
0
    int r = dst_argb[2];
1184
0
    int sb = (b * 17 + g * 68 + r * 35) >> 7;
1185
0
    int sg = (b * 22 + g * 88 + r * 45) >> 7;
1186
0
    int sr = (b * 24 + g * 98 + r * 50) >> 7;
1187
    // b does not over flow. a is preserved from original.
1188
0
    dst_argb[0] = STATIC_CAST(uint8_t, sb);
1189
0
    dst_argb[1] = STATIC_CAST(uint8_t, clamp255(sg));
1190
0
    dst_argb[2] = STATIC_CAST(uint8_t, clamp255(sr));
1191
0
    dst_argb += 4;
1192
0
  }
1193
0
}
1194
1195
// Apply color matrix to a row of image. Matrix is signed.
1196
// TODO(fbarchard): Consider adding rounding (+32).
1197
void ARGBColorMatrixRow_C(const uint8_t* src_argb,
1198
                          uint8_t* dst_argb,
1199
                          const int8_t* matrix_argb,
1200
0
                          int width) {
1201
0
  int x;
1202
0
  for (x = 0; x < width; ++x) {
1203
0
    int b = src_argb[0];
1204
0
    int g = src_argb[1];
1205
0
    int r = src_argb[2];
1206
0
    int a = src_argb[3];
1207
0
    int sb = (b * matrix_argb[0] + g * matrix_argb[1] + r * matrix_argb[2] +
1208
0
              a * matrix_argb[3]) >>
1209
0
             6;
1210
0
    int sg = (b * matrix_argb[4] + g * matrix_argb[5] + r * matrix_argb[6] +
1211
0
              a * matrix_argb[7]) >>
1212
0
             6;
1213
0
    int sr = (b * matrix_argb[8] + g * matrix_argb[9] + r * matrix_argb[10] +
1214
0
              a * matrix_argb[11]) >>
1215
0
             6;
1216
0
    int sa = (b * matrix_argb[12] + g * matrix_argb[13] + r * matrix_argb[14] +
1217
0
              a * matrix_argb[15]) >>
1218
0
             6;
1219
0
    dst_argb[0] = STATIC_CAST(uint8_t, Clamp(sb));
1220
0
    dst_argb[1] = STATIC_CAST(uint8_t, Clamp(sg));
1221
0
    dst_argb[2] = STATIC_CAST(uint8_t, Clamp(sr));
1222
0
    dst_argb[3] = STATIC_CAST(uint8_t, Clamp(sa));
1223
0
    src_argb += 4;
1224
0
    dst_argb += 4;
1225
0
  }
1226
0
}
1227
1228
// Apply color table to a row of image.
1229
void ARGBColorTableRow_C(uint8_t* dst_argb,
1230
                         const uint8_t* table_argb,
1231
0
                         int width) {
1232
0
  int x;
1233
0
  for (x = 0; x < width; ++x) {
1234
0
    int b = dst_argb[0];
1235
0
    int g = dst_argb[1];
1236
0
    int r = dst_argb[2];
1237
0
    int a = dst_argb[3];
1238
0
    dst_argb[0] = table_argb[b * 4 + 0];
1239
0
    dst_argb[1] = table_argb[g * 4 + 1];
1240
0
    dst_argb[2] = table_argb[r * 4 + 2];
1241
0
    dst_argb[3] = table_argb[a * 4 + 3];
1242
0
    dst_argb += 4;
1243
0
  }
1244
0
}
1245
1246
// Apply color table to a row of image.
1247
void RGBColorTableRow_C(uint8_t* dst_argb,
1248
                        const uint8_t* table_argb,
1249
0
                        int width) {
1250
0
  int x;
1251
0
  for (x = 0; x < width; ++x) {
1252
0
    int b = dst_argb[0];
1253
0
    int g = dst_argb[1];
1254
0
    int r = dst_argb[2];
1255
0
    dst_argb[0] = table_argb[b * 4 + 0];
1256
0
    dst_argb[1] = table_argb[g * 4 + 1];
1257
0
    dst_argb[2] = table_argb[r * 4 + 2];
1258
0
    dst_argb += 4;
1259
0
  }
1260
0
}
1261
1262
void ARGBQuantizeRow_C(uint8_t* dst_argb,
1263
                       int scale,
1264
                       int interval_size,
1265
                       int interval_offset,
1266
0
                       int width) {
1267
0
  int x;
1268
0
  for (x = 0; x < width; ++x) {
1269
0
    int b = dst_argb[0];
1270
0
    int g = dst_argb[1];
1271
0
    int r = dst_argb[2];
1272
0
    dst_argb[0] = STATIC_CAST(
1273
0
        uint8_t, (b * scale >> 16) * interval_size + interval_offset);
1274
0
    dst_argb[1] = STATIC_CAST(
1275
0
        uint8_t, (g * scale >> 16) * interval_size + interval_offset);
1276
0
    dst_argb[2] = STATIC_CAST(
1277
0
        uint8_t, (r * scale >> 16) * interval_size + interval_offset);
1278
0
    dst_argb += 4;
1279
0
  }
1280
0
}
1281
1282
0
#define REPEAT8(v) (v) | ((v) << 8)
1283
0
#define SHADE(f, v) v* f >> 24
1284
1285
void ARGBShadeRow_C(const uint8_t* src_argb,
1286
                    uint8_t* dst_argb,
1287
                    int width,
1288
0
                    uint32_t value) {
1289
0
  const uint32_t b_scale = REPEAT8(value & 0xff);
1290
0
  const uint32_t g_scale = REPEAT8((value >> 8) & 0xff);
1291
0
  const uint32_t r_scale = REPEAT8((value >> 16) & 0xff);
1292
0
  const uint32_t a_scale = REPEAT8(value >> 24);
1293
1294
0
  int i;
1295
0
  for (i = 0; i < width; ++i) {
1296
0
    const uint32_t b = REPEAT8(src_argb[0]);
1297
0
    const uint32_t g = REPEAT8(src_argb[1]);
1298
0
    const uint32_t r = REPEAT8(src_argb[2]);
1299
0
    const uint32_t a = REPEAT8(src_argb[3]);
1300
0
    dst_argb[0] = SHADE(b, b_scale);
1301
0
    dst_argb[1] = SHADE(g, g_scale);
1302
0
    dst_argb[2] = SHADE(r, r_scale);
1303
0
    dst_argb[3] = SHADE(a, a_scale);
1304
0
    src_argb += 4;
1305
0
    dst_argb += 4;
1306
0
  }
1307
0
}
1308
#undef REPEAT8
1309
#undef SHADE
1310
1311
void ARGBMultiplyRow_C(const uint8_t* src_argb,
1312
                       const uint8_t* src_argb1,
1313
                       uint8_t* dst_argb,
1314
0
                       int width) {
1315
0
  int i;
1316
0
  for (i = 0; i < width; ++i) {
1317
0
    const uint32_t b = src_argb[0];
1318
0
    const uint32_t g = src_argb[1];
1319
0
    const uint32_t r = src_argb[2];
1320
0
    const uint32_t a = src_argb[3];
1321
0
    const uint32_t b_scale = src_argb1[0];
1322
0
    const uint32_t g_scale = src_argb1[1];
1323
0
    const uint32_t r_scale = src_argb1[2];
1324
0
    const uint32_t a_scale = src_argb1[3];
1325
0
    dst_argb[0] = STATIC_CAST(uint8_t, (b * b_scale + 128) >> 8);
1326
0
    dst_argb[1] = STATIC_CAST(uint8_t, (g * g_scale + 128) >> 8);
1327
0
    dst_argb[2] = STATIC_CAST(uint8_t, (r * r_scale + 128) >> 8);
1328
0
    dst_argb[3] = STATIC_CAST(uint8_t, (a * a_scale + 128) >> 8);
1329
0
    src_argb += 4;
1330
0
    src_argb1 += 4;
1331
0
    dst_argb += 4;
1332
0
  }
1333
0
}
1334
1335
#define SHADE(f, v) clamp255(v + f)
1336
1337
void ARGBAddRow_C(const uint8_t* src_argb,
1338
                  const uint8_t* src_argb1,
1339
                  uint8_t* dst_argb,
1340
0
                  int width) {
1341
0
  int i;
1342
0
  for (i = 0; i < width; ++i) {
1343
0
    const int b = src_argb[0];
1344
0
    const int g = src_argb[1];
1345
0
    const int r = src_argb[2];
1346
0
    const int a = src_argb[3];
1347
0
    const int b_add = src_argb1[0];
1348
0
    const int g_add = src_argb1[1];
1349
0
    const int r_add = src_argb1[2];
1350
0
    const int a_add = src_argb1[3];
1351
0
    dst_argb[0] = STATIC_CAST(uint8_t, SHADE(b, b_add));
1352
0
    dst_argb[1] = STATIC_CAST(uint8_t, SHADE(g, g_add));
1353
0
    dst_argb[2] = STATIC_CAST(uint8_t, SHADE(r, r_add));
1354
0
    dst_argb[3] = STATIC_CAST(uint8_t, SHADE(a, a_add));
1355
0
    src_argb += 4;
1356
0
    src_argb1 += 4;
1357
0
    dst_argb += 4;
1358
0
  }
1359
0
}
1360
#undef SHADE
1361
1362
#define SHADE(f, v) clamp0(f - v)
1363
1364
void ARGBSubtractRow_C(const uint8_t* src_argb,
1365
                       const uint8_t* src_argb1,
1366
                       uint8_t* dst_argb,
1367
0
                       int width) {
1368
0
  int i;
1369
0
  for (i = 0; i < width; ++i) {
1370
0
    const int b = src_argb[0];
1371
0
    const int g = src_argb[1];
1372
0
    const int r = src_argb[2];
1373
0
    const int a = src_argb[3];
1374
0
    const int b_sub = src_argb1[0];
1375
0
    const int g_sub = src_argb1[1];
1376
0
    const int r_sub = src_argb1[2];
1377
0
    const int a_sub = src_argb1[3];
1378
0
    dst_argb[0] = STATIC_CAST(uint8_t, SHADE(b, b_sub));
1379
0
    dst_argb[1] = STATIC_CAST(uint8_t, SHADE(g, g_sub));
1380
0
    dst_argb[2] = STATIC_CAST(uint8_t, SHADE(r, r_sub));
1381
0
    dst_argb[3] = STATIC_CAST(uint8_t, SHADE(a, a_sub));
1382
0
    src_argb += 4;
1383
0
    src_argb1 += 4;
1384
0
    dst_argb += 4;
1385
0
  }
1386
0
}
1387
#undef SHADE
1388
1389
// Sobel functions which mimics SSSE3.
1390
void SobelXRow_C(const uint8_t* src_y0,
1391
                 const uint8_t* src_y1,
1392
                 const uint8_t* src_y2,
1393
                 uint8_t* dst_sobelx,
1394
0
                 int width) {
1395
0
  int i;
1396
0
  for (i = 0; i < width; ++i) {
1397
0
    int a = src_y0[i];
1398
0
    int b = src_y1[i];
1399
0
    int c = src_y2[i];
1400
0
    int a_sub = src_y0[i + 2];
1401
0
    int b_sub = src_y1[i + 2];
1402
0
    int c_sub = src_y2[i + 2];
1403
0
    int a_diff = a - a_sub;
1404
0
    int b_diff = b - b_sub;
1405
0
    int c_diff = c - c_sub;
1406
0
    int sobel = Abs(a_diff + b_diff * 2 + c_diff);
1407
0
    dst_sobelx[i] = (uint8_t)(clamp255(sobel));
1408
0
  }
1409
0
}
1410
1411
void SobelYRow_C(const uint8_t* src_y0,
1412
                 const uint8_t* src_y1,
1413
                 uint8_t* dst_sobely,
1414
0
                 int width) {
1415
0
  int i;
1416
0
  for (i = 0; i < width; ++i) {
1417
0
    int a = src_y0[i + 0];
1418
0
    int b = src_y0[i + 1];
1419
0
    int c = src_y0[i + 2];
1420
0
    int a_sub = src_y1[i + 0];
1421
0
    int b_sub = src_y1[i + 1];
1422
0
    int c_sub = src_y1[i + 2];
1423
0
    int a_diff = a - a_sub;
1424
0
    int b_diff = b - b_sub;
1425
0
    int c_diff = c - c_sub;
1426
0
    int sobel = Abs(a_diff + b_diff * 2 + c_diff);
1427
0
    dst_sobely[i] = (uint8_t)(clamp255(sobel));
1428
0
  }
1429
0
}
1430
1431
void SobelRow_C(const uint8_t* src_sobelx,
1432
                const uint8_t* src_sobely,
1433
                uint8_t* dst_argb,
1434
0
                int width) {
1435
0
  int i;
1436
0
  for (i = 0; i < width; ++i) {
1437
0
    int r = src_sobelx[i];
1438
0
    int b = src_sobely[i];
1439
0
    int s = clamp255(r + b);
1440
0
    dst_argb[0] = (uint8_t)(s);
1441
0
    dst_argb[1] = (uint8_t)(s);
1442
0
    dst_argb[2] = (uint8_t)(s);
1443
0
    dst_argb[3] = (uint8_t)(255u);
1444
0
    dst_argb += 4;
1445
0
  }
1446
0
}
1447
1448
void SobelToPlaneRow_C(const uint8_t* src_sobelx,
1449
                       const uint8_t* src_sobely,
1450
                       uint8_t* dst_y,
1451
0
                       int width) {
1452
0
  int i;
1453
0
  for (i = 0; i < width; ++i) {
1454
0
    int r = src_sobelx[i];
1455
0
    int b = src_sobely[i];
1456
0
    int s = clamp255(r + b);
1457
0
    dst_y[i] = (uint8_t)(s);
1458
0
  }
1459
0
}
1460
1461
void SobelXYRow_C(const uint8_t* src_sobelx,
1462
                  const uint8_t* src_sobely,
1463
                  uint8_t* dst_argb,
1464
0
                  int width) {
1465
0
  int i;
1466
0
  for (i = 0; i < width; ++i) {
1467
0
    int r = src_sobelx[i];
1468
0
    int b = src_sobely[i];
1469
0
    int g = clamp255(r + b);
1470
0
    dst_argb[0] = (uint8_t)(b);
1471
0
    dst_argb[1] = (uint8_t)(g);
1472
0
    dst_argb[2] = (uint8_t)(r);
1473
0
    dst_argb[3] = (uint8_t)(255u);
1474
0
    dst_argb += 4;
1475
0
  }
1476
0
}
1477
1478
0
void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width) {
1479
  // Copy a Y to RGB.
1480
0
  int x;
1481
0
  for (x = 0; x < width; ++x) {
1482
0
    uint8_t y = src_y[0];
1483
0
    dst_argb[2] = dst_argb[1] = dst_argb[0] = y;
1484
0
    dst_argb[3] = 255u;
1485
0
    dst_argb += 4;
1486
0
    ++src_y;
1487
0
  }
1488
0
}
1489
1490
// Macros to create SIMD specific yuv to rgb conversion constants.
1491
1492
// clang-format off
1493
1494
#if defined(__aarch64__) || defined(__arm__) || defined(__riscv)
1495
// Bias values include subtract 128 from U and V, bias from Y and rounding.
1496
// For B and R bias is negative. For G bias is positive.
1497
#define YUVCONSTANTSBODY(YG, YB, UB, UG, VG, VR)                             \
1498
  {{UB, VR, UG, VG, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},                     \
1499
   {YG, (UB * 128 - YB), (UG * 128 + VG * 128 + YB), (VR * 128 - YB), YB, 0, \
1500
    0, 0}}
1501
#else
1502
#define YUVCONSTANTSBODY(YG, YB, UB, UG, VG, VR)                     \
1503
  {{UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,          \
1504
    UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0},         \
1505
   {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,  \
1506
    UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG}, \
1507
   {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,          \
1508
    0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR},         \
1509
   {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}, \
1510
   {YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB}}
1511
#endif
1512
1513
// clang-format on
1514
1515
#define MAKEYUVCONSTANTS(name, YG, YB, UB, UG, VG, VR)            \
1516
  const struct YuvConstants SIMD_ALIGNED(kYuv##name##Constants) = \
1517
      YUVCONSTANTSBODY(YG, YB, UB, UG, VG, VR);                   \
1518
  const struct YuvConstants SIMD_ALIGNED(kYvu##name##Constants) = \
1519
      YUVCONSTANTSBODY(YG, YB, VR, VG, UG, UB);
1520
1521
// TODO(fbarchard): Generate SIMD structures from float matrix.
1522
1523
// BT.601 limited range YUV to RGB reference
1524
//  R = (Y - 16) * 1.164             + V * 1.596
1525
//  G = (Y - 16) * 1.164 - U * 0.391 - V * 0.813
1526
//  B = (Y - 16) * 1.164 + U * 2.018
1527
// KR = 0.299; KB = 0.114
1528
1529
// U and V contributions to R,G,B.
1530
#if defined(LIBYUV_UNLIMITED_DATA) || defined(LIBYUV_UNLIMITED_BT601)
1531
#define UB 129 /* round(2.018 * 64) */
1532
#else
1533
#define UB 128 /* max(128, round(2.018 * 64)) */
1534
#endif
1535
#define UG 25  /* round(0.391 * 64) */
1536
#define VG 52  /* round(0.813 * 64) */
1537
#define VR 102 /* round(1.596 * 64) */
1538
1539
// Y contribution to R,G,B.  Scale and bias.
1540
#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
1541
#define YB -1160 /* 1.164 * 64 * -16 + 64 / 2 */
1542
1543
MAKEYUVCONSTANTS(I601, YG, YB, UB, UG, VG, VR)
1544
1545
#undef YG
1546
#undef YB
1547
#undef UB
1548
#undef UG
1549
#undef VG
1550
#undef VR
1551
1552
// BT.601 full range YUV to RGB reference (aka JPEG)
1553
// *  R = Y               + V * 1.40200
1554
// *  G = Y - U * 0.34414 - V * 0.71414
1555
// *  B = Y + U * 1.77200
1556
// KR = 0.299; KB = 0.114
1557
1558
// U and V contributions to R,G,B.
1559
#define UB 113 /* round(1.77200 * 64) */
1560
#define UG 22  /* round(0.34414 * 64) */
1561
#define VG 46  /* round(0.71414 * 64) */
1562
#define VR 90  /* round(1.40200 * 64) */
1563
1564
// Y contribution to R,G,B.  Scale and bias.
1565
#define YG 16320 /* round(1.000 * 64 * 256 * 256 / 257) */
1566
#define YB 32    /* 64 / 2 */
1567
1568
MAKEYUVCONSTANTS(JPEG, YG, YB, UB, UG, VG, VR)
1569
1570
#undef YG
1571
#undef YB
1572
#undef UB
1573
#undef UG
1574
#undef VG
1575
#undef VR
1576
1577
// BT.709 limited range YUV to RGB reference
1578
//  R = (Y - 16) * 1.164             + V * 1.793
1579
//  G = (Y - 16) * 1.164 - U * 0.213 - V * 0.533
1580
//  B = (Y - 16) * 1.164 + U * 2.112
1581
//  KR = 0.2126, KB = 0.0722
1582
1583
// U and V contributions to R,G,B.
1584
#if defined(LIBYUV_UNLIMITED_DATA) || defined(LIBYUV_UNLIMITED_BT709)
1585
#define UB 135 /* round(2.112 * 64) */
1586
#else
1587
#define UB 128 /* max(128, round(2.112 * 64)) */
1588
#endif
1589
#define UG 14  /* round(0.213 * 64) */
1590
#define VG 34  /* round(0.533 * 64) */
1591
#define VR 115 /* round(1.793 * 64) */
1592
1593
// Y contribution to R,G,B.  Scale and bias.
1594
#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
1595
#define YB -1160 /* 1.164 * 64 * -16 + 64 / 2 */
1596
1597
MAKEYUVCONSTANTS(H709, YG, YB, UB, UG, VG, VR)
1598
1599
#undef YG
1600
#undef YB
1601
#undef UB
1602
#undef UG
1603
#undef VG
1604
#undef VR
1605
1606
// BT.709 full range YUV to RGB reference
1607
//  R = Y               + V * 1.5748
1608
//  G = Y - U * 0.18732 - V * 0.46812
1609
//  B = Y + U * 1.8556
1610
//  KR = 0.2126, KB = 0.0722
1611
1612
// U and V contributions to R,G,B.
1613
#define UB 119 /* round(1.8556 * 64) */
1614
#define UG 12  /* round(0.18732 * 64) */
1615
#define VG 30  /* round(0.46812 * 64) */
1616
#define VR 101 /* round(1.5748 * 64) */
1617
1618
// Y contribution to R,G,B.  Scale and bias.  (same as jpeg)
1619
#define YG 16320 /* round(1 * 64 * 256 * 256 / 257) */
1620
#define YB 32    /* 64 / 2 */
1621
1622
MAKEYUVCONSTANTS(F709, YG, YB, UB, UG, VG, VR)
1623
1624
#undef YG
1625
#undef YB
1626
#undef UB
1627
#undef UG
1628
#undef VG
1629
#undef VR
1630
1631
// BT.2020 limited range YUV to RGB reference
1632
//  R = (Y - 16) * 1.164384                + V * 1.67867
1633
//  G = (Y - 16) * 1.164384 - U * 0.187326 - V * 0.65042
1634
//  B = (Y - 16) * 1.164384 + U * 2.14177
1635
// KR = 0.2627; KB = 0.0593
1636
1637
// U and V contributions to R,G,B.
1638
#if defined(LIBYUV_UNLIMITED_DATA) || defined(LIBYUV_UNLIMITED_BT2020)
1639
#define UB 137 /* round(2.142 * 64) */
1640
#else
1641
#define UB 128 /* max(128, round(2.142 * 64)) */
1642
#endif
1643
#define UG 12  /* round(0.187326 * 64) */
1644
#define VG 42  /* round(0.65042 * 64) */
1645
#define VR 107 /* round(1.67867 * 64) */
1646
1647
// Y contribution to R,G,B.  Scale and bias.
1648
#define YG 19003 /* round(1.164384 * 64 * 256 * 256 / 257) */
1649
#define YB -1160 /* 1.164384 * 64 * -16 + 64 / 2 */
1650
1651
MAKEYUVCONSTANTS(2020, YG, YB, UB, UG, VG, VR)
1652
1653
#undef YG
1654
#undef YB
1655
#undef UB
1656
#undef UG
1657
#undef VG
1658
#undef VR
1659
1660
// BT.2020 full range YUV to RGB reference
1661
//  R = Y                + V * 1.474600
1662
//  G = Y - U * 0.164553 - V * 0.571353
1663
//  B = Y + U * 1.881400
1664
// KR = 0.2627; KB = 0.0593
1665
1666
#define UB 120 /* round(1.881400 * 64) */
1667
#define UG 11  /* round(0.164553 * 64) */
1668
#define VG 37  /* round(0.571353 * 64) */
1669
#define VR 94  /* round(1.474600 * 64) */
1670
1671
// Y contribution to R,G,B.  Scale and bias.  (same as jpeg)
1672
#define YG 16320 /* round(1 * 64 * 256 * 256 / 257) */
1673
#define YB 32    /* 64 / 2 */
1674
1675
MAKEYUVCONSTANTS(V2020, YG, YB, UB, UG, VG, VR)
1676
1677
#undef YG
1678
#undef YB
1679
#undef UB
1680
#undef UG
1681
#undef VG
1682
#undef VR
1683
1684
#undef BB
1685
#undef BG
1686
#undef BR
1687
1688
#undef MAKEYUVCONSTANTS
1689
1690
#if defined(__aarch64__) || defined(__arm__) || defined(__riscv)
1691
#define LOAD_YUV_CONSTANTS                 \
1692
  int ub = yuvconstants->kUVCoeff[0];      \
1693
  int vr = yuvconstants->kUVCoeff[1];      \
1694
  int ug = yuvconstants->kUVCoeff[2];      \
1695
  int vg = yuvconstants->kUVCoeff[3];      \
1696
  int yg = yuvconstants->kRGBCoeffBias[0]; \
1697
  int bb = yuvconstants->kRGBCoeffBias[1]; \
1698
  int bg = yuvconstants->kRGBCoeffBias[2]; \
1699
  int br = yuvconstants->kRGBCoeffBias[3]
1700
1701
#define CALC_RGB16                         \
1702
  int32_t y1 = (uint32_t)(y32 * yg) >> 16; \
1703
  int b16 = y1 + (u * ub) - bb;            \
1704
  int g16 = y1 + bg - (u * ug + v * vg);   \
1705
  int r16 = y1 + (v * vr) - br
1706
#else
1707
#define LOAD_YUV_CONSTANTS           \
1708
0
  int ub = yuvconstants->kUVToB[0];  \
1709
0
  int ug = yuvconstants->kUVToG[0];  \
1710
0
  int vg = yuvconstants->kUVToG[1];  \
1711
0
  int vr = yuvconstants->kUVToR[1];  \
1712
0
  int yg = yuvconstants->kYToRgb[0]; \
1713
0
  int yb = yuvconstants->kYBiasToRgb[0]
1714
1715
#define CALC_RGB16                                \
1716
0
  int32_t y1 = ((uint32_t)(y32 * yg) >> 16) + yb; \
1717
0
  int8_t ui = (int8_t)u;                          \
1718
0
  int8_t vi = (int8_t)v;                          \
1719
0
  ui -= 0x80;                                     \
1720
0
  vi -= 0x80;                                     \
1721
0
  int b16 = y1 + (ui * ub);                       \
1722
0
  int g16 = y1 - (ui * ug + vi * vg);             \
1723
0
  int r16 = y1 + (vi * vr)
1724
#endif
1725
1726
// C reference code that mimics the YUV assembly.
1727
// Reads 8 bit YUV and leaves result as 16 bit.
1728
static __inline void YuvPixel(uint8_t y,
1729
                              uint8_t u,
1730
                              uint8_t v,
1731
                              uint8_t* b,
1732
                              uint8_t* g,
1733
                              uint8_t* r,
1734
0
                              const struct YuvConstants* yuvconstants) {
1735
0
  LOAD_YUV_CONSTANTS;
1736
0
  uint32_t y32 = y * 0x0101;
1737
0
  CALC_RGB16;
1738
0
  *b = STATIC_CAST(uint8_t, Clamp((int32_t)(b16) >> 6));
1739
0
  *g = STATIC_CAST(uint8_t, Clamp((int32_t)(g16) >> 6));
1740
0
  *r = STATIC_CAST(uint8_t, Clamp((int32_t)(r16) >> 6));
1741
0
}
1742
1743
// Reads 8 bit YUV and leaves result as 16 bit.
1744
static __inline void YuvPixel8_16(uint8_t y,
1745
                                  uint8_t u,
1746
                                  uint8_t v,
1747
                                  int* b,
1748
                                  int* g,
1749
                                  int* r,
1750
0
                                  const struct YuvConstants* yuvconstants) {
1751
0
  LOAD_YUV_CONSTANTS;
1752
0
  uint32_t y32 = y * 0x0101;
1753
0
  CALC_RGB16;
1754
0
  *b = b16;
1755
0
  *g = g16;
1756
0
  *r = r16;
1757
0
}
1758
1759
// C reference code that mimics the YUV 16 bit assembly.
1760
// Reads 10 bit YUV and leaves result as 16 bit.
1761
static __inline void YuvPixel10_16(uint16_t y,
1762
                                   uint16_t u,
1763
                                   uint16_t v,
1764
                                   int* b,
1765
                                   int* g,
1766
                                   int* r,
1767
0
                                   const struct YuvConstants* yuvconstants) {
1768
0
  LOAD_YUV_CONSTANTS;
1769
0
  uint32_t y32 = (y << 6) | (y >> 4);
1770
0
  u = STATIC_CAST(uint8_t, clamp255(u >> 2));
1771
0
  v = STATIC_CAST(uint8_t, clamp255(v >> 2));
1772
0
  CALC_RGB16;
1773
0
  *b = b16;
1774
0
  *g = g16;
1775
0
  *r = r16;
1776
0
}
1777
1778
// C reference code that mimics the YUV 16 bit assembly.
1779
// Reads 12 bit YUV and leaves result as 16 bit.
1780
static __inline void YuvPixel12_16(int16_t y,
1781
                                   int16_t u,
1782
                                   int16_t v,
1783
                                   int* b,
1784
                                   int* g,
1785
                                   int* r,
1786
0
                                   const struct YuvConstants* yuvconstants) {
1787
0
  LOAD_YUV_CONSTANTS;
1788
0
  uint32_t y32 = (y << 4) | (y >> 8);
1789
0
  u = STATIC_CAST(uint8_t, clamp255(u >> 4));
1790
0
  v = STATIC_CAST(uint8_t, clamp255(v >> 4));
1791
0
  CALC_RGB16;
1792
0
  *b = b16;
1793
0
  *g = g16;
1794
0
  *r = r16;
1795
0
}
1796
1797
// C reference code that mimics the YUV 10 bit assembly.
1798
// Reads 10 bit YUV and clamps down to 8 bit RGB.
1799
static __inline void YuvPixel10(uint16_t y,
1800
                                uint16_t u,
1801
                                uint16_t v,
1802
                                uint8_t* b,
1803
                                uint8_t* g,
1804
                                uint8_t* r,
1805
0
                                const struct YuvConstants* yuvconstants) {
1806
0
  int b16;
1807
0
  int g16;
1808
0
  int r16;
1809
0
  YuvPixel10_16(y, u, v, &b16, &g16, &r16, yuvconstants);
1810
0
  *b = STATIC_CAST(uint8_t, Clamp(b16 >> 6));
1811
0
  *g = STATIC_CAST(uint8_t, Clamp(g16 >> 6));
1812
0
  *r = STATIC_CAST(uint8_t, Clamp(r16 >> 6));
1813
0
}
1814
1815
// C reference code that mimics the YUV 12 bit assembly.
1816
// Reads 12 bit YUV and clamps down to 8 bit RGB.
1817
static __inline void YuvPixel12(uint16_t y,
1818
                                uint16_t u,
1819
                                uint16_t v,
1820
                                uint8_t* b,
1821
                                uint8_t* g,
1822
                                uint8_t* r,
1823
0
                                const struct YuvConstants* yuvconstants) {
1824
0
  int b16;
1825
0
  int g16;
1826
0
  int r16;
1827
0
  YuvPixel12_16(y, u, v, &b16, &g16, &r16, yuvconstants);
1828
0
  *b = STATIC_CAST(uint8_t, Clamp(b16 >> 6));
1829
0
  *g = STATIC_CAST(uint8_t, Clamp(g16 >> 6));
1830
0
  *r = STATIC_CAST(uint8_t, Clamp(r16 >> 6));
1831
0
}
1832
1833
// C reference code that mimics the YUV 16 bit assembly.
1834
// Reads 16 bit YUV and leaves result as 8 bit.
1835
static __inline void YuvPixel16_8(uint16_t y,
1836
                                  uint16_t u,
1837
                                  uint16_t v,
1838
                                  uint8_t* b,
1839
                                  uint8_t* g,
1840
                                  uint8_t* r,
1841
0
                                  const struct YuvConstants* yuvconstants) {
1842
0
  LOAD_YUV_CONSTANTS;
1843
0
  uint32_t y32 = y;
1844
0
  u = STATIC_CAST(uint16_t, clamp255(u >> 8));
1845
0
  v = STATIC_CAST(uint16_t, clamp255(v >> 8));
1846
0
  CALC_RGB16;
1847
0
  *b = STATIC_CAST(uint8_t, Clamp((int32_t)(b16) >> 6));
1848
0
  *g = STATIC_CAST(uint8_t, Clamp((int32_t)(g16) >> 6));
1849
0
  *r = STATIC_CAST(uint8_t, Clamp((int32_t)(r16) >> 6));
1850
0
}
1851
1852
// C reference code that mimics the YUV 16 bit assembly.
1853
// Reads 16 bit YUV and leaves result as 16 bit.
1854
static __inline void YuvPixel16_16(uint16_t y,
1855
                                   uint16_t u,
1856
                                   uint16_t v,
1857
                                   int* b,
1858
                                   int* g,
1859
                                   int* r,
1860
0
                                   const struct YuvConstants* yuvconstants) {
1861
0
  LOAD_YUV_CONSTANTS;
1862
0
  uint32_t y32 = y;
1863
0
  u = STATIC_CAST(uint16_t, clamp255(u >> 8));
1864
0
  v = STATIC_CAST(uint16_t, clamp255(v >> 8));
1865
0
  CALC_RGB16;
1866
0
  *b = b16;
1867
0
  *g = g16;
1868
0
  *r = r16;
1869
0
}
1870
1871
// C reference code that mimics the YUV assembly.
1872
// Reads 8 bit YUV and leaves result as 8 bit.
1873
static __inline void YPixel(uint8_t y,
1874
                            uint8_t* b,
1875
                            uint8_t* g,
1876
                            uint8_t* r,
1877
0
                            const struct YuvConstants* yuvconstants) {
1878
#if defined(__aarch64__) || defined(__arm__) || defined(__riscv)
1879
  int yg = yuvconstants->kRGBCoeffBias[0];
1880
  int ygb = yuvconstants->kRGBCoeffBias[4];
1881
#else
1882
0
  int ygb = yuvconstants->kYBiasToRgb[0];
1883
0
  int yg = yuvconstants->kYToRgb[0];
1884
0
#endif
1885
0
  uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16;
1886
0
  uint8_t b8 = STATIC_CAST(uint8_t, Clamp(((int32_t)(y1) + ygb) >> 6));
1887
0
  *b = b8;
1888
0
  *g = b8;
1889
0
  *r = b8;
1890
0
}
1891
1892
void I444ToARGBRow_C(const uint8_t* src_y,
1893
                     const uint8_t* src_u,
1894
                     const uint8_t* src_v,
1895
                     uint8_t* rgb_buf,
1896
                     const struct YuvConstants* yuvconstants,
1897
0
                     int width) {
1898
0
  int x;
1899
0
  for (x = 0; x < width; ++x) {
1900
0
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
1901
0
             rgb_buf + 2, yuvconstants);
1902
0
    rgb_buf[3] = 255;
1903
0
    src_y += 1;
1904
0
    src_u += 1;
1905
0
    src_v += 1;
1906
0
    rgb_buf += 4;  // Advance 1 pixel.
1907
0
  }
1908
0
}
1909
1910
void I444ToRGB24Row_C(const uint8_t* src_y,
1911
                      const uint8_t* src_u,
1912
                      const uint8_t* src_v,
1913
                      uint8_t* rgb_buf,
1914
                      const struct YuvConstants* yuvconstants,
1915
0
                      int width) {
1916
0
  int x;
1917
0
  for (x = 0; x < width; ++x) {
1918
0
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
1919
0
             rgb_buf + 2, yuvconstants);
1920
0
    src_y += 1;
1921
0
    src_u += 1;
1922
0
    src_v += 1;
1923
0
    rgb_buf += 3;  // Advance 1 pixel.
1924
0
  }
1925
0
}
1926
1927
// Also used for 420
1928
void I422ToARGBRow_C(const uint8_t* src_y,
1929
                     const uint8_t* src_u,
1930
                     const uint8_t* src_v,
1931
                     uint8_t* rgb_buf,
1932
                     const struct YuvConstants* yuvconstants,
1933
0
                     int width) {
1934
0
  int x;
1935
0
  for (x = 0; x < width - 1; x += 2) {
1936
0
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
1937
0
             rgb_buf + 2, yuvconstants);
1938
0
    rgb_buf[3] = 255;
1939
0
    YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
1940
0
             rgb_buf + 6, yuvconstants);
1941
0
    rgb_buf[7] = 255;
1942
0
    src_y += 2;
1943
0
    src_u += 1;
1944
0
    src_v += 1;
1945
0
    rgb_buf += 8;  // Advance 2 pixels.
1946
0
  }
1947
0
  if (width & 1) {
1948
0
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
1949
0
             rgb_buf + 2, yuvconstants);
1950
0
    rgb_buf[3] = 255;
1951
0
  }
1952
0
}
1953
1954
// 10 bit YUV to ARGB
1955
void I210ToARGBRow_C(const uint16_t* src_y,
1956
                     const uint16_t* src_u,
1957
                     const uint16_t* src_v,
1958
                     uint8_t* rgb_buf,
1959
                     const struct YuvConstants* yuvconstants,
1960
0
                     int width) {
1961
0
  int x;
1962
0
  for (x = 0; x < width - 1; x += 2) {
1963
0
    YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
1964
0
               rgb_buf + 2, yuvconstants);
1965
0
    rgb_buf[3] = 255;
1966
0
    YuvPixel10(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
1967
0
               rgb_buf + 6, yuvconstants);
1968
0
    rgb_buf[7] = 255;
1969
0
    src_y += 2;
1970
0
    src_u += 1;
1971
0
    src_v += 1;
1972
0
    rgb_buf += 8;  // Advance 2 pixels.
1973
0
  }
1974
0
  if (width & 1) {
1975
0
    YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
1976
0
               rgb_buf + 2, yuvconstants);
1977
0
    rgb_buf[3] = 255;
1978
0
  }
1979
0
}
1980
1981
void I410ToARGBRow_C(const uint16_t* src_y,
1982
                     const uint16_t* src_u,
1983
                     const uint16_t* src_v,
1984
                     uint8_t* rgb_buf,
1985
                     const struct YuvConstants* yuvconstants,
1986
0
                     int width) {
1987
0
  int x;
1988
0
  for (x = 0; x < width; ++x) {
1989
0
    YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
1990
0
               rgb_buf + 2, yuvconstants);
1991
0
    rgb_buf[3] = 255;
1992
0
    src_y += 1;
1993
0
    src_u += 1;
1994
0
    src_v += 1;
1995
0
    rgb_buf += 4;  // Advance 1 pixels.
1996
0
  }
1997
0
}
1998
1999
void I210AlphaToARGBRow_C(const uint16_t* src_y,
2000
                          const uint16_t* src_u,
2001
                          const uint16_t* src_v,
2002
                          const uint16_t* src_a,
2003
                          uint8_t* rgb_buf,
2004
                          const struct YuvConstants* yuvconstants,
2005
0
                          int width) {
2006
0
  int x;
2007
0
  for (x = 0; x < width - 1; x += 2) {
2008
0
    YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
2009
0
               rgb_buf + 2, yuvconstants);
2010
0
    rgb_buf[3] = STATIC_CAST(uint8_t, clamp255(src_a[0] >> 2));
2011
0
    YuvPixel10(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
2012
0
               rgb_buf + 6, yuvconstants);
2013
0
    rgb_buf[7] = STATIC_CAST(uint8_t, clamp255(src_a[1] >> 2));
2014
0
    src_y += 2;
2015
0
    src_u += 1;
2016
0
    src_v += 1;
2017
0
    src_a += 2;
2018
0
    rgb_buf += 8;  // Advance 2 pixels.
2019
0
  }
2020
0
  if (width & 1) {
2021
0
    YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
2022
0
               rgb_buf + 2, yuvconstants);
2023
0
    rgb_buf[3] = STATIC_CAST(uint8_t, clamp255(src_a[0] >> 2));
2024
0
  }
2025
0
}
2026
2027
void I410AlphaToARGBRow_C(const uint16_t* src_y,
2028
                          const uint16_t* src_u,
2029
                          const uint16_t* src_v,
2030
                          const uint16_t* src_a,
2031
                          uint8_t* rgb_buf,
2032
                          const struct YuvConstants* yuvconstants,
2033
0
                          int width) {
2034
0
  int x;
2035
0
  for (x = 0; x < width; ++x) {
2036
0
    YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
2037
0
               rgb_buf + 2, yuvconstants);
2038
0
    rgb_buf[3] = STATIC_CAST(uint8_t, clamp255(src_a[0] >> 2));
2039
0
    src_y += 1;
2040
0
    src_u += 1;
2041
0
    src_v += 1;
2042
0
    src_a += 1;
2043
0
    rgb_buf += 4;  // Advance 1 pixels.
2044
0
  }
2045
0
}
2046
2047
// 12 bit YUV to ARGB
2048
void I212ToARGBRow_C(const uint16_t* src_y,
2049
                     const uint16_t* src_u,
2050
                     const uint16_t* src_v,
2051
                     uint8_t* rgb_buf,
2052
                     const struct YuvConstants* yuvconstants,
2053
0
                     int width) {
2054
0
  int x;
2055
0
  for (x = 0; x < width - 1; x += 2) {
2056
0
    YuvPixel12(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
2057
0
               rgb_buf + 2, yuvconstants);
2058
0
    rgb_buf[3] = 255;
2059
0
    YuvPixel12(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
2060
0
               rgb_buf + 6, yuvconstants);
2061
0
    rgb_buf[7] = 255;
2062
0
    src_y += 2;
2063
0
    src_u += 1;
2064
0
    src_v += 1;
2065
0
    rgb_buf += 8;  // Advance 2 pixels.
2066
0
  }
2067
0
  if (width & 1) {
2068
0
    YuvPixel12(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
2069
0
               rgb_buf + 2, yuvconstants);
2070
0
    rgb_buf[3] = 255;
2071
0
  }
2072
0
}
2073
2074
0
static void StoreAR30(uint8_t* rgb_buf, int b, int g, int r) {
2075
0
  uint32_t ar30;
2076
0
  b = b >> 4;  // convert 8 bit 10.6 to 10 bit.
2077
0
  g = g >> 4;
2078
0
  r = r >> 4;
2079
0
  b = Clamp10(b);
2080
0
  g = Clamp10(g);
2081
0
  r = Clamp10(r);
2082
0
  ar30 = b | ((uint32_t)g << 10) | ((uint32_t)r << 20) | 0xc0000000;
2083
0
  (*(uint32_t*)rgb_buf) = ar30;
2084
0
}
2085
2086
// 10 bit YUV to 10 bit AR30
2087
void I210ToAR30Row_C(const uint16_t* src_y,
2088
                     const uint16_t* src_u,
2089
                     const uint16_t* src_v,
2090
                     uint8_t* rgb_buf,
2091
                     const struct YuvConstants* yuvconstants,
2092
0
                     int width) {
2093
0
  int x;
2094
0
  int b;
2095
0
  int g;
2096
0
  int r;
2097
0
  for (x = 0; x < width - 1; x += 2) {
2098
0
    YuvPixel10_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
2099
0
    StoreAR30(rgb_buf, b, g, r);
2100
0
    YuvPixel10_16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
2101
0
    StoreAR30(rgb_buf + 4, b, g, r);
2102
0
    src_y += 2;
2103
0
    src_u += 1;
2104
0
    src_v += 1;
2105
0
    rgb_buf += 8;  // Advance 2 pixels.
2106
0
  }
2107
0
  if (width & 1) {
2108
0
    YuvPixel10_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
2109
0
    StoreAR30(rgb_buf, b, g, r);
2110
0
  }
2111
0
}
2112
2113
// 12 bit YUV to 10 bit AR30
2114
void I212ToAR30Row_C(const uint16_t* src_y,
2115
                     const uint16_t* src_u,
2116
                     const uint16_t* src_v,
2117
                     uint8_t* rgb_buf,
2118
                     const struct YuvConstants* yuvconstants,
2119
0
                     int width) {
2120
0
  int x;
2121
0
  int b;
2122
0
  int g;
2123
0
  int r;
2124
0
  for (x = 0; x < width - 1; x += 2) {
2125
0
    YuvPixel12_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
2126
0
    StoreAR30(rgb_buf, b, g, r);
2127
0
    YuvPixel12_16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
2128
0
    StoreAR30(rgb_buf + 4, b, g, r);
2129
0
    src_y += 2;
2130
0
    src_u += 1;
2131
0
    src_v += 1;
2132
0
    rgb_buf += 8;  // Advance 2 pixels.
2133
0
  }
2134
0
  if (width & 1) {
2135
0
    YuvPixel12_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
2136
0
    StoreAR30(rgb_buf, b, g, r);
2137
0
  }
2138
0
}
2139
2140
void I410ToAR30Row_C(const uint16_t* src_y,
2141
                     const uint16_t* src_u,
2142
                     const uint16_t* src_v,
2143
                     uint8_t* rgb_buf,
2144
                     const struct YuvConstants* yuvconstants,
2145
0
                     int width) {
2146
0
  int x;
2147
0
  int b;
2148
0
  int g;
2149
0
  int r;
2150
0
  for (x = 0; x < width; ++x) {
2151
0
    YuvPixel10_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
2152
0
    StoreAR30(rgb_buf, b, g, r);
2153
0
    src_y += 1;
2154
0
    src_u += 1;
2155
0
    src_v += 1;
2156
0
    rgb_buf += 4;  // Advance 1 pixel.
2157
0
  }
2158
0
}
2159
2160
// P210 has 10 bits in msb of 16 bit NV12 style layout.
2161
void P210ToARGBRow_C(const uint16_t* src_y,
2162
                     const uint16_t* src_uv,
2163
                     uint8_t* dst_argb,
2164
                     const struct YuvConstants* yuvconstants,
2165
0
                     int width) {
2166
0
  int x;
2167
0
  for (x = 0; x < width - 1; x += 2) {
2168
0
    YuvPixel16_8(src_y[0], src_uv[0], src_uv[1], dst_argb + 0, dst_argb + 1,
2169
0
                 dst_argb + 2, yuvconstants);
2170
0
    dst_argb[3] = 255;
2171
0
    YuvPixel16_8(src_y[1], src_uv[0], src_uv[1], dst_argb + 4, dst_argb + 5,
2172
0
                 dst_argb + 6, yuvconstants);
2173
0
    dst_argb[7] = 255;
2174
0
    src_y += 2;
2175
0
    src_uv += 2;
2176
0
    dst_argb += 8;  // Advance 2 pixels.
2177
0
  }
2178
0
  if (width & 1) {
2179
0
    YuvPixel16_8(src_y[0], src_uv[0], src_uv[1], dst_argb + 0, dst_argb + 1,
2180
0
                 dst_argb + 2, yuvconstants);
2181
0
    dst_argb[3] = 255;
2182
0
  }
2183
0
}
2184
2185
void P410ToARGBRow_C(const uint16_t* src_y,
2186
                     const uint16_t* src_uv,
2187
                     uint8_t* dst_argb,
2188
                     const struct YuvConstants* yuvconstants,
2189
0
                     int width) {
2190
0
  int x;
2191
0
  for (x = 0; x < width; ++x) {
2192
0
    YuvPixel16_8(src_y[0], src_uv[0], src_uv[1], dst_argb + 0, dst_argb + 1,
2193
0
                 dst_argb + 2, yuvconstants);
2194
0
    dst_argb[3] = 255;
2195
0
    src_y += 1;
2196
0
    src_uv += 2;
2197
0
    dst_argb += 4;  // Advance 1 pixels.
2198
0
  }
2199
0
}
2200
2201
void P210ToAR30Row_C(const uint16_t* src_y,
2202
                     const uint16_t* src_uv,
2203
                     uint8_t* dst_ar30,
2204
                     const struct YuvConstants* yuvconstants,
2205
0
                     int width) {
2206
0
  int x;
2207
0
  int b;
2208
0
  int g;
2209
0
  int r;
2210
0
  for (x = 0; x < width - 1; x += 2) {
2211
0
    YuvPixel16_16(src_y[0], src_uv[0], src_uv[1], &b, &g, &r, yuvconstants);
2212
0
    StoreAR30(dst_ar30, b, g, r);
2213
0
    YuvPixel16_16(src_y[1], src_uv[0], src_uv[1], &b, &g, &r, yuvconstants);
2214
0
    StoreAR30(dst_ar30 + 4, b, g, r);
2215
0
    src_y += 2;
2216
0
    src_uv += 2;
2217
0
    dst_ar30 += 8;  // Advance 2 pixels.
2218
0
  }
2219
0
  if (width & 1) {
2220
0
    YuvPixel16_16(src_y[0], src_uv[0], src_uv[1], &b, &g, &r, yuvconstants);
2221
0
    StoreAR30(dst_ar30, b, g, r);
2222
0
  }
2223
0
}
2224
2225
void P410ToAR30Row_C(const uint16_t* src_y,
2226
                     const uint16_t* src_uv,
2227
                     uint8_t* dst_ar30,
2228
                     const struct YuvConstants* yuvconstants,
2229
0
                     int width) {
2230
0
  int x;
2231
0
  int b;
2232
0
  int g;
2233
0
  int r;
2234
0
  for (x = 0; x < width; ++x) {
2235
0
    YuvPixel16_16(src_y[0], src_uv[0], src_uv[1], &b, &g, &r, yuvconstants);
2236
0
    StoreAR30(dst_ar30, b, g, r);
2237
0
    src_y += 1;
2238
0
    src_uv += 2;
2239
0
    dst_ar30 += 4;  // Advance 1 pixel.
2240
0
  }
2241
0
}
2242
2243
// 8 bit YUV to 10 bit AR30
2244
// Uses same code as 10 bit YUV bit shifts the 8 bit values up to 10 bits.
2245
void I422ToAR30Row_C(const uint8_t* src_y,
2246
                     const uint8_t* src_u,
2247
                     const uint8_t* src_v,
2248
                     uint8_t* rgb_buf,
2249
                     const struct YuvConstants* yuvconstants,
2250
0
                     int width) {
2251
0
  int x;
2252
0
  int b;
2253
0
  int g;
2254
0
  int r;
2255
0
  for (x = 0; x < width - 1; x += 2) {
2256
0
    YuvPixel8_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
2257
0
    StoreAR30(rgb_buf, b, g, r);
2258
0
    YuvPixel8_16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
2259
0
    StoreAR30(rgb_buf + 4, b, g, r);
2260
0
    src_y += 2;
2261
0
    src_u += 1;
2262
0
    src_v += 1;
2263
0
    rgb_buf += 8;  // Advance 2 pixels.
2264
0
  }
2265
0
  if (width & 1) {
2266
0
    YuvPixel8_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
2267
0
    StoreAR30(rgb_buf, b, g, r);
2268
0
  }
2269
0
}
2270
2271
void I444AlphaToARGBRow_C(const uint8_t* src_y,
2272
                          const uint8_t* src_u,
2273
                          const uint8_t* src_v,
2274
                          const uint8_t* src_a,
2275
                          uint8_t* rgb_buf,
2276
                          const struct YuvConstants* yuvconstants,
2277
0
                          int width) {
2278
0
  int x;
2279
0
  for (x = 0; x < width; ++x) {
2280
0
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
2281
0
             rgb_buf + 2, yuvconstants);
2282
0
    rgb_buf[3] = src_a[0];
2283
0
    src_y += 1;
2284
0
    src_u += 1;
2285
0
    src_v += 1;
2286
0
    src_a += 1;
2287
0
    rgb_buf += 4;  // Advance 1 pixel.
2288
0
  }
2289
0
}
2290
2291
void I422AlphaToARGBRow_C(const uint8_t* src_y,
2292
                          const uint8_t* src_u,
2293
                          const uint8_t* src_v,
2294
                          const uint8_t* src_a,
2295
                          uint8_t* rgb_buf,
2296
                          const struct YuvConstants* yuvconstants,
2297
0
                          int width) {
2298
0
  int x;
2299
0
  for (x = 0; x < width - 1; x += 2) {
2300
0
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
2301
0
             rgb_buf + 2, yuvconstants);
2302
0
    rgb_buf[3] = src_a[0];
2303
0
    YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
2304
0
             rgb_buf + 6, yuvconstants);
2305
0
    rgb_buf[7] = src_a[1];
2306
0
    src_y += 2;
2307
0
    src_u += 1;
2308
0
    src_v += 1;
2309
0
    src_a += 2;
2310
0
    rgb_buf += 8;  // Advance 2 pixels.
2311
0
  }
2312
0
  if (width & 1) {
2313
0
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
2314
0
             rgb_buf + 2, yuvconstants);
2315
0
    rgb_buf[3] = src_a[0];
2316
0
  }
2317
0
}
2318
2319
void I422ToRGB24Row_C(const uint8_t* src_y,
2320
                      const uint8_t* src_u,
2321
                      const uint8_t* src_v,
2322
                      uint8_t* rgb_buf,
2323
                      const struct YuvConstants* yuvconstants,
2324
0
                      int width) {
2325
0
  int x;
2326
0
  for (x = 0; x < width - 1; x += 2) {
2327
0
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
2328
0
             rgb_buf + 2, yuvconstants);
2329
0
    YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 3, rgb_buf + 4,
2330
0
             rgb_buf + 5, yuvconstants);
2331
0
    src_y += 2;
2332
0
    src_u += 1;
2333
0
    src_v += 1;
2334
0
    rgb_buf += 6;  // Advance 2 pixels.
2335
0
  }
2336
0
  if (width & 1) {
2337
0
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
2338
0
             rgb_buf + 2, yuvconstants);
2339
0
  }
2340
0
}
2341
2342
void I422ToARGB4444Row_C(const uint8_t* src_y,
2343
                         const uint8_t* src_u,
2344
                         const uint8_t* src_v,
2345
                         uint8_t* dst_argb4444,
2346
                         const struct YuvConstants* yuvconstants,
2347
0
                         int width) {
2348
0
  uint8_t b0;
2349
0
  uint8_t g0;
2350
0
  uint8_t r0;
2351
0
  uint8_t b1;
2352
0
  uint8_t g1;
2353
0
  uint8_t r1;
2354
0
  int x;
2355
0
  for (x = 0; x < width - 1; x += 2) {
2356
0
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
2357
0
    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
2358
0
    b0 = b0 >> 4;
2359
0
    g0 = g0 >> 4;
2360
0
    r0 = r0 >> 4;
2361
0
    b1 = b1 >> 4;
2362
0
    g1 = g1 >> 4;
2363
0
    r1 = r1 >> 4;
2364
0
    *(uint16_t*)(dst_argb4444 + 0) =
2365
0
        STATIC_CAST(uint16_t, b0 | (g0 << 4) | (r0 << 8) | 0xf000);
2366
0
    *(uint16_t*)(dst_argb4444 + 2) =
2367
0
        STATIC_CAST(uint16_t, b1 | (g1 << 4) | (r1 << 8) | 0xf000);
2368
0
    src_y += 2;
2369
0
    src_u += 1;
2370
0
    src_v += 1;
2371
0
    dst_argb4444 += 4;  // Advance 2 pixels.
2372
0
  }
2373
0
  if (width & 1) {
2374
0
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
2375
0
    b0 = b0 >> 4;
2376
0
    g0 = g0 >> 4;
2377
0
    r0 = r0 >> 4;
2378
0
    *(uint16_t*)(dst_argb4444) =
2379
0
        STATIC_CAST(uint16_t, b0 | (g0 << 4) | (r0 << 8) | 0xf000);
2380
0
  }
2381
0
}
2382
2383
void I422ToARGB1555Row_C(const uint8_t* src_y,
2384
                         const uint8_t* src_u,
2385
                         const uint8_t* src_v,
2386
                         uint8_t* dst_argb1555,
2387
                         const struct YuvConstants* yuvconstants,
2388
0
                         int width) {
2389
0
  uint8_t b0;
2390
0
  uint8_t g0;
2391
0
  uint8_t r0;
2392
0
  uint8_t b1;
2393
0
  uint8_t g1;
2394
0
  uint8_t r1;
2395
0
  int x;
2396
0
  for (x = 0; x < width - 1; x += 2) {
2397
0
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
2398
0
    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
2399
0
    b0 = b0 >> 3;
2400
0
    g0 = g0 >> 3;
2401
0
    r0 = r0 >> 3;
2402
0
    b1 = b1 >> 3;
2403
0
    g1 = g1 >> 3;
2404
0
    r1 = r1 >> 3;
2405
0
    *(uint16_t*)(dst_argb1555 + 0) =
2406
0
        STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 10) | 0x8000);
2407
0
    *(uint16_t*)(dst_argb1555 + 2) =
2408
0
        STATIC_CAST(uint16_t, b1 | (g1 << 5) | (r1 << 10) | 0x8000);
2409
0
    src_y += 2;
2410
0
    src_u += 1;
2411
0
    src_v += 1;
2412
0
    dst_argb1555 += 4;  // Advance 2 pixels.
2413
0
  }
2414
0
  if (width & 1) {
2415
0
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
2416
0
    b0 = b0 >> 3;
2417
0
    g0 = g0 >> 3;
2418
0
    r0 = r0 >> 3;
2419
0
    *(uint16_t*)(dst_argb1555) =
2420
0
        STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 10) | 0x8000);
2421
0
  }
2422
0
}
2423
2424
void I422ToRGB565Row_C(const uint8_t* src_y,
2425
                       const uint8_t* src_u,
2426
                       const uint8_t* src_v,
2427
                       uint8_t* dst_rgb565,
2428
                       const struct YuvConstants* yuvconstants,
2429
0
                       int width) {
2430
0
  uint8_t b0;
2431
0
  uint8_t g0;
2432
0
  uint8_t r0;
2433
0
  uint8_t b1;
2434
0
  uint8_t g1;
2435
0
  uint8_t r1;
2436
0
  int x;
2437
0
  for (x = 0; x < width - 1; x += 2) {
2438
0
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
2439
0
    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
2440
0
    b0 = b0 >> 3;
2441
0
    g0 = g0 >> 2;
2442
0
    r0 = r0 >> 3;
2443
0
    b1 = b1 >> 3;
2444
0
    g1 = g1 >> 2;
2445
0
    r1 = r1 >> 3;
2446
0
    *(uint16_t*)(dst_rgb565 + 0) =
2447
0
        STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 11));
2448
0
    *(uint16_t*)(dst_rgb565 + 2) =
2449
0
        STATIC_CAST(uint16_t, b1 | (g1 << 5) | (r1 << 11));
2450
0
    src_y += 2;
2451
0
    src_u += 1;
2452
0
    src_v += 1;
2453
0
    dst_rgb565 += 4;  // Advance 2 pixels.
2454
0
  }
2455
0
  if (width & 1) {
2456
0
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
2457
0
    b0 = b0 >> 3;
2458
0
    g0 = g0 >> 2;
2459
0
    r0 = r0 >> 3;
2460
0
    *(uint16_t*)(dst_rgb565 + 0) =
2461
0
        STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 11));
2462
0
  }
2463
0
}
2464
2465
void NV12ToARGBRow_C(const uint8_t* src_y,
2466
                     const uint8_t* src_uv,
2467
                     uint8_t* rgb_buf,
2468
                     const struct YuvConstants* yuvconstants,
2469
0
                     int width) {
2470
0
  int x;
2471
0
  for (x = 0; x < width - 1; x += 2) {
2472
0
    YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
2473
0
             rgb_buf + 2, yuvconstants);
2474
0
    rgb_buf[3] = 255;
2475
0
    YuvPixel(src_y[1], src_uv[0], src_uv[1], rgb_buf + 4, rgb_buf + 5,
2476
0
             rgb_buf + 6, yuvconstants);
2477
0
    rgb_buf[7] = 255;
2478
0
    src_y += 2;
2479
0
    src_uv += 2;
2480
0
    rgb_buf += 8;  // Advance 2 pixels.
2481
0
  }
2482
0
  if (width & 1) {
2483
0
    YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
2484
0
             rgb_buf + 2, yuvconstants);
2485
0
    rgb_buf[3] = 255;
2486
0
  }
2487
0
}
2488
2489
void NV21ToARGBRow_C(const uint8_t* src_y,
2490
                     const uint8_t* src_vu,
2491
                     uint8_t* rgb_buf,
2492
                     const struct YuvConstants* yuvconstants,
2493
0
                     int width) {
2494
0
  int x;
2495
0
  for (x = 0; x < width - 1; x += 2) {
2496
0
    YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
2497
0
             rgb_buf + 2, yuvconstants);
2498
0
    rgb_buf[3] = 255;
2499
0
    YuvPixel(src_y[1], src_vu[1], src_vu[0], rgb_buf + 4, rgb_buf + 5,
2500
0
             rgb_buf + 6, yuvconstants);
2501
0
    rgb_buf[7] = 255;
2502
0
    src_y += 2;
2503
0
    src_vu += 2;
2504
0
    rgb_buf += 8;  // Advance 2 pixels.
2505
0
  }
2506
0
  if (width & 1) {
2507
0
    YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
2508
0
             rgb_buf + 2, yuvconstants);
2509
0
    rgb_buf[3] = 255;
2510
0
  }
2511
0
}
2512
2513
void NV12ToRGB24Row_C(const uint8_t* src_y,
2514
                      const uint8_t* src_uv,
2515
                      uint8_t* rgb_buf,
2516
                      const struct YuvConstants* yuvconstants,
2517
0
                      int width) {
2518
0
  int x;
2519
0
  for (x = 0; x < width - 1; x += 2) {
2520
0
    YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
2521
0
             rgb_buf + 2, yuvconstants);
2522
0
    YuvPixel(src_y[1], src_uv[0], src_uv[1], rgb_buf + 3, rgb_buf + 4,
2523
0
             rgb_buf + 5, yuvconstants);
2524
0
    src_y += 2;
2525
0
    src_uv += 2;
2526
0
    rgb_buf += 6;  // Advance 2 pixels.
2527
0
  }
2528
0
  if (width & 1) {
2529
0
    YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
2530
0
             rgb_buf + 2, yuvconstants);
2531
0
  }
2532
0
}
2533
2534
void NV21ToRGB24Row_C(const uint8_t* src_y,
2535
                      const uint8_t* src_vu,
2536
                      uint8_t* rgb_buf,
2537
                      const struct YuvConstants* yuvconstants,
2538
0
                      int width) {
2539
0
  int x;
2540
0
  for (x = 0; x < width - 1; x += 2) {
2541
0
    YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
2542
0
             rgb_buf + 2, yuvconstants);
2543
0
    YuvPixel(src_y[1], src_vu[1], src_vu[0], rgb_buf + 3, rgb_buf + 4,
2544
0
             rgb_buf + 5, yuvconstants);
2545
0
    src_y += 2;
2546
0
    src_vu += 2;
2547
0
    rgb_buf += 6;  // Advance 2 pixels.
2548
0
  }
2549
0
  if (width & 1) {
2550
0
    YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
2551
0
             rgb_buf + 2, yuvconstants);
2552
0
  }
2553
0
}
2554
2555
void NV12ToRGB565Row_C(const uint8_t* src_y,
2556
                       const uint8_t* src_uv,
2557
                       uint8_t* dst_rgb565,
2558
                       const struct YuvConstants* yuvconstants,
2559
0
                       int width) {
2560
0
  uint8_t b0;
2561
0
  uint8_t g0;
2562
0
  uint8_t r0;
2563
0
  uint8_t b1;
2564
0
  uint8_t g1;
2565
0
  uint8_t r1;
2566
0
  int x;
2567
0
  for (x = 0; x < width - 1; x += 2) {
2568
0
    YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
2569
0
    YuvPixel(src_y[1], src_uv[0], src_uv[1], &b1, &g1, &r1, yuvconstants);
2570
0
    b0 = b0 >> 3;
2571
0
    g0 = g0 >> 2;
2572
0
    r0 = r0 >> 3;
2573
0
    b1 = b1 >> 3;
2574
0
    g1 = g1 >> 2;
2575
0
    r1 = r1 >> 3;
2576
0
    *(uint16_t*)(dst_rgb565 + 0) = STATIC_CAST(uint16_t, b0) |
2577
0
                                   STATIC_CAST(uint16_t, g0 << 5) |
2578
0
                                   STATIC_CAST(uint16_t, r0 << 11);
2579
0
    *(uint16_t*)(dst_rgb565 + 2) = STATIC_CAST(uint16_t, b1) |
2580
0
                                   STATIC_CAST(uint16_t, g1 << 5) |
2581
0
                                   STATIC_CAST(uint16_t, r1 << 11);
2582
0
    src_y += 2;
2583
0
    src_uv += 2;
2584
0
    dst_rgb565 += 4;  // Advance 2 pixels.
2585
0
  }
2586
0
  if (width & 1) {
2587
0
    YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
2588
0
    b0 = b0 >> 3;
2589
0
    g0 = g0 >> 2;
2590
0
    r0 = r0 >> 3;
2591
0
    *(uint16_t*)(dst_rgb565) = STATIC_CAST(uint16_t, b0) |
2592
0
                               STATIC_CAST(uint16_t, g0 << 5) |
2593
0
                               STATIC_CAST(uint16_t, r0 << 11);
2594
0
  }
2595
0
}
2596
2597
void YUY2ToARGBRow_C(const uint8_t* src_yuy2,
2598
                     uint8_t* rgb_buf,
2599
                     const struct YuvConstants* yuvconstants,
2600
0
                     int width) {
2601
0
  int x;
2602
0
  for (x = 0; x < width - 1; x += 2) {
2603
0
    YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], rgb_buf + 0, rgb_buf + 1,
2604
0
             rgb_buf + 2, yuvconstants);
2605
0
    rgb_buf[3] = 255;
2606
0
    YuvPixel(src_yuy2[2], src_yuy2[1], src_yuy2[3], rgb_buf + 4, rgb_buf + 5,
2607
0
             rgb_buf + 6, yuvconstants);
2608
0
    rgb_buf[7] = 255;
2609
0
    src_yuy2 += 4;
2610
0
    rgb_buf += 8;  // Advance 2 pixels.
2611
0
  }
2612
0
  if (width & 1) {
2613
0
    YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], rgb_buf + 0, rgb_buf + 1,
2614
0
             rgb_buf + 2, yuvconstants);
2615
0
    rgb_buf[3] = 255;
2616
0
  }
2617
0
}
2618
2619
void UYVYToARGBRow_C(const uint8_t* src_uyvy,
2620
                     uint8_t* rgb_buf,
2621
                     const struct YuvConstants* yuvconstants,
2622
0
                     int width) {
2623
0
  int x;
2624
0
  for (x = 0; x < width - 1; x += 2) {
2625
0
    YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], rgb_buf + 0, rgb_buf + 1,
2626
0
             rgb_buf + 2, yuvconstants);
2627
0
    rgb_buf[3] = 255;
2628
0
    YuvPixel(src_uyvy[3], src_uyvy[0], src_uyvy[2], rgb_buf + 4, rgb_buf + 5,
2629
0
             rgb_buf + 6, yuvconstants);
2630
0
    rgb_buf[7] = 255;
2631
0
    src_uyvy += 4;
2632
0
    rgb_buf += 8;  // Advance 2 pixels.
2633
0
  }
2634
0
  if (width & 1) {
2635
0
    YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], rgb_buf + 0, rgb_buf + 1,
2636
0
             rgb_buf + 2, yuvconstants);
2637
0
    rgb_buf[3] = 255;
2638
0
  }
2639
0
}
2640
2641
void I422ToRGBARow_C(const uint8_t* src_y,
2642
                     const uint8_t* src_u,
2643
                     const uint8_t* src_v,
2644
                     uint8_t* rgb_buf,
2645
                     const struct YuvConstants* yuvconstants,
2646
0
                     int width) {
2647
0
  int x;
2648
0
  for (x = 0; x < width - 1; x += 2) {
2649
0
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 1, rgb_buf + 2,
2650
0
             rgb_buf + 3, yuvconstants);
2651
0
    rgb_buf[0] = 255;
2652
0
    YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 5, rgb_buf + 6,
2653
0
             rgb_buf + 7, yuvconstants);
2654
0
    rgb_buf[4] = 255;
2655
0
    src_y += 2;
2656
0
    src_u += 1;
2657
0
    src_v += 1;
2658
0
    rgb_buf += 8;  // Advance 2 pixels.
2659
0
  }
2660
0
  if (width & 1) {
2661
0
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 1, rgb_buf + 2,
2662
0
             rgb_buf + 3, yuvconstants);
2663
0
    rgb_buf[0] = 255;
2664
0
  }
2665
0
}
2666
2667
void I400ToARGBRow_C(const uint8_t* src_y,
2668
                     uint8_t* rgb_buf,
2669
                     const struct YuvConstants* yuvconstants,
2670
0
                     int width) {
2671
0
  int x;
2672
0
  for (x = 0; x < width - 1; x += 2) {
2673
0
    YPixel(src_y[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
2674
0
    rgb_buf[3] = 255;
2675
0
    YPixel(src_y[1], rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants);
2676
0
    rgb_buf[7] = 255;
2677
0
    src_y += 2;
2678
0
    rgb_buf += 8;  // Advance 2 pixels.
2679
0
  }
2680
0
  if (width & 1) {
2681
0
    YPixel(src_y[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
2682
0
    rgb_buf[3] = 255;
2683
0
  }
2684
0
}
2685
2686
0
void MirrorRow_C(const uint8_t* src, uint8_t* dst, int width) {
2687
0
  int x;
2688
0
  src += width - 1;
2689
0
  for (x = 0; x < width - 1; x += 2) {
2690
0
    dst[x] = src[0];
2691
0
    dst[x + 1] = src[-1];
2692
0
    src -= 2;
2693
0
  }
2694
0
  if (width & 1) {
2695
0
    dst[width - 1] = src[0];
2696
0
  }
2697
0
}
2698
2699
0
void MirrorRow_16_C(const uint16_t* src, uint16_t* dst, int width) {
2700
0
  int x;
2701
0
  src += width - 1;
2702
0
  for (x = 0; x < width - 1; x += 2) {
2703
0
    dst[x] = src[0];
2704
0
    dst[x + 1] = src[-1];
2705
0
    src -= 2;
2706
0
  }
2707
0
  if (width & 1) {
2708
0
    dst[width - 1] = src[0];
2709
0
  }
2710
0
}
2711
2712
0
void MirrorUVRow_C(const uint8_t* src_uv, uint8_t* dst_uv, int width) {
2713
0
  int x;
2714
0
  src_uv += (width - 1) << 1;
2715
0
  for (x = 0; x < width; ++x) {
2716
0
    dst_uv[0] = src_uv[0];
2717
0
    dst_uv[1] = src_uv[1];
2718
0
    src_uv -= 2;
2719
0
    dst_uv += 2;
2720
0
  }
2721
0
}
2722
2723
void MirrorSplitUVRow_C(const uint8_t* src_uv,
2724
                        uint8_t* dst_u,
2725
                        uint8_t* dst_v,
2726
0
                        int width) {
2727
0
  int x;
2728
0
  src_uv += (width - 1) << 1;
2729
0
  for (x = 0; x < width - 1; x += 2) {
2730
0
    dst_u[x] = src_uv[0];
2731
0
    dst_u[x + 1] = src_uv[-2];
2732
0
    dst_v[x] = src_uv[1];
2733
0
    dst_v[x + 1] = src_uv[-2 + 1];
2734
0
    src_uv -= 4;
2735
0
  }
2736
0
  if (width & 1) {
2737
0
    dst_u[width - 1] = src_uv[0];
2738
0
    dst_v[width - 1] = src_uv[1];
2739
0
  }
2740
0
}
2741
2742
0
void ARGBMirrorRow_C(const uint8_t* src, uint8_t* dst, int width) {
2743
0
  int x;
2744
0
  const uint32_t* src32 = (const uint32_t*)(src);
2745
0
  uint32_t* dst32 = (uint32_t*)(dst);
2746
0
  src32 += width - 1;
2747
0
  for (x = 0; x < width - 1; x += 2) {
2748
0
    dst32[x] = src32[0];
2749
0
    dst32[x + 1] = src32[-1];
2750
0
    src32 -= 2;
2751
0
  }
2752
0
  if (width & 1) {
2753
0
    dst32[width - 1] = src32[0];
2754
0
  }
2755
0
}
2756
2757
0
void RGB24MirrorRow_C(const uint8_t* src_rgb24, uint8_t* dst_rgb24, int width) {
2758
0
  int x;
2759
0
  src_rgb24 += width * 3 - 3;
2760
0
  for (x = 0; x < width; ++x) {
2761
0
    uint8_t b = src_rgb24[0];
2762
0
    uint8_t g = src_rgb24[1];
2763
0
    uint8_t r = src_rgb24[2];
2764
0
    dst_rgb24[0] = b;
2765
0
    dst_rgb24[1] = g;
2766
0
    dst_rgb24[2] = r;
2767
0
    src_rgb24 -= 3;
2768
0
    dst_rgb24 += 3;
2769
0
  }
2770
0
}
2771
2772
void SplitUVRow_C(const uint8_t* src_uv,
2773
                  uint8_t* dst_u,
2774
                  uint8_t* dst_v,
2775
0
                  int width) {
2776
0
  int x;
2777
0
  for (x = 0; x < width - 1; x += 2) {
2778
0
    dst_u[x] = src_uv[0];
2779
0
    dst_u[x + 1] = src_uv[2];
2780
0
    dst_v[x] = src_uv[1];
2781
0
    dst_v[x + 1] = src_uv[3];
2782
0
    src_uv += 4;
2783
0
  }
2784
0
  if (width & 1) {
2785
0
    dst_u[width - 1] = src_uv[0];
2786
0
    dst_v[width - 1] = src_uv[1];
2787
0
  }
2788
0
}
2789
2790
void MergeUVRow_C(const uint8_t* src_u,
2791
                  const uint8_t* src_v,
2792
                  uint8_t* dst_uv,
2793
0
                  int width) {
2794
0
  int x;
2795
0
  for (x = 0; x < width - 1; x += 2) {
2796
0
    dst_uv[0] = src_u[x];
2797
0
    dst_uv[1] = src_v[x];
2798
0
    dst_uv[2] = src_u[x + 1];
2799
0
    dst_uv[3] = src_v[x + 1];
2800
0
    dst_uv += 4;
2801
0
  }
2802
0
  if (width & 1) {
2803
0
    dst_uv[0] = src_u[width - 1];
2804
0
    dst_uv[1] = src_v[width - 1];
2805
0
  }
2806
0
}
2807
2808
void DetileRow_C(const uint8_t* src,
2809
                 ptrdiff_t src_tile_stride,
2810
                 uint8_t* dst,
2811
0
                 int width) {
2812
0
  int x;
2813
0
  for (x = 0; x < width - 15; x += 16) {
2814
0
    memcpy(dst, src, 16);
2815
0
    dst += 16;
2816
0
    src += src_tile_stride;
2817
0
  }
2818
0
  if (width & 15) {
2819
0
    memcpy(dst, src, width & 15);
2820
0
  }
2821
0
}
2822
2823
void DetileRow_16_C(const uint16_t* src,
2824
                    ptrdiff_t src_tile_stride,
2825
                    uint16_t* dst,
2826
0
                    int width) {
2827
0
  int x;
2828
0
  for (x = 0; x < width - 15; x += 16) {
2829
0
    memcpy(dst, src, 16 * sizeof(uint16_t));
2830
0
    dst += 16;
2831
0
    src += src_tile_stride;
2832
0
  }
2833
0
  if (width & 15) {
2834
0
    memcpy(dst, src, (width & 15) * sizeof(uint16_t));
2835
0
  }
2836
0
}
2837
2838
void DetileSplitUVRow_C(const uint8_t* src_uv,
2839
                        ptrdiff_t src_tile_stride,
2840
                        uint8_t* dst_u,
2841
                        uint8_t* dst_v,
2842
0
                        int width) {
2843
0
  int x;
2844
0
  for (x = 0; x < width - 15; x += 16) {
2845
0
    SplitUVRow_C(src_uv, dst_u, dst_v, 8);
2846
0
    dst_u += 8;
2847
0
    dst_v += 8;
2848
0
    src_uv += src_tile_stride;
2849
0
  }
2850
0
  if (width & 15) {
2851
0
    SplitUVRow_C(src_uv, dst_u, dst_v, ((width & 15) + 1) / 2);
2852
0
  }
2853
0
}
2854
2855
void DetileToYUY2_C(const uint8_t* src_y,
2856
                    ptrdiff_t src_y_tile_stride,
2857
                    const uint8_t* src_uv,
2858
                    ptrdiff_t src_uv_tile_stride,
2859
                    uint8_t* dst_yuy2,
2860
0
                    int width) {
2861
0
  for (int x = 0; x < width - 15; x += 16) {
2862
0
    for (int i = 0; i < 8; i++) {
2863
0
      dst_yuy2[0] = src_y[0];
2864
0
      dst_yuy2[1] = src_uv[0];
2865
0
      dst_yuy2[2] = src_y[1];
2866
0
      dst_yuy2[3] = src_uv[1];
2867
0
      dst_yuy2 += 4;
2868
0
      src_y += 2;
2869
0
      src_uv += 2;
2870
0
    }
2871
0
    src_y += src_y_tile_stride - 16;
2872
0
    src_uv += src_uv_tile_stride - 16;
2873
0
  }
2874
0
}
2875
2876
// Unpack MT2T into tiled P010 64 pixels at a time. MT2T's bitstream is encoded
2877
// in 80 byte blocks representing 64 pixels each. The first 16 bytes of the
2878
// block contain all of the lower 2 bits of each pixel packed together, and the
2879
// next 64 bytes represent all the upper 8 bits of the pixel. The lower bits are
2880
// packed into 1x4 blocks, whereas the upper bits are packed in normal raster
2881
// order.
2882
0
void UnpackMT2T_C(const uint8_t* src, uint16_t* dst, size_t size) {
2883
0
  for (size_t i = 0; i < size; i += 80) {
2884
0
    const uint8_t* src_lower_bits = src;
2885
0
    const uint8_t* src_upper_bits = src + 16;
2886
2887
0
    for (int j = 0; j < 4; j++) {
2888
0
      for (int k = 0; k < 16; k++) {
2889
0
        *dst++ = ((src_lower_bits[k] >> (j * 2)) & 0x3) << 6 |
2890
0
                 (uint16_t)*src_upper_bits << 8 |
2891
0
                 (uint16_t)*src_upper_bits >> 2;
2892
0
        src_upper_bits++;
2893
0
      }
2894
0
    }
2895
2896
0
    src += 80;
2897
0
  }
2898
0
}
2899
2900
void SplitRGBRow_C(const uint8_t* src_rgb,
2901
                   uint8_t* dst_r,
2902
                   uint8_t* dst_g,
2903
                   uint8_t* dst_b,
2904
0
                   int width) {
2905
0
  int x;
2906
0
  for (x = 0; x < width; ++x) {
2907
0
    dst_r[x] = src_rgb[0];
2908
0
    dst_g[x] = src_rgb[1];
2909
0
    dst_b[x] = src_rgb[2];
2910
0
    src_rgb += 3;
2911
0
  }
2912
0
}
2913
2914
void MergeRGBRow_C(const uint8_t* src_r,
2915
                   const uint8_t* src_g,
2916
                   const uint8_t* src_b,
2917
                   uint8_t* dst_rgb,
2918
0
                   int width) {
2919
0
  int x;
2920
0
  for (x = 0; x < width; ++x) {
2921
0
    dst_rgb[0] = src_r[x];
2922
0
    dst_rgb[1] = src_g[x];
2923
0
    dst_rgb[2] = src_b[x];
2924
0
    dst_rgb += 3;
2925
0
  }
2926
0
}
2927
2928
void SplitARGBRow_C(const uint8_t* src_argb,
2929
                    uint8_t* dst_r,
2930
                    uint8_t* dst_g,
2931
                    uint8_t* dst_b,
2932
                    uint8_t* dst_a,
2933
0
                    int width) {
2934
0
  int x;
2935
0
  for (x = 0; x < width; ++x) {
2936
0
    dst_b[x] = src_argb[0];
2937
0
    dst_g[x] = src_argb[1];
2938
0
    dst_r[x] = src_argb[2];
2939
0
    dst_a[x] = src_argb[3];
2940
0
    src_argb += 4;
2941
0
  }
2942
0
}
2943
2944
void MergeARGBRow_C(const uint8_t* src_r,
2945
                    const uint8_t* src_g,
2946
                    const uint8_t* src_b,
2947
                    const uint8_t* src_a,
2948
                    uint8_t* dst_argb,
2949
0
                    int width) {
2950
0
  int x;
2951
0
  for (x = 0; x < width; ++x) {
2952
0
    dst_argb[0] = src_b[x];
2953
0
    dst_argb[1] = src_g[x];
2954
0
    dst_argb[2] = src_r[x];
2955
0
    dst_argb[3] = src_a[x];
2956
0
    dst_argb += 4;
2957
0
  }
2958
0
}
2959
2960
void MergeXR30Row_C(const uint16_t* src_r,
2961
                    const uint16_t* src_g,
2962
                    const uint16_t* src_b,
2963
                    uint8_t* dst_ar30,
2964
                    int depth,
2965
0
                    int width) {
2966
0
  assert(depth >= 10);
2967
0
  assert(depth <= 16);
2968
0
  int x;
2969
0
  int shift = depth - 10;
2970
0
  uint32_t* dst_ar30_32 = (uint32_t*)dst_ar30;
2971
0
  for (x = 0; x < width; ++x) {
2972
0
    uint32_t r = clamp1023(src_r[x] >> shift);
2973
0
    uint32_t g = clamp1023(src_g[x] >> shift);
2974
0
    uint32_t b = clamp1023(src_b[x] >> shift);
2975
0
    dst_ar30_32[x] = b | (g << 10) | (r << 20) | 0xc0000000;
2976
0
  }
2977
0
}
2978
2979
void MergeAR64Row_C(const uint16_t* src_r,
2980
                    const uint16_t* src_g,
2981
                    const uint16_t* src_b,
2982
                    const uint16_t* src_a,
2983
                    uint16_t* dst_ar64,
2984
                    int depth,
2985
0
                    int width) {
2986
0
  assert(depth >= 1);
2987
0
  assert(depth <= 16);
2988
0
  int x;
2989
0
  int shift = 16 - depth;
2990
0
  int max = (1 << depth) - 1;
2991
0
  for (x = 0; x < width; ++x) {
2992
0
    dst_ar64[0] = STATIC_CAST(uint16_t, ClampMax(src_b[x], max) << shift);
2993
0
    dst_ar64[1] = STATIC_CAST(uint16_t, ClampMax(src_g[x], max) << shift);
2994
0
    dst_ar64[2] = STATIC_CAST(uint16_t, ClampMax(src_r[x], max) << shift);
2995
0
    dst_ar64[3] = STATIC_CAST(uint16_t, ClampMax(src_a[x], max) << shift);
2996
0
    dst_ar64 += 4;
2997
0
  }
2998
0
}
2999
3000
void MergeARGB16To8Row_C(const uint16_t* src_r,
3001
                         const uint16_t* src_g,
3002
                         const uint16_t* src_b,
3003
                         const uint16_t* src_a,
3004
                         uint8_t* dst_argb,
3005
                         int depth,
3006
0
                         int width) {
3007
0
  assert(depth >= 8);
3008
0
  assert(depth <= 16);
3009
0
  int x;
3010
0
  int shift = depth - 8;
3011
0
  for (x = 0; x < width; ++x) {
3012
0
    dst_argb[0] = STATIC_CAST(uint8_t, clamp255(src_b[x] >> shift));
3013
0
    dst_argb[1] = STATIC_CAST(uint8_t, clamp255(src_g[x] >> shift));
3014
0
    dst_argb[2] = STATIC_CAST(uint8_t, clamp255(src_r[x] >> shift));
3015
0
    dst_argb[3] = STATIC_CAST(uint8_t, clamp255(src_a[x] >> shift));
3016
0
    dst_argb += 4;
3017
0
  }
3018
0
}
3019
3020
void MergeXR64Row_C(const uint16_t* src_r,
3021
                    const uint16_t* src_g,
3022
                    const uint16_t* src_b,
3023
                    uint16_t* dst_ar64,
3024
                    int depth,
3025
0
                    int width) {
3026
0
  assert(depth >= 1);
3027
0
  assert(depth <= 16);
3028
0
  int x;
3029
0
  int shift = 16 - depth;
3030
0
  int max = (1 << depth) - 1;
3031
0
  for (x = 0; x < width; ++x) {
3032
0
    dst_ar64[0] = STATIC_CAST(uint16_t, ClampMax(src_b[x], max) << shift);
3033
0
    dst_ar64[1] = STATIC_CAST(uint16_t, ClampMax(src_g[x], max) << shift);
3034
0
    dst_ar64[2] = STATIC_CAST(uint16_t, ClampMax(src_r[x], max) << shift);
3035
0
    dst_ar64[3] = 0xffff;
3036
0
    dst_ar64 += 4;
3037
0
  }
3038
0
}
3039
3040
void MergeXRGB16To8Row_C(const uint16_t* src_r,
3041
                         const uint16_t* src_g,
3042
                         const uint16_t* src_b,
3043
                         uint8_t* dst_argb,
3044
                         int depth,
3045
0
                         int width) {
3046
0
  assert(depth >= 8);
3047
0
  assert(depth <= 16);
3048
0
  int x;
3049
0
  int shift = depth - 8;
3050
0
  for (x = 0; x < width; ++x) {
3051
0
    dst_argb[0] = STATIC_CAST(uint8_t, clamp255(src_b[x] >> shift));
3052
0
    dst_argb[1] = STATIC_CAST(uint8_t, clamp255(src_g[x] >> shift));
3053
0
    dst_argb[2] = STATIC_CAST(uint8_t, clamp255(src_r[x] >> shift));
3054
0
    dst_argb[3] = 0xff;
3055
0
    dst_argb += 4;
3056
0
  }
3057
0
}
3058
3059
void SplitXRGBRow_C(const uint8_t* src_argb,
3060
                    uint8_t* dst_r,
3061
                    uint8_t* dst_g,
3062
                    uint8_t* dst_b,
3063
0
                    int width) {
3064
0
  int x;
3065
0
  for (x = 0; x < width; ++x) {
3066
0
    dst_b[x] = src_argb[0];
3067
0
    dst_g[x] = src_argb[1];
3068
0
    dst_r[x] = src_argb[2];
3069
0
    src_argb += 4;
3070
0
  }
3071
0
}
3072
3073
void MergeXRGBRow_C(const uint8_t* src_r,
3074
                    const uint8_t* src_g,
3075
                    const uint8_t* src_b,
3076
                    uint8_t* dst_argb,
3077
0
                    int width) {
3078
0
  int x;
3079
0
  for (x = 0; x < width; ++x) {
3080
0
    dst_argb[0] = src_b[x];
3081
0
    dst_argb[1] = src_g[x];
3082
0
    dst_argb[2] = src_r[x];
3083
0
    dst_argb[3] = 255;
3084
0
    dst_argb += 4;
3085
0
  }
3086
0
}
3087
3088
// Convert lsb formats to msb, depending on sample depth.
3089
void MergeUVRow_16_C(const uint16_t* src_u,
3090
                     const uint16_t* src_v,
3091
                     uint16_t* dst_uv,
3092
                     int depth,
3093
0
                     int width) {
3094
0
  int shift = 16 - depth;
3095
0
  assert(depth >= 8);
3096
0
  assert(depth <= 16);
3097
0
  int x;
3098
0
  for (x = 0; x < width; ++x) {
3099
0
    dst_uv[0] = STATIC_CAST(uint16_t, src_u[x] << shift);
3100
0
    dst_uv[1] = STATIC_CAST(uint16_t, src_v[x] << shift);
3101
0
    dst_uv += 2;
3102
0
  }
3103
0
}
3104
3105
// Convert msb formats to lsb, depending on sample depth.
3106
void SplitUVRow_16_C(const uint16_t* src_uv,
3107
                     uint16_t* dst_u,
3108
                     uint16_t* dst_v,
3109
                     int depth,
3110
0
                     int width) {
3111
0
  int shift = 16 - depth;
3112
0
  int x;
3113
0
  assert(depth >= 8);
3114
0
  assert(depth <= 16);
3115
0
  for (x = 0; x < width; ++x) {
3116
0
    dst_u[x] = src_uv[0] >> shift;
3117
0
    dst_v[x] = src_uv[1] >> shift;
3118
0
    src_uv += 2;
3119
0
  }
3120
0
}
3121
3122
void MultiplyRow_16_C(const uint16_t* src_y,
3123
                      uint16_t* dst_y,
3124
                      int scale,
3125
0
                      int width) {
3126
0
  int x;
3127
0
  for (x = 0; x < width; ++x) {
3128
0
    dst_y[x] = STATIC_CAST(uint16_t, src_y[x] * scale);
3129
0
  }
3130
0
}
3131
3132
void DivideRow_16_C(const uint16_t* src_y,
3133
                    uint16_t* dst_y,
3134
                    int scale,
3135
0
                    int width) {
3136
0
  int x;
3137
0
  for (x = 0; x < width; ++x) {
3138
0
    dst_y[x] = (src_y[x] * scale) >> 16;
3139
0
  }
3140
0
}
3141
3142
// Use scale to convert lsb formats to msb, depending how many bits there are:
3143
// 32768 = 9 bits
3144
// 16384 = 10 bits
3145
// 4096 = 12 bits
3146
// 256 = 16 bits
3147
// TODO(fbarchard): change scale to bits
3148
#define C16TO8(v, scale) clamp255(((v) * (scale)) >> 16)
3149
3150
void Convert16To8Row_C(const uint16_t* src_y,
3151
                       uint8_t* dst_y,
3152
                       int scale,
3153
0
                       int width) {
3154
0
  int x;
3155
0
  assert(scale >= 256);
3156
0
  assert(scale <= 32768);
3157
3158
0
  for (x = 0; x < width; ++x) {
3159
0
    dst_y[x] = STATIC_CAST(uint8_t, C16TO8(src_y[x], scale));
3160
0
  }
3161
0
}
3162
3163
// Use scale to convert lsb formats to msb, depending how many bits there are:
3164
// 1024 = 10 bits
3165
void Convert8To16Row_C(const uint8_t* src_y,
3166
                       uint16_t* dst_y,
3167
                       int scale,
3168
0
                       int width) {
3169
0
  int x;
3170
0
  scale *= 0x0101;  // replicates the byte.
3171
0
  for (x = 0; x < width; ++x) {
3172
0
    dst_y[x] = (src_y[x] * scale) >> 16;
3173
0
  }
3174
0
}
3175
3176
// Use scale to convert J420 to I420
3177
// scale parameter is 8.8 fixed point but limited to 0 to 255
3178
// Function is based on DivideRow, but adds a bias
3179
// Does not clamp
3180
void Convert8To8Row_C(const uint8_t* src_y,
3181
                      uint8_t* dst_y,
3182
                      int scale,
3183
                      int bias,
3184
0
                      int width) {
3185
0
  int x;
3186
0
  assert(scale >= 0);
3187
0
  assert(scale <= 255);
3188
3189
0
  for (x = 0; x < width; ++x) {
3190
0
    dst_y[x] = ((src_y[x] * scale) >> 8) + bias;
3191
0
  }
3192
0
}
3193
3194
0
void CopyRow_C(const uint8_t* src, uint8_t* dst, int count) {
3195
0
  memcpy(dst, src, count);
3196
0
}
3197
3198
0
void CopyRow_16_C(const uint16_t* src, uint16_t* dst, int count) {
3199
0
  memcpy(dst, src, count * 2);
3200
0
}
3201
3202
0
void SetRow_C(uint8_t* dst, uint8_t v8, int width) {
3203
0
  memset(dst, v8, width);
3204
0
}
3205
3206
0
void ARGBSetRow_C(uint8_t* dst_argb, uint32_t v32, int width) {
3207
0
  int x;
3208
0
  for (x = 0; x < width; ++x) {
3209
0
    memcpy(dst_argb + x * sizeof v32, &v32, sizeof v32);
3210
0
  }
3211
0
}
3212
3213
// Filter 2 rows of YUY2 UV's (422) into U and V (420).
3214
void YUY2ToUVRow_C(const uint8_t* src_yuy2,
3215
                   int src_stride_yuy2,
3216
                   uint8_t* dst_u,
3217
                   uint8_t* dst_v,
3218
0
                   int width) {
3219
  // Output a row of UV values, filtering 2 rows of YUY2.
3220
0
  int x;
3221
0
  for (x = 0; x < width; x += 2) {
3222
0
    dst_u[0] = (src_yuy2[1] + src_yuy2[src_stride_yuy2 + 1] + 1) >> 1;
3223
0
    dst_v[0] = (src_yuy2[3] + src_yuy2[src_stride_yuy2 + 3] + 1) >> 1;
3224
0
    src_yuy2 += 4;
3225
0
    dst_u += 1;
3226
0
    dst_v += 1;
3227
0
  }
3228
0
}
3229
3230
// Filter 2 rows of YUY2 UV's (422) into UV (NV12).
3231
void YUY2ToNVUVRow_C(const uint8_t* src_yuy2,
3232
                     int src_stride_yuy2,
3233
                     uint8_t* dst_uv,
3234
0
                     int width) {
3235
  // Output a row of UV values, filtering 2 rows of YUY2.
3236
0
  int x;
3237
0
  for (x = 0; x < width; x += 2) {
3238
0
    dst_uv[0] = (src_yuy2[1] + src_yuy2[src_stride_yuy2 + 1] + 1) >> 1;
3239
0
    dst_uv[1] = (src_yuy2[3] + src_yuy2[src_stride_yuy2 + 3] + 1) >> 1;
3240
0
    src_yuy2 += 4;
3241
0
    dst_uv += 2;
3242
0
  }
3243
0
}
3244
3245
// Copy row of YUY2 UV's (422) into U and V (422).
3246
void YUY2ToUV422Row_C(const uint8_t* src_yuy2,
3247
                      uint8_t* dst_u,
3248
                      uint8_t* dst_v,
3249
0
                      int width) {
3250
  // Output a row of UV values.
3251
0
  int x;
3252
0
  for (x = 0; x < width; x += 2) {
3253
0
    dst_u[0] = src_yuy2[1];
3254
0
    dst_v[0] = src_yuy2[3];
3255
0
    src_yuy2 += 4;
3256
0
    dst_u += 1;
3257
0
    dst_v += 1;
3258
0
  }
3259
0
}
3260
3261
// Copy row of YUY2 Y's (422) into Y (420/422).
3262
0
void YUY2ToYRow_C(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
3263
  // Output a row of Y values.
3264
0
  int x;
3265
0
  for (x = 0; x < width - 1; x += 2) {
3266
0
    dst_y[x] = src_yuy2[0];
3267
0
    dst_y[x + 1] = src_yuy2[2];
3268
0
    src_yuy2 += 4;
3269
0
  }
3270
0
  if (width & 1) {
3271
0
    dst_y[width - 1] = src_yuy2[0];
3272
0
  }
3273
0
}
3274
3275
// Filter 2 rows of UYVY UV's (422) into U and V (420).
3276
void UYVYToUVRow_C(const uint8_t* src_uyvy,
3277
                   int src_stride_uyvy,
3278
                   uint8_t* dst_u,
3279
                   uint8_t* dst_v,
3280
0
                   int width) {
3281
  // Output a row of UV values.
3282
0
  int x;
3283
0
  for (x = 0; x < width; x += 2) {
3284
0
    dst_u[0] = (src_uyvy[0] + src_uyvy[src_stride_uyvy + 0] + 1) >> 1;
3285
0
    dst_v[0] = (src_uyvy[2] + src_uyvy[src_stride_uyvy + 2] + 1) >> 1;
3286
0
    src_uyvy += 4;
3287
0
    dst_u += 1;
3288
0
    dst_v += 1;
3289
0
  }
3290
0
}
3291
3292
// Copy row of UYVY UV's (422) into U and V (422).
3293
void UYVYToUV422Row_C(const uint8_t* src_uyvy,
3294
                      uint8_t* dst_u,
3295
                      uint8_t* dst_v,
3296
0
                      int width) {
3297
  // Output a row of UV values.
3298
0
  int x;
3299
0
  for (x = 0; x < width; x += 2) {
3300
0
    dst_u[0] = src_uyvy[0];
3301
0
    dst_v[0] = src_uyvy[2];
3302
0
    src_uyvy += 4;
3303
0
    dst_u += 1;
3304
0
    dst_v += 1;
3305
0
  }
3306
0
}
3307
3308
// Copy row of UYVY Y's (422) into Y (420/422).
3309
0
void UYVYToYRow_C(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
3310
  // Output a row of Y values.
3311
0
  int x;
3312
0
  for (x = 0; x < width - 1; x += 2) {
3313
0
    dst_y[x] = src_uyvy[1];
3314
0
    dst_y[x + 1] = src_uyvy[3];
3315
0
    src_uyvy += 4;
3316
0
  }
3317
0
  if (width & 1) {
3318
0
    dst_y[width - 1] = src_uyvy[1];
3319
0
  }
3320
0
}
3321
3322
#define BLEND(f, b, a) clamp255((((256 - a) * b) >> 8) + f)
3323
3324
// Blend src_argb over src_argb1 and store to dst_argb.
3325
// dst_argb may be src_argb or src_argb1.
3326
// This code mimics the SSSE3 version for better testability.
3327
void ARGBBlendRow_C(const uint8_t* src_argb,
3328
                    const uint8_t* src_argb1,
3329
                    uint8_t* dst_argb,
3330
0
                    int width) {
3331
0
  int x;
3332
0
  for (x = 0; x < width - 1; x += 2) {
3333
0
    uint32_t fb = src_argb[0];
3334
0
    uint32_t fg = src_argb[1];
3335
0
    uint32_t fr = src_argb[2];
3336
0
    uint32_t a = src_argb[3];
3337
0
    uint32_t bb = src_argb1[0];
3338
0
    uint32_t bg = src_argb1[1];
3339
0
    uint32_t br = src_argb1[2];
3340
0
    dst_argb[0] = STATIC_CAST(uint8_t, BLEND(fb, bb, a));
3341
0
    dst_argb[1] = STATIC_CAST(uint8_t, BLEND(fg, bg, a));
3342
0
    dst_argb[2] = STATIC_CAST(uint8_t, BLEND(fr, br, a));
3343
0
    dst_argb[3] = 255u;
3344
3345
0
    fb = src_argb[4 + 0];
3346
0
    fg = src_argb[4 + 1];
3347
0
    fr = src_argb[4 + 2];
3348
0
    a = src_argb[4 + 3];
3349
0
    bb = src_argb1[4 + 0];
3350
0
    bg = src_argb1[4 + 1];
3351
0
    br = src_argb1[4 + 2];
3352
0
    dst_argb[4 + 0] = STATIC_CAST(uint8_t, BLEND(fb, bb, a));
3353
0
    dst_argb[4 + 1] = STATIC_CAST(uint8_t, BLEND(fg, bg, a));
3354
0
    dst_argb[4 + 2] = STATIC_CAST(uint8_t, BLEND(fr, br, a));
3355
0
    dst_argb[4 + 3] = 255u;
3356
0
    src_argb += 8;
3357
0
    src_argb1 += 8;
3358
0
    dst_argb += 8;
3359
0
  }
3360
3361
0
  if (width & 1) {
3362
0
    uint32_t fb = src_argb[0];
3363
0
    uint32_t fg = src_argb[1];
3364
0
    uint32_t fr = src_argb[2];
3365
0
    uint32_t a = src_argb[3];
3366
0
    uint32_t bb = src_argb1[0];
3367
0
    uint32_t bg = src_argb1[1];
3368
0
    uint32_t br = src_argb1[2];
3369
0
    dst_argb[0] = STATIC_CAST(uint8_t, BLEND(fb, bb, a));
3370
0
    dst_argb[1] = STATIC_CAST(uint8_t, BLEND(fg, bg, a));
3371
0
    dst_argb[2] = STATIC_CAST(uint8_t, BLEND(fr, br, a));
3372
0
    dst_argb[3] = 255u;
3373
0
  }
3374
0
}
3375
#undef BLEND
3376
3377
0
#define UBLEND(f, b, a) (((a)*f) + ((255 - a) * b) + 255) >> 8
3378
void BlendPlaneRow_C(const uint8_t* src0,
3379
                     const uint8_t* src1,
3380
                     const uint8_t* alpha,
3381
                     uint8_t* dst,
3382
0
                     int width) {
3383
0
  int x;
3384
0
  for (x = 0; x < width - 1; x += 2) {
3385
0
    dst[0] = UBLEND(src0[0], src1[0], alpha[0]);
3386
0
    dst[1] = UBLEND(src0[1], src1[1], alpha[1]);
3387
0
    src0 += 2;
3388
0
    src1 += 2;
3389
0
    alpha += 2;
3390
0
    dst += 2;
3391
0
  }
3392
0
  if (width & 1) {
3393
0
    dst[0] = UBLEND(src0[0], src1[0], alpha[0]);
3394
0
  }
3395
0
}
3396
#undef UBLEND
3397
3398
0
#define ATTENUATE(f, a) (f * a + 255) >> 8
3399
3400
// Multiply source RGB by alpha and store to destination.
3401
0
void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
3402
0
  int i;
3403
0
  for (i = 0; i < width - 1; i += 2) {
3404
0
    uint32_t b = src_argb[0];
3405
0
    uint32_t g = src_argb[1];
3406
0
    uint32_t r = src_argb[2];
3407
0
    uint32_t a = src_argb[3];
3408
0
    dst_argb[0] = ATTENUATE(b, a);
3409
0
    dst_argb[1] = ATTENUATE(g, a);
3410
0
    dst_argb[2] = ATTENUATE(r, a);
3411
0
    dst_argb[3] = STATIC_CAST(uint8_t, a);
3412
0
    b = src_argb[4];
3413
0
    g = src_argb[5];
3414
0
    r = src_argb[6];
3415
0
    a = src_argb[7];
3416
0
    dst_argb[4] = ATTENUATE(b, a);
3417
0
    dst_argb[5] = ATTENUATE(g, a);
3418
0
    dst_argb[6] = ATTENUATE(r, a);
3419
0
    dst_argb[7] = STATIC_CAST(uint8_t, a);
3420
0
    src_argb += 8;
3421
0
    dst_argb += 8;
3422
0
  }
3423
3424
0
  if (width & 1) {
3425
0
    const uint32_t b = src_argb[0];
3426
0
    const uint32_t g = src_argb[1];
3427
0
    const uint32_t r = src_argb[2];
3428
0
    const uint32_t a = src_argb[3];
3429
0
    dst_argb[0] = ATTENUATE(b, a);
3430
0
    dst_argb[1] = ATTENUATE(g, a);
3431
0
    dst_argb[2] = ATTENUATE(r, a);
3432
0
    dst_argb[3] = STATIC_CAST(uint8_t, a);
3433
0
  }
3434
0
}
3435
#undef ATTENUATE
3436
3437
// Divide source RGB by alpha and store to destination.
3438
// b = (b * 255 + (a / 2)) / a;
3439
// g = (g * 255 + (a / 2)) / a;
3440
// r = (r * 255 + (a / 2)) / a;
3441
// Reciprocal method is off by 1 on some values. ie 125
3442
// 8.8 fixed point inverse table with 1.0 in upper short and 1 / a in lower.
3443
#define T(a) 0x01000000 + (0x10000 / a)
3444
const uint32_t fixed_invtbl8[256] = {
3445
    0x01000000, 0x0100ffff, T(0x02), T(0x03),   T(0x04), T(0x05), T(0x06),
3446
    T(0x07),    T(0x08),    T(0x09), T(0x0a),   T(0x0b), T(0x0c), T(0x0d),
3447
    T(0x0e),    T(0x0f),    T(0x10), T(0x11),   T(0x12), T(0x13), T(0x14),
3448
    T(0x15),    T(0x16),    T(0x17), T(0x18),   T(0x19), T(0x1a), T(0x1b),
3449
    T(0x1c),    T(0x1d),    T(0x1e), T(0x1f),   T(0x20), T(0x21), T(0x22),
3450
    T(0x23),    T(0x24),    T(0x25), T(0x26),   T(0x27), T(0x28), T(0x29),
3451
    T(0x2a),    T(0x2b),    T(0x2c), T(0x2d),   T(0x2e), T(0x2f), T(0x30),
3452
    T(0x31),    T(0x32),    T(0x33), T(0x34),   T(0x35), T(0x36), T(0x37),
3453
    T(0x38),    T(0x39),    T(0x3a), T(0x3b),   T(0x3c), T(0x3d), T(0x3e),
3454
    T(0x3f),    T(0x40),    T(0x41), T(0x42),   T(0x43), T(0x44), T(0x45),
3455
    T(0x46),    T(0x47),    T(0x48), T(0x49),   T(0x4a), T(0x4b), T(0x4c),
3456
    T(0x4d),    T(0x4e),    T(0x4f), T(0x50),   T(0x51), T(0x52), T(0x53),
3457
    T(0x54),    T(0x55),    T(0x56), T(0x57),   T(0x58), T(0x59), T(0x5a),
3458
    T(0x5b),    T(0x5c),    T(0x5d), T(0x5e),   T(0x5f), T(0x60), T(0x61),
3459
    T(0x62),    T(0x63),    T(0x64), T(0x65),   T(0x66), T(0x67), T(0x68),
3460
    T(0x69),    T(0x6a),    T(0x6b), T(0x6c),   T(0x6d), T(0x6e), T(0x6f),
3461
    T(0x70),    T(0x71),    T(0x72), T(0x73),   T(0x74), T(0x75), T(0x76),
3462
    T(0x77),    T(0x78),    T(0x79), T(0x7a),   T(0x7b), T(0x7c), T(0x7d),
3463
    T(0x7e),    T(0x7f),    T(0x80), T(0x81),   T(0x82), T(0x83), T(0x84),
3464
    T(0x85),    T(0x86),    T(0x87), T(0x88),   T(0x89), T(0x8a), T(0x8b),
3465
    T(0x8c),    T(0x8d),    T(0x8e), T(0x8f),   T(0x90), T(0x91), T(0x92),
3466
    T(0x93),    T(0x94),    T(0x95), T(0x96),   T(0x97), T(0x98), T(0x99),
3467
    T(0x9a),    T(0x9b),    T(0x9c), T(0x9d),   T(0x9e), T(0x9f), T(0xa0),
3468
    T(0xa1),    T(0xa2),    T(0xa3), T(0xa4),   T(0xa5), T(0xa6), T(0xa7),
3469
    T(0xa8),    T(0xa9),    T(0xaa), T(0xab),   T(0xac), T(0xad), T(0xae),
3470
    T(0xaf),    T(0xb0),    T(0xb1), T(0xb2),   T(0xb3), T(0xb4), T(0xb5),
3471
    T(0xb6),    T(0xb7),    T(0xb8), T(0xb9),   T(0xba), T(0xbb), T(0xbc),
3472
    T(0xbd),    T(0xbe),    T(0xbf), T(0xc0),   T(0xc1), T(0xc2), T(0xc3),
3473
    T(0xc4),    T(0xc5),    T(0xc6), T(0xc7),   T(0xc8), T(0xc9), T(0xca),
3474
    T(0xcb),    T(0xcc),    T(0xcd), T(0xce),   T(0xcf), T(0xd0), T(0xd1),
3475
    T(0xd2),    T(0xd3),    T(0xd4), T(0xd5),   T(0xd6), T(0xd7), T(0xd8),
3476
    T(0xd9),    T(0xda),    T(0xdb), T(0xdc),   T(0xdd), T(0xde), T(0xdf),
3477
    T(0xe0),    T(0xe1),    T(0xe2), T(0xe3),   T(0xe4), T(0xe5), T(0xe6),
3478
    T(0xe7),    T(0xe8),    T(0xe9), T(0xea),   T(0xeb), T(0xec), T(0xed),
3479
    T(0xee),    T(0xef),    T(0xf0), T(0xf1),   T(0xf2), T(0xf3), T(0xf4),
3480
    T(0xf5),    T(0xf6),    T(0xf7), T(0xf8),   T(0xf9), T(0xfa), T(0xfb),
3481
    T(0xfc),    T(0xfd),    T(0xfe), 0x01000100};
3482
#undef T
3483
3484
#if defined(LIBYUV_UNATTENUATE_DUP)
3485
// This code mimics the Intel SIMD version for better testability.
3486
#define UNATTENUATE(f, ia) clamp255(((f | (f << 8)) * ia) >> 16)
3487
#else
3488
#define UNATTENUATE(f, ia) clamp255((f * ia) >> 8)
3489
#endif
3490
3491
// mimics the Intel SIMD code for exactness.
3492
void ARGBUnattenuateRow_C(const uint8_t* src_argb,
3493
                          uint8_t* dst_argb,
3494
0
                          int width) {
3495
0
  int i;
3496
0
  for (i = 0; i < width; ++i) {
3497
0
    uint32_t b = src_argb[0];
3498
0
    uint32_t g = src_argb[1];
3499
0
    uint32_t r = src_argb[2];
3500
0
    const uint32_t a = src_argb[3];
3501
0
    const uint32_t ia = fixed_invtbl8[a] & 0xffff;  // 8.8 fixed point
3502
3503
    // Clamping should not be necessary but is free in assembly.
3504
0
    dst_argb[0] = STATIC_CAST(uint8_t, UNATTENUATE(b, ia));
3505
0
    dst_argb[1] = STATIC_CAST(uint8_t, UNATTENUATE(g, ia));
3506
0
    dst_argb[2] = STATIC_CAST(uint8_t, UNATTENUATE(r, ia));
3507
0
    dst_argb[3] = STATIC_CAST(uint8_t, a);
3508
0
    src_argb += 4;
3509
0
    dst_argb += 4;
3510
0
  }
3511
0
}
3512
3513
void ComputeCumulativeSumRow_C(const uint8_t* row,
3514
                               int32_t* cumsum,
3515
                               const int32_t* previous_cumsum,
3516
0
                               int width) {
3517
0
  int32_t row_sum[4] = {0, 0, 0, 0};
3518
0
  int x;
3519
0
  for (x = 0; x < width; ++x) {
3520
0
    row_sum[0] += row[x * 4 + 0];
3521
0
    row_sum[1] += row[x * 4 + 1];
3522
0
    row_sum[2] += row[x * 4 + 2];
3523
0
    row_sum[3] += row[x * 4 + 3];
3524
0
    cumsum[x * 4 + 0] = row_sum[0] + previous_cumsum[x * 4 + 0];
3525
0
    cumsum[x * 4 + 1] = row_sum[1] + previous_cumsum[x * 4 + 1];
3526
0
    cumsum[x * 4 + 2] = row_sum[2] + previous_cumsum[x * 4 + 2];
3527
0
    cumsum[x * 4 + 3] = row_sum[3] + previous_cumsum[x * 4 + 3];
3528
0
  }
3529
0
}
3530
3531
void CumulativeSumToAverageRow_C(const int32_t* tl,
3532
                                 const int32_t* bl,
3533
                                 int w,
3534
                                 int area,
3535
                                 uint8_t* dst,
3536
0
                                 int count) {
3537
0
  float ooa;
3538
0
  int i;
3539
0
  assert(area != 0);
3540
3541
0
  ooa = 1.0f / STATIC_CAST(float, area);
3542
0
  for (i = 0; i < count; ++i) {
3543
0
    dst[0] =
3544
0
        (uint8_t)(STATIC_CAST(float, bl[w + 0] + tl[0] - bl[0] - tl[w + 0]) *
3545
0
                  ooa);
3546
0
    dst[1] =
3547
0
        (uint8_t)(STATIC_CAST(float, bl[w + 1] + tl[1] - bl[1] - tl[w + 1]) *
3548
0
                  ooa);
3549
0
    dst[2] =
3550
0
        (uint8_t)(STATIC_CAST(float, bl[w + 2] + tl[2] - bl[2] - tl[w + 2]) *
3551
0
                  ooa);
3552
0
    dst[3] =
3553
0
        (uint8_t)(STATIC_CAST(float, bl[w + 3] + tl[3] - bl[3] - tl[w + 3]) *
3554
0
                  ooa);
3555
0
    dst += 4;
3556
0
    tl += 4;
3557
0
    bl += 4;
3558
0
  }
3559
0
}
3560
3561
// Copy pixels from rotated source to destination row with a slope.
3562
LIBYUV_API
3563
void ARGBAffineRow_C(const uint8_t* src_argb,
3564
                     int src_argb_stride,
3565
                     uint8_t* dst_argb,
3566
                     const float* uv_dudv,
3567
0
                     int width) {
3568
0
  int i;
3569
  // Render a row of pixels from source into a buffer.
3570
0
  float uv[2];
3571
0
  uv[0] = uv_dudv[0];
3572
0
  uv[1] = uv_dudv[1];
3573
0
  for (i = 0; i < width; ++i) {
3574
0
    int x = (int)(uv[0]);
3575
0
    int y = (int)(uv[1]);
3576
0
    *(uint32_t*)(dst_argb) =
3577
0
        *(const uint32_t*)(src_argb + y * src_argb_stride + x * 4);
3578
0
    dst_argb += 4;
3579
0
    uv[0] += uv_dudv[2];
3580
0
    uv[1] += uv_dudv[3];
3581
0
  }
3582
0
}
3583
3584
// Blend 2 rows into 1.
3585
static void HalfRow_C(const uint8_t* src_uv,
3586
                      ptrdiff_t src_uv_stride,
3587
                      uint8_t* dst_uv,
3588
0
                      int width) {
3589
0
  int x;
3590
0
  for (x = 0; x < width; ++x) {
3591
0
    dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1;
3592
0
  }
3593
0
}
3594
3595
static void HalfRow_16_C(const uint16_t* src_uv,
3596
                         ptrdiff_t src_uv_stride,
3597
                         uint16_t* dst_uv,
3598
18.7k
                         int width) {
3599
18.7k
  int x;
3600
18.5M
  for (x = 0; x < width; ++x) {
3601
18.5M
    dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1;
3602
18.5M
  }
3603
18.7k
}
3604
3605
static void HalfRow_16To8_C(const uint16_t* src_uv,
3606
                            ptrdiff_t src_uv_stride,
3607
                            uint8_t* dst_uv,
3608
                            int scale,
3609
0
                            int width) {
3610
0
  int x;
3611
0
  for (x = 0; x < width; ++x) {
3612
0
    dst_uv[x] = STATIC_CAST(
3613
0
        uint8_t,
3614
0
        C16TO8((src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1, scale));
3615
0
  }
3616
0
}
3617
3618
// C version 2x2 -> 2x1.
3619
void InterpolateRow_C(uint8_t* dst_ptr,
3620
                      const uint8_t* src_ptr,
3621
                      ptrdiff_t src_stride,
3622
                      int width,
3623
0
                      int source_y_fraction) {
3624
0
  int y1_fraction = source_y_fraction;
3625
0
  int y0_fraction = 256 - y1_fraction;
3626
0
  const uint8_t* src_ptr1 = src_ptr + src_stride;
3627
0
  int x;
3628
0
  assert(source_y_fraction >= 0);
3629
0
  assert(source_y_fraction < 256);
3630
3631
0
  if (y1_fraction == 0) {
3632
0
    memcpy(dst_ptr, src_ptr, width);
3633
0
    return;
3634
0
  }
3635
0
  if (y1_fraction == 128) {
3636
0
    HalfRow_C(src_ptr, src_stride, dst_ptr, width);
3637
0
    return;
3638
0
  }
3639
0
  for (x = 0; x < width; ++x) {
3640
0
    dst_ptr[0] = STATIC_CAST(
3641
0
        uint8_t,
3642
0
        (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8);
3643
0
    ++src_ptr;
3644
0
    ++src_ptr1;
3645
0
    ++dst_ptr;
3646
0
  }
3647
0
}
3648
3649
// C version 2x2 -> 2x1.
3650
void InterpolateRow_16_C(uint16_t* dst_ptr,
3651
                         const uint16_t* src_ptr,
3652
                         ptrdiff_t src_stride,
3653
                         int width,
3654
4.97M
                         int source_y_fraction) {
3655
4.97M
  int y1_fraction = source_y_fraction;
3656
4.97M
  int y0_fraction = 256 - y1_fraction;
3657
4.97M
  const uint16_t* src_ptr1 = src_ptr + src_stride;
3658
4.97M
  int x;
3659
4.97M
  assert(source_y_fraction >= 0);
3660
4.97M
  assert(source_y_fraction < 256);
3661
3662
4.97M
  if (y1_fraction == 0) {
3663
600k
    memcpy(dst_ptr, src_ptr, width * 2);
3664
600k
    return;
3665
600k
  }
3666
4.37M
  if (y1_fraction == 128) {
3667
18.7k
    HalfRow_16_C(src_ptr, src_stride, dst_ptr, width);
3668
18.7k
    return;
3669
18.7k
  }
3670
4.96G
  for (x = 0; x < width; ++x) {
3671
4.96G
    dst_ptr[0] = STATIC_CAST(
3672
4.96G
        uint16_t,
3673
4.96G
        (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8);
3674
4.96G
    ++src_ptr;
3675
4.96G
    ++src_ptr1;
3676
4.96G
    ++dst_ptr;
3677
4.96G
  }
3678
4.35M
}
3679
3680
// C version 2x2 16 bit-> 2x1 8 bit.
3681
// Use scale to convert lsb formats to msb, depending how many bits there are:
3682
// 32768 = 9 bits
3683
// 16384 = 10 bits
3684
// 4096 = 12 bits
3685
// 256 = 16 bits
3686
// TODO(fbarchard): change scale to bits
3687
3688
void InterpolateRow_16To8_C(uint8_t* dst_ptr,
3689
                            const uint16_t* src_ptr,
3690
                            ptrdiff_t src_stride,
3691
                            int scale,
3692
                            int width,
3693
0
                            int source_y_fraction) {
3694
0
  int y1_fraction = source_y_fraction;
3695
0
  int y0_fraction = 256 - y1_fraction;
3696
0
  const uint16_t* src_ptr1 = src_ptr + src_stride;
3697
0
  int x;
3698
0
  assert(source_y_fraction >= 0);
3699
0
  assert(source_y_fraction < 256);
3700
3701
0
  if (source_y_fraction == 0) {
3702
0
    Convert16To8Row_C(src_ptr, dst_ptr, scale, width);
3703
0
    return;
3704
0
  }
3705
0
  if (source_y_fraction == 128) {
3706
0
    HalfRow_16To8_C(src_ptr, src_stride, dst_ptr, scale, width);
3707
0
    return;
3708
0
  }
3709
0
  for (x = 0; x < width; ++x) {
3710
0
    dst_ptr[0] = STATIC_CAST(
3711
0
        uint8_t,
3712
0
        C16TO8(
3713
0
            (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8,
3714
0
            scale));
3715
0
    src_ptr += 1;
3716
0
    src_ptr1 += 1;
3717
0
    dst_ptr += 1;
3718
0
  }
3719
0
}
3720
3721
// Use first 4 shuffler values to reorder ARGB channels.
3722
void ARGBShuffleRow_C(const uint8_t* src_argb,
3723
                      uint8_t* dst_argb,
3724
                      const uint8_t* shuffler,
3725
0
                      int width) {
3726
0
  int index0 = shuffler[0];
3727
0
  int index1 = shuffler[1];
3728
0
  int index2 = shuffler[2];
3729
0
  int index3 = shuffler[3];
3730
  // Shuffle a row of ARGB.
3731
0
  int x;
3732
0
  for (x = 0; x < width; ++x) {
3733
    // To support in-place conversion.
3734
0
    uint8_t b = src_argb[index0];
3735
0
    uint8_t g = src_argb[index1];
3736
0
    uint8_t r = src_argb[index2];
3737
0
    uint8_t a = src_argb[index3];
3738
0
    dst_argb[0] = b;
3739
0
    dst_argb[1] = g;
3740
0
    dst_argb[2] = r;
3741
0
    dst_argb[3] = a;
3742
0
    src_argb += 4;
3743
0
    dst_argb += 4;
3744
0
  }
3745
0
}
3746
3747
void I422ToYUY2Row_C(const uint8_t* src_y,
3748
                     const uint8_t* src_u,
3749
                     const uint8_t* src_v,
3750
                     uint8_t* dst_frame,
3751
0
                     int width) {
3752
0
  int x;
3753
0
  for (x = 0; x < width - 1; x += 2) {
3754
0
    dst_frame[0] = src_y[0];
3755
0
    dst_frame[1] = src_u[0];
3756
0
    dst_frame[2] = src_y[1];
3757
0
    dst_frame[3] = src_v[0];
3758
0
    dst_frame += 4;
3759
0
    src_y += 2;
3760
0
    src_u += 1;
3761
0
    src_v += 1;
3762
0
  }
3763
0
  if (width & 1) {
3764
0
    dst_frame[0] = src_y[0];
3765
0
    dst_frame[1] = src_u[0];
3766
0
    dst_frame[2] = 0;
3767
0
    dst_frame[3] = src_v[0];
3768
0
  }
3769
0
}
3770
3771
void I422ToUYVYRow_C(const uint8_t* src_y,
3772
                     const uint8_t* src_u,
3773
                     const uint8_t* src_v,
3774
                     uint8_t* dst_frame,
3775
0
                     int width) {
3776
0
  int x;
3777
0
  for (x = 0; x < width - 1; x += 2) {
3778
0
    dst_frame[0] = src_u[0];
3779
0
    dst_frame[1] = src_y[0];
3780
0
    dst_frame[2] = src_v[0];
3781
0
    dst_frame[3] = src_y[1];
3782
0
    dst_frame += 4;
3783
0
    src_y += 2;
3784
0
    src_u += 1;
3785
0
    src_v += 1;
3786
0
  }
3787
0
  if (width & 1) {
3788
0
    dst_frame[0] = src_u[0];
3789
0
    dst_frame[1] = src_y[0];
3790
0
    dst_frame[2] = src_v[0];
3791
0
    dst_frame[3] = 0;
3792
0
  }
3793
0
}
3794
3795
void ARGBPolynomialRow_C(const uint8_t* src_argb,
3796
                         uint8_t* dst_argb,
3797
                         const float* poly,
3798
0
                         int width) {
3799
0
  int i;
3800
0
  for (i = 0; i < width; ++i) {
3801
0
    float b = (float)(src_argb[0]);
3802
0
    float g = (float)(src_argb[1]);
3803
0
    float r = (float)(src_argb[2]);
3804
0
    float a = (float)(src_argb[3]);
3805
0
    float b2 = b * b;
3806
0
    float g2 = g * g;
3807
0
    float r2 = r * r;
3808
0
    float a2 = a * a;
3809
0
    float db = poly[0] + poly[4] * b;
3810
0
    float dg = poly[1] + poly[5] * g;
3811
0
    float dr = poly[2] + poly[6] * r;
3812
0
    float da = poly[3] + poly[7] * a;
3813
0
    float b3 = b2 * b;
3814
0
    float g3 = g2 * g;
3815
0
    float r3 = r2 * r;
3816
0
    float a3 = a2 * a;
3817
0
    db += poly[8] * b2;
3818
0
    dg += poly[9] * g2;
3819
0
    dr += poly[10] * r2;
3820
0
    da += poly[11] * a2;
3821
0
    db += poly[12] * b3;
3822
0
    dg += poly[13] * g3;
3823
0
    dr += poly[14] * r3;
3824
0
    da += poly[15] * a3;
3825
3826
0
    dst_argb[0] = STATIC_CAST(uint8_t, Clamp((int32_t)(db)));
3827
0
    dst_argb[1] = STATIC_CAST(uint8_t, Clamp((int32_t)(dg)));
3828
0
    dst_argb[2] = STATIC_CAST(uint8_t, Clamp((int32_t)(dr)));
3829
0
    dst_argb[3] = STATIC_CAST(uint8_t, Clamp((int32_t)(da)));
3830
0
    src_argb += 4;
3831
0
    dst_argb += 4;
3832
0
  }
3833
0
}
3834
3835
// Samples assumed to be unsigned in low 9, 10 or 12 bits.  Scale factor
3836
// adjust the source integer range to the half float range desired.
3837
3838
// This magic constant is 2^-112. Multiplying by this
3839
// is the same as subtracting 112 from the exponent, which
3840
// is the difference in exponent bias between 32-bit and
3841
// 16-bit floats. Once we've done this subtraction, we can
3842
// simply extract the low bits of the exponent and the high
3843
// bits of the mantissa from our float and we're done.
3844
3845
// Work around GCC 7 punning warning -Wstrict-aliasing
3846
#if defined(__GNUC__)
3847
typedef uint32_t __attribute__((__may_alias__)) uint32_alias_t;
3848
#else
3849
typedef uint32_t uint32_alias_t;
3850
#endif
3851
3852
void HalfFloatRow_C(const uint16_t* src,
3853
                    uint16_t* dst,
3854
                    float scale,
3855
0
                    int width) {
3856
0
  int i;
3857
0
  float mult = 1.9259299444e-34f * scale;
3858
0
  for (i = 0; i < width; ++i) {
3859
0
    float value = src[i] * mult;
3860
0
    dst[i] = (uint16_t)((*(const uint32_alias_t*)&value) >> 13);
3861
0
  }
3862
0
}
3863
3864
0
void ByteToFloatRow_C(const uint8_t* src, float* dst, float scale, int width) {
3865
0
  int i;
3866
0
  for (i = 0; i < width; ++i) {
3867
0
    float value = src[i] * scale;
3868
0
    dst[i] = value;
3869
0
  }
3870
0
}
3871
3872
void ARGBLumaColorTableRow_C(const uint8_t* src_argb,
3873
                             uint8_t* dst_argb,
3874
                             int width,
3875
                             const uint8_t* luma,
3876
0
                             uint32_t lumacoeff) {
3877
0
  uint32_t bc = lumacoeff & 0xff;
3878
0
  uint32_t gc = (lumacoeff >> 8) & 0xff;
3879
0
  uint32_t rc = (lumacoeff >> 16) & 0xff;
3880
3881
0
  int i;
3882
0
  for (i = 0; i < width - 1; i += 2) {
3883
    // Luminance in rows, color values in columns.
3884
0
    const uint8_t* luma0 =
3885
0
        ((src_argb[0] * bc + src_argb[1] * gc + src_argb[2] * rc) & 0x7F00u) +
3886
0
        luma;
3887
0
    const uint8_t* luma1;
3888
0
    dst_argb[0] = luma0[src_argb[0]];
3889
0
    dst_argb[1] = luma0[src_argb[1]];
3890
0
    dst_argb[2] = luma0[src_argb[2]];
3891
0
    dst_argb[3] = src_argb[3];
3892
0
    luma1 =
3893
0
        ((src_argb[4] * bc + src_argb[5] * gc + src_argb[6] * rc) & 0x7F00u) +
3894
0
        luma;
3895
0
    dst_argb[4] = luma1[src_argb[4]];
3896
0
    dst_argb[5] = luma1[src_argb[5]];
3897
0
    dst_argb[6] = luma1[src_argb[6]];
3898
0
    dst_argb[7] = src_argb[7];
3899
0
    src_argb += 8;
3900
0
    dst_argb += 8;
3901
0
  }
3902
0
  if (width & 1) {
3903
    // Luminance in rows, color values in columns.
3904
0
    const uint8_t* luma0 =
3905
0
        ((src_argb[0] * bc + src_argb[1] * gc + src_argb[2] * rc) & 0x7F00u) +
3906
0
        luma;
3907
0
    dst_argb[0] = luma0[src_argb[0]];
3908
0
    dst_argb[1] = luma0[src_argb[1]];
3909
0
    dst_argb[2] = luma0[src_argb[2]];
3910
0
    dst_argb[3] = src_argb[3];
3911
0
  }
3912
0
}
3913
3914
0
void ARGBCopyAlphaRow_C(const uint8_t* src, uint8_t* dst, int width) {
3915
0
  int i;
3916
0
  for (i = 0; i < width - 1; i += 2) {
3917
0
    dst[3] = src[3];
3918
0
    dst[7] = src[7];
3919
0
    dst += 8;
3920
0
    src += 8;
3921
0
  }
3922
0
  if (width & 1) {
3923
0
    dst[3] = src[3];
3924
0
  }
3925
0
}
3926
3927
0
void ARGBExtractAlphaRow_C(const uint8_t* src_argb, uint8_t* dst_a, int width) {
3928
0
  int i;
3929
0
  for (i = 0; i < width - 1; i += 2) {
3930
0
    dst_a[0] = src_argb[3];
3931
0
    dst_a[1] = src_argb[7];
3932
0
    dst_a += 2;
3933
0
    src_argb += 8;
3934
0
  }
3935
0
  if (width & 1) {
3936
0
    dst_a[0] = src_argb[3];
3937
0
  }
3938
0
}
3939
3940
0
void ARGBCopyYToAlphaRow_C(const uint8_t* src, uint8_t* dst, int width) {
3941
0
  int i;
3942
0
  for (i = 0; i < width - 1; i += 2) {
3943
0
    dst[3] = src[0];
3944
0
    dst[7] = src[1];
3945
0
    dst += 8;
3946
0
    src += 2;
3947
0
  }
3948
0
  if (width & 1) {
3949
0
    dst[3] = src[0];
3950
0
  }
3951
0
}
3952
3953
// Maximum temporary width for wrappers to process at a time, in pixels.
3954
809k
#define MAXTWIDTH 2048
3955
3956
#if !(defined(_MSC_VER) && !defined(__clang__) && defined(_M_IX86)) && \
3957
    defined(HAS_I422TORGB565ROW_SSSE3) && !defined(LIBYUV_ENABLE_ROWWIN)
3958
// row_win.cc has asm version, but GCC uses 2 step wrapper.
3959
void I422ToRGB565Row_SSSE3(const uint8_t* src_y,
3960
                           const uint8_t* src_u,
3961
                           const uint8_t* src_v,
3962
                           uint8_t* dst_rgb565,
3963
                           const struct YuvConstants* yuvconstants,
3964
0
                           int width) {
3965
0
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
3966
0
  while (width > 0) {
3967
0
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
3968
0
    I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth);
3969
0
    ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
3970
0
    src_y += twidth;
3971
0
    src_u += twidth / 2;
3972
0
    src_v += twidth / 2;
3973
0
    dst_rgb565 += twidth * 2;
3974
0
    width -= twidth;
3975
0
  }
3976
0
}
3977
#endif
3978
3979
#if defined(HAS_I422TOARGB1555ROW_SSSE3)
3980
void I422ToARGB1555Row_SSSE3(const uint8_t* src_y,
3981
                             const uint8_t* src_u,
3982
                             const uint8_t* src_v,
3983
                             uint8_t* dst_argb1555,
3984
                             const struct YuvConstants* yuvconstants,
3985
0
                             int width) {
3986
  // Row buffer for intermediate ARGB pixels.
3987
0
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
3988
0
  while (width > 0) {
3989
0
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
3990
0
    I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth);
3991
0
    ARGBToARGB1555Row_SSE2(row, dst_argb1555, twidth);
3992
0
    src_y += twidth;
3993
0
    src_u += twidth / 2;
3994
0
    src_v += twidth / 2;
3995
0
    dst_argb1555 += twidth * 2;
3996
0
    width -= twidth;
3997
0
  }
3998
0
}
3999
#endif
4000
4001
#if defined(HAS_I422TOARGB4444ROW_SSSE3)
4002
void I422ToARGB4444Row_SSSE3(const uint8_t* src_y,
4003
                             const uint8_t* src_u,
4004
                             const uint8_t* src_v,
4005
                             uint8_t* dst_argb4444,
4006
                             const struct YuvConstants* yuvconstants,
4007
0
                             int width) {
4008
  // Row buffer for intermediate ARGB pixels.
4009
0
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
4010
0
  while (width > 0) {
4011
0
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
4012
0
    I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth);
4013
0
    ARGBToARGB4444Row_SSE2(row, dst_argb4444, twidth);
4014
0
    src_y += twidth;
4015
0
    src_u += twidth / 2;
4016
0
    src_v += twidth / 2;
4017
0
    dst_argb4444 += twidth * 2;
4018
0
    width -= twidth;
4019
0
  }
4020
0
}
4021
#endif
4022
4023
#if defined(HAS_NV12TORGB565ROW_SSSE3)
4024
void NV12ToRGB565Row_SSSE3(const uint8_t* src_y,
4025
                           const uint8_t* src_uv,
4026
                           uint8_t* dst_rgb565,
4027
                           const struct YuvConstants* yuvconstants,
4028
0
                           int width) {
4029
  // Row buffer for intermediate ARGB pixels.
4030
0
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
4031
0
  while (width > 0) {
4032
0
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
4033
0
    NV12ToARGBRow_SSSE3(src_y, src_uv, row, yuvconstants, twidth);
4034
0
    ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
4035
0
    src_y += twidth;
4036
0
    src_uv += twidth;
4037
0
    dst_rgb565 += twidth * 2;
4038
0
    width -= twidth;
4039
0
  }
4040
0
}
4041
#endif
4042
4043
#if defined(HAS_NV12TORGB24ROW_SSSE3)
4044
void NV12ToRGB24Row_SSSE3(const uint8_t* src_y,
4045
                          const uint8_t* src_uv,
4046
                          uint8_t* dst_rgb24,
4047
                          const struct YuvConstants* yuvconstants,
4048
0
                          int width) {
4049
  // Row buffer for intermediate ARGB pixels.
4050
0
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
4051
0
  while (width > 0) {
4052
0
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
4053
0
    NV12ToARGBRow_SSSE3(src_y, src_uv, row, yuvconstants, twidth);
4054
0
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
4055
0
    src_y += twidth;
4056
0
    src_uv += twidth;
4057
0
    dst_rgb24 += twidth * 3;
4058
0
    width -= twidth;
4059
0
  }
4060
0
}
4061
#endif
4062
4063
#if defined(HAS_NV21TORGB24ROW_SSSE3)
4064
void NV21ToRGB24Row_SSSE3(const uint8_t* src_y,
4065
                          const uint8_t* src_vu,
4066
                          uint8_t* dst_rgb24,
4067
                          const struct YuvConstants* yuvconstants,
4068
0
                          int width) {
4069
  // Row buffer for intermediate ARGB pixels.
4070
0
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
4071
0
  while (width > 0) {
4072
0
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
4073
0
    NV21ToARGBRow_SSSE3(src_y, src_vu, row, yuvconstants, twidth);
4074
0
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
4075
0
    src_y += twidth;
4076
0
    src_vu += twidth;
4077
0
    dst_rgb24 += twidth * 3;
4078
0
    width -= twidth;
4079
0
  }
4080
0
}
4081
#endif
4082
4083
#if defined(HAS_NV12TORGB24ROW_AVX2)
4084
void NV12ToRGB24Row_AVX2(const uint8_t* src_y,
4085
                         const uint8_t* src_uv,
4086
                         uint8_t* dst_rgb24,
4087
                         const struct YuvConstants* yuvconstants,
4088
0
                         int width) {
4089
  // Row buffer for intermediate ARGB pixels.
4090
0
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
4091
0
  while (width > 0) {
4092
0
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
4093
0
    NV12ToARGBRow_AVX2(src_y, src_uv, row, yuvconstants, twidth);
4094
0
#if defined(HAS_ARGBTORGB24ROW_AVX2)
4095
0
    ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth);
4096
#else
4097
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
4098
#endif
4099
0
    src_y += twidth;
4100
0
    src_uv += twidth;
4101
0
    dst_rgb24 += twidth * 3;
4102
0
    width -= twidth;
4103
0
  }
4104
0
}
4105
#endif
4106
4107
#if defined(HAS_NV21TORGB24ROW_AVX2)
4108
void NV21ToRGB24Row_AVX2(const uint8_t* src_y,
4109
                         const uint8_t* src_vu,
4110
                         uint8_t* dst_rgb24,
4111
                         const struct YuvConstants* yuvconstants,
4112
0
                         int width) {
4113
  // Row buffer for intermediate ARGB pixels.
4114
0
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
4115
0
  while (width > 0) {
4116
0
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
4117
0
    NV21ToARGBRow_AVX2(src_y, src_vu, row, yuvconstants, twidth);
4118
0
#if defined(HAS_ARGBTORGB24ROW_AVX2)
4119
0
    ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth);
4120
#else
4121
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
4122
#endif
4123
0
    src_y += twidth;
4124
0
    src_vu += twidth;
4125
0
    dst_rgb24 += twidth * 3;
4126
0
    width -= twidth;
4127
0
  }
4128
0
}
4129
#endif
4130
4131
#if defined(HAS_I422TORGB565ROW_AVX2)
4132
void I422ToRGB565Row_AVX2(const uint8_t* src_y,
4133
                          const uint8_t* src_u,
4134
                          const uint8_t* src_v,
4135
                          uint8_t* dst_rgb565,
4136
                          const struct YuvConstants* yuvconstants,
4137
3.71k
                          int width) {
4138
3.71k
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
4139
7.43k
  while (width > 0) {
4140
3.71k
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
4141
3.71k
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
4142
#if defined(HAS_ARGBTORGB565ROW_AVX2)
4143
    ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth);
4144
#else
4145
3.71k
    ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
4146
3.71k
#endif
4147
3.71k
    src_y += twidth;
4148
3.71k
    src_u += twidth / 2;
4149
3.71k
    src_v += twidth / 2;
4150
3.71k
    dst_rgb565 += twidth * 2;
4151
3.71k
    width -= twidth;
4152
3.71k
  }
4153
3.71k
}
4154
#endif
4155
4156
#if defined(HAS_I422TOARGB1555ROW_AVX2)
4157
void I422ToARGB1555Row_AVX2(const uint8_t* src_y,
4158
                            const uint8_t* src_u,
4159
                            const uint8_t* src_v,
4160
                            uint8_t* dst_argb1555,
4161
                            const struct YuvConstants* yuvconstants,
4162
0
                            int width) {
4163
  // Row buffer for intermediate ARGB pixels.
4164
0
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
4165
0
  while (width > 0) {
4166
0
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
4167
0
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
4168
#if defined(HAS_ARGBTOARGB1555ROW_AVX2)
4169
    ARGBToARGB1555Row_AVX2(row, dst_argb1555, twidth);
4170
#else
4171
0
    ARGBToARGB1555Row_SSE2(row, dst_argb1555, twidth);
4172
0
#endif
4173
0
    src_y += twidth;
4174
0
    src_u += twidth / 2;
4175
0
    src_v += twidth / 2;
4176
0
    dst_argb1555 += twidth * 2;
4177
0
    width -= twidth;
4178
0
  }
4179
0
}
4180
#endif
4181
4182
#if defined(HAS_I422TOARGB4444ROW_AVX2)
4183
void I422ToARGB4444Row_AVX2(const uint8_t* src_y,
4184
                            const uint8_t* src_u,
4185
                            const uint8_t* src_v,
4186
                            uint8_t* dst_argb4444,
4187
                            const struct YuvConstants* yuvconstants,
4188
0
                            int width) {
4189
  // Row buffer for intermediate ARGB pixels.
4190
0
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
4191
0
  while (width > 0) {
4192
0
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
4193
0
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
4194
#if defined(HAS_ARGBTOARGB4444ROW_AVX2)
4195
    ARGBToARGB4444Row_AVX2(row, dst_argb4444, twidth);
4196
#else
4197
0
    ARGBToARGB4444Row_SSE2(row, dst_argb4444, twidth);
4198
0
#endif
4199
0
    src_y += twidth;
4200
0
    src_u += twidth / 2;
4201
0
    src_v += twidth / 2;
4202
0
    dst_argb4444 += twidth * 2;
4203
0
    width -= twidth;
4204
0
  }
4205
0
}
4206
#endif
4207
4208
#if defined(HAS_I422TORGB24ROW_AVX2)
4209
void I422ToRGB24Row_AVX2(const uint8_t* src_y,
4210
                         const uint8_t* src_u,
4211
                         const uint8_t* src_v,
4212
                         uint8_t* dst_rgb24,
4213
                         const struct YuvConstants* yuvconstants,
4214
4.22k
                         int width) {
4215
  // Row buffer for intermediate ARGB pixels.
4216
4.22k
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
4217
8.44k
  while (width > 0) {
4218
4.22k
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
4219
4.22k
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
4220
4.22k
#if defined(HAS_ARGBTORGB24ROW_AVX2)
4221
4.22k
    ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth);
4222
#else
4223
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
4224
#endif
4225
4.22k
    src_y += twidth;
4226
4.22k
    src_u += twidth / 2;
4227
4.22k
    src_v += twidth / 2;
4228
4.22k
    dst_rgb24 += twidth * 3;
4229
4.22k
    width -= twidth;
4230
4.22k
  }
4231
4.22k
}
4232
#endif
4233
4234
#if defined(HAS_I444TORGB24ROW_AVX2)
4235
void I444ToRGB24Row_AVX2(const uint8_t* src_y,
4236
                         const uint8_t* src_u,
4237
                         const uint8_t* src_v,
4238
                         uint8_t* dst_rgb24,
4239
                         const struct YuvConstants* yuvconstants,
4240
6.90k
                         int width) {
4241
  // Row buffer for intermediate ARGB pixels.
4242
6.90k
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
4243
14.0k
  while (width > 0) {
4244
7.16k
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
4245
7.16k
    I444ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
4246
7.16k
#if defined(HAS_ARGBTORGB24ROW_AVX2)
4247
7.16k
    ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth);
4248
#else
4249
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
4250
#endif
4251
7.16k
    src_y += twidth;
4252
7.16k
    src_u += twidth;
4253
7.16k
    src_v += twidth;
4254
7.16k
    dst_rgb24 += twidth * 3;
4255
7.16k
    width -= twidth;
4256
7.16k
  }
4257
6.90k
}
4258
#endif
4259
4260
#if defined(HAS_NV12TORGB565ROW_AVX2)
4261
void NV12ToRGB565Row_AVX2(const uint8_t* src_y,
4262
                          const uint8_t* src_uv,
4263
                          uint8_t* dst_rgb565,
4264
                          const struct YuvConstants* yuvconstants,
4265
0
                          int width) {
4266
  // Row buffer for intermediate ARGB pixels.
4267
0
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
4268
0
  while (width > 0) {
4269
0
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
4270
0
    NV12ToARGBRow_AVX2(src_y, src_uv, row, yuvconstants, twidth);
4271
#if defined(HAS_ARGBTORGB565ROW_AVX2)
4272
    ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth);
4273
#else
4274
0
    ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
4275
0
#endif
4276
0
    src_y += twidth;
4277
0
    src_uv += twidth;
4278
0
    dst_rgb565 += twidth * 2;
4279
0
    width -= twidth;
4280
0
  }
4281
0
}
4282
#endif
4283
4284
#ifdef HAS_RGB24TOYJROW_AVX2
4285
// Convert 16 RGB24 pixels (64 bytes) to 16 YJ values.
4286
0
void RGB24ToYJRow_AVX2(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) {
4287
  // Row buffer for intermediate ARGB pixels.
4288
0
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
4289
0
  while (width > 0) {
4290
0
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
4291
0
    RGB24ToARGBRow_SSSE3(src_rgb24, row, twidth);
4292
0
    ARGBToYJRow_AVX2(row, dst_yj, twidth);
4293
0
    src_rgb24 += twidth * 3;
4294
0
    dst_yj += twidth;
4295
0
    width -= twidth;
4296
0
  }
4297
0
}
4298
#endif  // HAS_RGB24TOYJROW_AVX2
4299
4300
#ifdef HAS_RAWTOYJROW_AVX2
4301
// Convert 32 RAW pixels (128 bytes) to 32 YJ values.
4302
121
void RAWToYJRow_AVX2(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
4303
  // Row buffer for intermediate ARGB pixels.
4304
121
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
4305
397k
  while (width > 0) {
4306
396k
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
4307
396k
#ifdef HAS_RAWTOARGBROW_AVX2
4308
396k
    RAWToARGBRow_AVX2(src_raw, row, twidth);
4309
#else
4310
    RAWToARGBRow_SSSE3(src_raw, row, twidth);
4311
#endif
4312
396k
    ARGBToYJRow_AVX2(row, dst_yj, twidth);
4313
396k
    src_raw += twidth * 3;
4314
396k
    dst_yj += twidth;
4315
396k
    width -= twidth;
4316
396k
  }
4317
121
}
4318
#endif  // HAS_RAWTOYJROW_AVX2
4319
4320
#ifdef HAS_RGB24TOYJROW_SSSE3
4321
// Convert 16 RGB24 pixels (64 bytes) to 16 YJ values.
4322
0
void RGB24ToYJRow_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) {
4323
  // Row buffer for intermediate ARGB pixels.
4324
0
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
4325
0
  while (width > 0) {
4326
0
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
4327
0
    RGB24ToARGBRow_SSSE3(src_rgb24, row, twidth);
4328
0
    ARGBToYJRow_SSSE3(row, dst_yj, twidth);
4329
0
    src_rgb24 += twidth * 3;
4330
0
    dst_yj += twidth;
4331
0
    width -= twidth;
4332
0
  }
4333
0
}
4334
#endif  // HAS_RGB24TOYJROW_SSSE3
4335
4336
#ifdef HAS_RAWTOYJROW_SSSE3
4337
// Convert 16 RAW pixels (64 bytes) to 16 YJ values.
4338
0
void RAWToYJRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
4339
  // Row buffer for intermediate ARGB pixels.
4340
0
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
4341
0
  while (width > 0) {
4342
0
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
4343
0
    RAWToARGBRow_SSSE3(src_raw, row, twidth);
4344
0
    ARGBToYJRow_SSSE3(row, dst_yj, twidth);
4345
0
    src_raw += twidth * 3;
4346
0
    dst_yj += twidth;
4347
0
    width -= twidth;
4348
0
  }
4349
0
}
4350
#endif  // HAS_RAWTOYJROW_SSSE3
4351
4352
#ifdef HAS_INTERPOLATEROW_16TO8_AVX2
4353
void InterpolateRow_16To8_AVX2(uint8_t* dst_ptr,
4354
                               const uint16_t* src_ptr,
4355
                               ptrdiff_t src_stride,
4356
                               int scale,
4357
                               int width,
4358
0
                               int source_y_fraction) {
4359
  // Row buffer for intermediate 16 bit pixels.
4360
0
  SIMD_ALIGNED(uint16_t row[MAXTWIDTH]);
4361
0
  while (width > 0) {
4362
0
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
4363
0
    InterpolateRow_16_C(row, src_ptr, src_stride, twidth, source_y_fraction);
4364
0
    Convert16To8Row_AVX2(row, dst_ptr, scale, twidth);
4365
0
    src_ptr += twidth;
4366
0
    dst_ptr += twidth;
4367
0
    width -= twidth;
4368
0
  }
4369
0
}
4370
#endif  // HAS_INTERPOLATEROW_16TO8_AVX2
4371
4372
0
float ScaleSumSamples_C(const float* src, float* dst, float scale, int width) {
4373
0
  float fsum = 0.f;
4374
0
  int i;
4375
0
  for (i = 0; i < width; ++i) {
4376
0
    float v = *src++;
4377
0
    fsum += v * v;
4378
0
    *dst++ = v * scale;
4379
0
  }
4380
0
  return fsum;
4381
0
}
4382
4383
0
float ScaleMaxSamples_C(const float* src, float* dst, float scale, int width) {
4384
0
  float fmax = 0.f;
4385
0
  int i;
4386
0
  for (i = 0; i < width; ++i) {
4387
0
    float v = *src++;
4388
0
    float vs = v * scale;
4389
0
    fmax = (v > fmax) ? v : fmax;
4390
0
    *dst++ = vs;
4391
0
  }
4392
0
  return fmax;
4393
0
}
4394
4395
0
void ScaleSamples_C(const float* src, float* dst, float scale, int width) {
4396
0
  int i;
4397
0
  for (i = 0; i < width; ++i) {
4398
0
    *dst++ = *src++ * scale;
4399
0
  }
4400
0
}
4401
4402
0
void GaussRow_C(const uint32_t* src, uint16_t* dst, int width) {
4403
0
  int i;
4404
0
  for (i = 0; i < width; ++i) {
4405
0
    *dst++ = STATIC_CAST(
4406
0
        uint16_t,
4407
0
        (src[0] + src[1] * 4 + src[2] * 6 + src[3] * 4 + src[4] + 128) >> 8);
4408
0
    ++src;
4409
0
  }
4410
0
}
4411
4412
// filter 5 rows with 1, 4, 6, 4, 1 coefficients to produce 1 row.
4413
void GaussCol_C(const uint16_t* src0,
4414
                const uint16_t* src1,
4415
                const uint16_t* src2,
4416
                const uint16_t* src3,
4417
                const uint16_t* src4,
4418
                uint32_t* dst,
4419
0
                int width) {
4420
0
  int i;
4421
0
  for (i = 0; i < width; ++i) {
4422
0
    *dst++ = *src0++ + *src1++ * 4 + *src2++ * 6 + *src3++ * 4 + *src4++;
4423
0
  }
4424
0
}
4425
4426
0
void GaussRow_F32_C(const float* src, float* dst, int width) {
4427
0
  int i;
4428
0
  for (i = 0; i < width; ++i) {
4429
0
    *dst++ = (src[0] + src[1] * 4 + src[2] * 6 + src[3] * 4 + src[4]) *
4430
0
             (1.0f / 256.0f);
4431
0
    ++src;
4432
0
  }
4433
0
}
4434
4435
// filter 5 rows with 1, 4, 6, 4, 1 coefficients to produce 1 row.
4436
void GaussCol_F32_C(const float* src0,
4437
                    const float* src1,
4438
                    const float* src2,
4439
                    const float* src3,
4440
                    const float* src4,
4441
                    float* dst,
4442
0
                    int width) {
4443
0
  int i;
4444
0
  for (i = 0; i < width; ++i) {
4445
0
    *dst++ = *src0++ + *src1++ * 4 + *src2++ * 6 + *src3++ * 4 + *src4++;
4446
0
  }
4447
0
}
4448
4449
// Convert biplanar NV21 to packed YUV24
4450
void NV21ToYUV24Row_C(const uint8_t* src_y,
4451
                      const uint8_t* src_vu,
4452
                      uint8_t* dst_yuv24,
4453
0
                      int width) {
4454
0
  int x;
4455
0
  for (x = 0; x < width - 1; x += 2) {
4456
0
    dst_yuv24[0] = src_vu[0];  // V
4457
0
    dst_yuv24[1] = src_vu[1];  // U
4458
0
    dst_yuv24[2] = src_y[0];   // Y0
4459
0
    dst_yuv24[3] = src_vu[0];  // V
4460
0
    dst_yuv24[4] = src_vu[1];  // U
4461
0
    dst_yuv24[5] = src_y[1];   // Y1
4462
0
    src_y += 2;
4463
0
    src_vu += 2;
4464
0
    dst_yuv24 += 6;  // Advance 2 pixels.
4465
0
  }
4466
0
  if (width & 1) {
4467
0
    dst_yuv24[0] = src_vu[0];  // V
4468
0
    dst_yuv24[1] = src_vu[1];  // U
4469
0
    dst_yuv24[2] = src_y[0];   // Y0
4470
0
  }
4471
0
}
4472
4473
// Filter 2 rows of AYUV UV's (444) into UV (420).
4474
// AYUV is VUYA in memory.  UV for NV12 is UV order in memory.
4475
void AYUVToUVRow_C(const uint8_t* src_ayuv,
4476
                   int src_stride_ayuv,
4477
                   uint8_t* dst_uv,
4478
0
                   int width) {
4479
  // Output a row of UV values, filtering 2x2 rows of AYUV.
4480
0
  int x;
4481
0
  for (x = 0; x < width - 1; x += 2) {
4482
0
    dst_uv[0] = (src_ayuv[1] + src_ayuv[5] + src_ayuv[src_stride_ayuv + 1] +
4483
0
                 src_ayuv[src_stride_ayuv + 5] + 2) >>
4484
0
                2;
4485
0
    dst_uv[1] = (src_ayuv[0] + src_ayuv[4] + src_ayuv[src_stride_ayuv + 0] +
4486
0
                 src_ayuv[src_stride_ayuv + 4] + 2) >>
4487
0
                2;
4488
0
    src_ayuv += 8;
4489
0
    dst_uv += 2;
4490
0
  }
4491
0
  if (width & 1) {
4492
0
    dst_uv[0] = (src_ayuv[1] + src_ayuv[src_stride_ayuv + 1] + 1) >> 1;
4493
0
    dst_uv[1] = (src_ayuv[0] + src_ayuv[src_stride_ayuv + 0] + 1) >> 1;
4494
0
  }
4495
0
}
4496
4497
// Filter 2 rows of AYUV UV's (444) into VU (420).
4498
void AYUVToVURow_C(const uint8_t* src_ayuv,
4499
                   int src_stride_ayuv,
4500
                   uint8_t* dst_vu,
4501
0
                   int width) {
4502
  // Output a row of VU values, filtering 2x2 rows of AYUV.
4503
0
  int x;
4504
0
  for (x = 0; x < width - 1; x += 2) {
4505
0
    dst_vu[0] = (src_ayuv[0] + src_ayuv[4] + src_ayuv[src_stride_ayuv + 0] +
4506
0
                 src_ayuv[src_stride_ayuv + 4] + 2) >>
4507
0
                2;
4508
0
    dst_vu[1] = (src_ayuv[1] + src_ayuv[5] + src_ayuv[src_stride_ayuv + 1] +
4509
0
                 src_ayuv[src_stride_ayuv + 5] + 2) >>
4510
0
                2;
4511
0
    src_ayuv += 8;
4512
0
    dst_vu += 2;
4513
0
  }
4514
0
  if (width & 1) {
4515
0
    dst_vu[0] = (src_ayuv[0] + src_ayuv[src_stride_ayuv + 0] + 1) >> 1;
4516
0
    dst_vu[1] = (src_ayuv[1] + src_ayuv[src_stride_ayuv + 1] + 1) >> 1;
4517
0
  }
4518
0
}
4519
4520
// Copy row of AYUV Y's into Y
4521
0
void AYUVToYRow_C(const uint8_t* src_ayuv, uint8_t* dst_y, int width) {
4522
  // Output a row of Y values.
4523
0
  int x;
4524
0
  for (x = 0; x < width; ++x) {
4525
0
    dst_y[x] = src_ayuv[2];  // v,u,y,a
4526
0
    src_ayuv += 4;
4527
0
  }
4528
0
}
4529
4530
// Convert UV plane of NV12 to VU of NV21.
4531
0
void SwapUVRow_C(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
4532
0
  int x;
4533
0
  for (x = 0; x < width; ++x) {
4534
0
    uint8_t u = src_uv[0];
4535
0
    uint8_t v = src_uv[1];
4536
0
    dst_vu[0] = v;
4537
0
    dst_vu[1] = u;
4538
0
    src_uv += 2;
4539
0
    dst_vu += 2;
4540
0
  }
4541
0
}
4542
4543
void HalfMergeUVRow_C(const uint8_t* src_u,
4544
                      int src_stride_u,
4545
                      const uint8_t* src_v,
4546
                      int src_stride_v,
4547
                      uint8_t* dst_uv,
4548
0
                      int width) {
4549
0
  int x;
4550
0
  for (x = 0; x < width - 1; x += 2) {
4551
0
    dst_uv[0] = (src_u[0] + src_u[1] + src_u[src_stride_u] +
4552
0
                 src_u[src_stride_u + 1] + 2) >>
4553
0
                2;
4554
0
    dst_uv[1] = (src_v[0] + src_v[1] + src_v[src_stride_v] +
4555
0
                 src_v[src_stride_v + 1] + 2) >>
4556
0
                2;
4557
0
    src_u += 2;
4558
0
    src_v += 2;
4559
0
    dst_uv += 2;
4560
0
  }
4561
0
  if (width & 1) {
4562
0
    dst_uv[0] = (src_u[0] + src_u[src_stride_u] + 1) >> 1;
4563
0
    dst_uv[1] = (src_v[0] + src_v[src_stride_v] + 1) >> 1;
4564
0
  }
4565
0
}
4566
4567
#undef STATIC_CAST
4568
4569
#ifdef __cplusplus
4570
}  // extern "C"
4571
}  // namespace libyuv
4572
#endif