Coverage Report

Created: 2026-01-18 06:45

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libavif/ext/libyuv/source/row_common.cc
Line
Count
Source
1
/*
2
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5
 *  that can be found in the LICENSE file in the root of the source
6
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS. All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
9
 */
10
11
#include "libyuv/row.h"
12
13
#include <assert.h>
14
#include <string.h>  // For memcpy and memset.
15
16
#include "libyuv/basic_types.h"
17
#include "libyuv/convert_argb.h"  // For kYuvI601Constants
18
19
#ifdef __cplusplus
20
namespace libyuv {
21
extern "C" {
22
#endif
23
24
#ifdef __cplusplus
25
6.81G
#define STATIC_CAST(type, expr) static_cast<type>(expr)
26
#else
27
#define STATIC_CAST(type, expr) (type)(expr)
28
#endif
29
30
// This macro controls YUV to RGB using unsigned math to extend range of
31
// YUV to RGB coefficients to 0 to 4 instead of 0 to 2 for more accuracy on B:
32
// LIBYUV_UNLIMITED_DATA
33
34
// Macros to enable unlimited data for each colorspace
35
// LIBYUV_UNLIMITED_BT601
36
// LIBYUV_UNLIMITED_BT709
37
// LIBYUV_UNLIMITED_BT2020
38
39
#if defined(LIBYUV_BIT_EXACT)
40
#define LIBYUV_UNATTENUATE_DUP 1
41
#endif
42
43
// llvm x86 is poor at ternary operator, so use branchless min/max.
44
45
#define USE_BRANCHLESS 1
#if defined(USE_BRANCHLESS)
// Returns v for v >= 0 and 0 otherwise.  -(v >= 0) is all ones for
// non-negative v and zero for negative v, so the AND selects v or 0.
static __inline int32_t clamp0(int32_t v) {
  return -(v >= 0) & v;
}
// TODO(fbarchard): make clamp255 preserve negative values.
// Returns 255 for v >= 255.  Smaller values are masked to their low 8 bits,
// so negative inputs are not preserved; clamp with clamp0 first.
static __inline int32_t clamp255(int32_t v) {
  return (-(v >= 255) | v) & 255;
}

// Same as clamp255 with a 10 bit limit (1023).
static __inline int32_t clamp1023(int32_t v) {
  return (-(v >= 1023) | v) & 1023;
}

// clamp to max
// Values below max are ANDed with max, so v is returned unchanged only when
// all of its set bits are also set in max.
static __inline int32_t ClampMax(int32_t v, int32_t max) {
  return (-(v >= max) | v) & max;
}

// Absolute value.  Computed in unsigned arithmetic so that INT32_MIN yields
// 2147483648 without signed overflow.
static __inline uint32_t Abs(int32_t v) {
  uint32_t m = 0u - (uint32_t)(v < 0);  // All ones when v is negative.
  return ((uint32_t)v + m) ^ m;
}
#else   // USE_BRANCHLESS
// Returns v for v >= 0 and 0 otherwise.
static __inline int32_t clamp0(int32_t v) {
  return (v < 0) ? 0 : v;
}

// Limits v to at most 255.  Negative values pass through unchanged.
static __inline int32_t clamp255(int32_t v) {
  return (v > 255) ? 255 : v;
}

// Limits v to at most 1023.  Negative values pass through unchanged.
static __inline int32_t clamp1023(int32_t v) {
  return (v > 1023) ? 1023 : v;
}

// Limits v to at most max.
static __inline int32_t ClampMax(int32_t v, int32_t max) {
  return (v > max) ? max : v;
}

// Absolute value.  Negation is done in unsigned arithmetic so that INT32_MIN
// yields 2147483648 without signed overflow.
static __inline uint32_t Abs(int32_t v) {
  return (v < 0) ? 0u - (uint32_t)v : (uint32_t)v;
}
#endif  // USE_BRANCHLESS
// Clamps val to the range 0..255.
static __inline uint32_t Clamp(int32_t val) {
  int v = clamp0(val);
  return (uint32_t)(clamp255(v));
}

// Clamps val to the range 0..1023.
static __inline uint32_t Clamp10(int32_t val) {
  int v = clamp0(val);
  return (uint32_t)(clamp1023(v));
}
98
99
// Little Endian
100
#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
    defined(_M_IX86) || defined(__arm__) || defined(_M_ARM) ||     \
    (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
// Stores the 32 bit value v at p in native (little endian) byte order.
// memcpy is used instead of a cast store so p needs no particular alignment.
static __inline void WRITEWORD(uint8_t* p, uint32_t v) {
  memcpy(p, &v, sizeof(v));
}
#else
// Stores the 32 bit value v at p, least significant byte first, regardless of
// the host byte order.
static inline void WRITEWORD(uint8_t* p, uint32_t v) {
  p[0] = (uint8_t)(v & 255);
  p[1] = (uint8_t)((v >> 8) & 255);
  p[2] = (uint8_t)((v >> 16) & 255);
  p[3] = (uint8_t)((v >> 24) & 255);
}
#endif
112
113
0
// Converts width pixels of 3 bytes each (read as B, G, R) to 4 bytes each
// (written as B, G, R, 255).
void RGB24ToARGBRow_C(const uint8_t* src_rgb24, uint8_t* dst_argb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t b = src_rgb24[0];
    uint8_t g = src_rgb24[1];
    uint8_t r = src_rgb24[2];
    dst_argb[0] = b;
    dst_argb[1] = g;
    dst_argb[2] = r;
    dst_argb[3] = 255u;  // Opaque alpha.
    dst_argb += 4;
    src_rgb24 += 3;
  }
}
127
128
0
// Converts width pixels of 3 bytes each (read as R, G, B) to 4 bytes each
// (written as B, G, R, 255).
void RAWToARGBRow_C(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t r = src_raw[0];
    uint8_t g = src_raw[1];
    uint8_t b = src_raw[2];
    dst_argb[0] = b;
    dst_argb[1] = g;
    dst_argb[2] = r;
    dst_argb[3] = 255u;  // Opaque alpha.
    dst_argb += 4;
    src_raw += 3;
  }
}
142
143
0
// Converts width pixels of 3 bytes each (read as R, G, B) to 4 bytes each
// (written as 255, B, G, R).
void RAWToRGBARow_C(const uint8_t* src_raw, uint8_t* dst_rgba, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t r = src_raw[0];
    uint8_t g = src_raw[1];
    uint8_t b = src_raw[2];
    dst_rgba[0] = 255u;  // Opaque alpha comes first in the output.
    dst_rgba[1] = b;
    dst_rgba[2] = g;
    dst_rgba[3] = r;
    dst_rgba += 4;
    src_raw += 3;
  }
}
157
158
0
// Swaps the first and third byte of each 3 byte pixel (R, G, B in; B, G, R
// out).  Each pixel is fully read before it is written.
void RAWToRGB24Row_C(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t r = src_raw[0];
    uint8_t g = src_raw[1];
    uint8_t b = src_raw[2];
    dst_rgb24[0] = b;
    dst_rgb24[1] = g;
    dst_rgb24[2] = r;
    dst_rgb24 += 3;
    src_raw += 3;
  }
}
171
172
void RGB565ToARGBRow_C(const uint8_t* src_rgb565,
173
                       uint8_t* dst_argb,
174
0
                       int width) {
175
0
  int x;
176
0
  for (x = 0; x < width; ++x) {
177
0
    uint8_t b = STATIC_CAST(uint8_t, src_rgb565[0] & 0x1f);
178
0
    uint8_t g = STATIC_CAST(
179
0
        uint8_t, (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3));
180
0
    uint8_t r = STATIC_CAST(uint8_t, src_rgb565[1] >> 3);
181
0
    dst_argb[0] = STATIC_CAST(uint8_t, (b << 3) | (b >> 2));
182
0
    dst_argb[1] = STATIC_CAST(uint8_t, (g << 2) | (g >> 4));
183
0
    dst_argb[2] = STATIC_CAST(uint8_t, (r << 3) | (r >> 2));
184
0
    dst_argb[3] = 255u;
185
0
    dst_argb += 4;
186
0
    src_rgb565 += 2;
187
0
  }
188
0
}
189
190
void ARGB1555ToARGBRow_C(const uint8_t* src_argb1555,
191
                         uint8_t* dst_argb,
192
0
                         int width) {
193
0
  int x;
194
0
  for (x = 0; x < width; ++x) {
195
0
    uint8_t b = STATIC_CAST(uint8_t, src_argb1555[0] & 0x1f);
196
0
    uint8_t g = STATIC_CAST(
197
0
        uint8_t, (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3));
198
0
    uint8_t r = STATIC_CAST(uint8_t, (src_argb1555[1] & 0x7c) >> 2);
199
0
    uint8_t a = STATIC_CAST(uint8_t, src_argb1555[1] >> 7);
200
0
    dst_argb[0] = STATIC_CAST(uint8_t, (b << 3) | (b >> 2));
201
0
    dst_argb[1] = STATIC_CAST(uint8_t, (g << 3) | (g >> 2));
202
0
    dst_argb[2] = STATIC_CAST(uint8_t, (r << 3) | (r >> 2));
203
0
    dst_argb[3] = -a;
204
0
    dst_argb += 4;
205
0
    src_argb1555 += 2;
206
0
  }
207
0
}
208
209
void ARGB4444ToARGBRow_C(const uint8_t* src_argb4444,
210
                         uint8_t* dst_argb,
211
0
                         int width) {
212
0
  int x;
213
0
  for (x = 0; x < width; ++x) {
214
0
    uint8_t b = STATIC_CAST(uint8_t, src_argb4444[0] & 0x0f);
215
0
    uint8_t g = STATIC_CAST(uint8_t, src_argb4444[0] >> 4);
216
0
    uint8_t r = STATIC_CAST(uint8_t, src_argb4444[1] & 0x0f);
217
0
    uint8_t a = STATIC_CAST(uint8_t, src_argb4444[1] >> 4);
218
0
    dst_argb[0] = STATIC_CAST(uint8_t, (b << 4) | b);
219
0
    dst_argb[1] = STATIC_CAST(uint8_t, (g << 4) | g);
220
0
    dst_argb[2] = STATIC_CAST(uint8_t, (r << 4) | r);
221
0
    dst_argb[3] = STATIC_CAST(uint8_t, (a << 4) | a);
222
0
    dst_argb += 4;
223
0
    src_argb4444 += 2;
224
0
  }
225
0
}
226
227
0
// Converts width 32 bit pixels holding three 10 bit channels and a 2 bit
// alpha to 32 bit pixels holding four 8 bit channels.  Each 10 bit channel
// keeps its top 8 bits; alpha is scaled by 0x55.
// Pixels are loaded and stored as native-endian 32 bit words via memcpy, so
// neither pointer needs to be 4 byte aligned.
void AR30ToARGBRow_C(const uint8_t* src_ar30, uint8_t* dst_argb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t ar30;
    memcpy(&ar30, src_ar30, sizeof ar30);
    uint32_t b = (ar30 >> 2) & 0xff;
    uint32_t g = (ar30 >> 12) & 0xff;
    uint32_t r = (ar30 >> 22) & 0xff;
    uint32_t a = (ar30 >> 30) * 0x55;  // Replicate 2 bits to 8 bits.
    uint32_t argb = b | (g << 8) | (r << 16) | (a << 24);
    memcpy(dst_argb, &argb, sizeof argb);
    dst_argb += 4;
    src_ar30 += 4;
  }
}
241
242
0
// Same as the AR30 to ARGB conversion, but the 8 bit word is packed with red
// in bits 0..7 and blue in bits 16..23.
// Pixels are loaded and stored as native-endian 32 bit words via memcpy, so
// neither pointer needs to be 4 byte aligned.
void AR30ToABGRRow_C(const uint8_t* src_ar30, uint8_t* dst_abgr, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t ar30;
    memcpy(&ar30, src_ar30, sizeof ar30);
    uint32_t b = (ar30 >> 2) & 0xff;
    uint32_t g = (ar30 >> 12) & 0xff;
    uint32_t r = (ar30 >> 22) & 0xff;
    uint32_t a = (ar30 >> 30) * 0x55;  // Replicate 2 bits to 8 bits.
    uint32_t abgr = r | (g << 8) | (b << 16) | (a << 24);
    memcpy(dst_abgr, &abgr, sizeof abgr);
    dst_abgr += 4;
    src_ar30 += 4;
  }
}
256
257
0
// Swaps the 10 bit fields in bits 0..9 and bits 20..29 of each 32 bit pixel.
// The middle 10 bit field and the top 2 bits are kept in place.
// Pixels are loaded and stored as native-endian 32 bit words via memcpy, so
// neither pointer needs to be 4 byte aligned.
void AR30ToAB30Row_C(const uint8_t* src_ar30, uint8_t* dst_ab30, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t ar30;
    memcpy(&ar30, src_ar30, sizeof ar30);
    uint32_t b = ar30 & 0x3ff;
    uint32_t ga = ar30 & 0xc00ffc00;  // Green and alpha stay where they are.
    uint32_t r = (ar30 >> 20) & 0x3ff;
    uint32_t ab30 = r | ga | (b << 20);
    memcpy(dst_ab30, &ab30, sizeof ab30);
    dst_ab30 += 4;
    src_ar30 += 4;
  }
}
270
271
0
// Swaps bytes 0 and 2 of each 4 byte pixel (B, G, R, A in; R, G, B, A out).
// Each pixel is fully read before it is written.
void ARGBToABGRRow_C(const uint8_t* src_argb, uint8_t* dst_abgr, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t b = src_argb[0];
    uint8_t g = src_argb[1];
    uint8_t r = src_argb[2];
    uint8_t a = src_argb[3];
    dst_abgr[0] = r;
    dst_abgr[1] = g;
    dst_abgr[2] = b;
    dst_abgr[3] = a;
    dst_abgr += 4;
    src_argb += 4;
  }
}
286
287
0
// Reverses the byte order of each 4 byte pixel (B, G, R, A in; A, R, G, B
// out).  Each pixel is fully read before it is written.
void ARGBToBGRARow_C(const uint8_t* src_argb, uint8_t* dst_bgra, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t b = src_argb[0];
    uint8_t g = src_argb[1];
    uint8_t r = src_argb[2];
    uint8_t a = src_argb[3];
    dst_bgra[0] = a;
    dst_bgra[1] = r;
    dst_bgra[2] = g;
    dst_bgra[3] = b;
    dst_bgra += 4;
    src_argb += 4;
  }
}
302
303
0
// Rotates each 4 byte pixel so the last byte comes first (B, G, R, A in;
// A, B, G, R out).  Each pixel is fully read before it is written.
void ARGBToRGBARow_C(const uint8_t* src_argb, uint8_t* dst_rgba, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t b = src_argb[0];
    uint8_t g = src_argb[1];
    uint8_t r = src_argb[2];
    uint8_t a = src_argb[3];
    dst_rgba[0] = a;
    dst_rgba[1] = b;
    dst_rgba[2] = g;
    dst_rgba[3] = r;
    dst_rgba += 4;
    src_argb += 4;
  }
}
318
319
0
// Copies the first 3 bytes (B, G, R) of each 4 byte pixel and drops the
// fourth byte.
void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t b = src_argb[0];
    uint8_t g = src_argb[1];
    uint8_t r = src_argb[2];
    dst_rgb[0] = b;
    dst_rgb[1] = g;
    dst_rgb[2] = r;
    dst_rgb += 3;
    src_argb += 4;
  }
}
332
333
0
// Writes the first 3 bytes of each 4 byte pixel in reverse order (B, G, R, A
// in; R, G, B out) and drops the fourth byte.
void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t b = src_argb[0];
    uint8_t g = src_argb[1];
    uint8_t r = src_argb[2];
    dst_rgb[0] = r;
    dst_rgb[1] = g;
    dst_rgb[2] = b;
    dst_rgb += 3;
    src_argb += 4;
  }
}
346
347
0
// Rotates each 4 byte pixel so the first byte goes last (A, B, G, R in;
// B, G, R, A out).  Each pixel is fully read before it is written.
void RGBAToARGBRow_C(const uint8_t* src_rgba, uint8_t* dst_argb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t a = src_rgba[0];
    uint8_t b = src_rgba[1];
    uint8_t g = src_rgba[2];
    uint8_t r = src_rgba[3];
    dst_argb[0] = b;
    dst_argb[1] = g;
    dst_argb[2] = r;
    dst_argb[3] = a;
    dst_argb += 4;
    src_rgba += 4;
  }
}
362
363
0
// Packs width 4 byte pixels (B, G, R, A) into 2 byte pixels holding the top
// 5 bits of blue, 6 bits of green and 5 bits of red.  The fourth source byte
// is ignored.
// Each 16 bit value is stored low byte first with byte stores, so the output
// layout does not depend on host byte order and dst_rgb needs no alignment.
// Nothing is written when width <= 0.
void ARGBToRGB565Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    // Unsigned arithmetic keeps every shift below well defined.
    uint32_t b = src_argb[0] >> 3;
    uint32_t g = src_argb[1] >> 2;
    uint32_t r = src_argb[2] >> 3;
    uint32_t rgb565 = b | (g << 5) | (r << 11);
    dst_rgb[0] = (uint8_t)(rgb565 & 255);
    dst_rgb[1] = (uint8_t)(rgb565 >> 8);
    dst_rgb += 2;
    src_argb += 4;
  }
}
384
385
// dither4 is a row of 4 values from 4x4 dither matrix.
386
// The 4x4 matrix contains values to increase RGB.  When converting to
387
// fewer bits (565) this provides an ordered dither.
388
// The order in the 4x4 matrix in first byte is upper left.
389
// The 4 values are passed as an int, then referenced as an array, so
390
// endian will not affect order of the original matrix.  But the dither4
391
// will contain the first pixel in the lower byte for little endian
392
// or the upper byte for big endian.
393
// Packs width 4 byte pixels (B, G, R, A) into 2 byte 5/6/5 pixels after
// adding a dither value to each of B, G and R.
// dither4 is read as 4 bytes in memory order; pixel x uses byte (x & 3).
// Sums are limited to 255 before the low bits are dropped.
// Each 16 bit value is stored low byte first with byte stores, so the output
// layout does not depend on host byte order and dst_rgb needs no alignment.
// Nothing is written when width <= 0.
void ARGBToRGB565DitherRow_C(const uint8_t* src_argb,
                             uint8_t* dst_rgb,
                             uint32_t dither4,
                             int width) {
  uint8_t dither[4];
  int x;
  memcpy(dither, &dither4, sizeof(dither));
  for (x = 0; x < width; ++x) {
    int d = dither[x & 3];
    uint32_t b = (uint32_t)(clamp255(src_argb[0] + d)) >> 3;
    uint32_t g = (uint32_t)(clamp255(src_argb[1] + d)) >> 2;
    uint32_t r = (uint32_t)(clamp255(src_argb[2] + d)) >> 3;
    uint32_t rgb565 = b | (g << 5) | (r << 11);
    dst_rgb[0] = (uint8_t)(rgb565 & 255);
    dst_rgb[1] = (uint8_t)(rgb565 >> 8);
    dst_rgb += 2;
    src_argb += 4;
  }
}
422
423
0
// Packs width 4 byte pixels (B, G, R, A) into 2 byte pixels holding the top
// 5 bits each of blue, green and red plus the top bit of alpha in bit 15.
// Each 16 bit value is stored low byte first with byte stores, so the output
// layout does not depend on host byte order and dst_rgb needs no alignment.
// Nothing is written when width <= 0.
void ARGBToARGB1555Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t b = src_argb[0] >> 3;
    uint32_t g = src_argb[1] >> 3;
    uint32_t r = src_argb[2] >> 3;
    uint32_t a = src_argb[3] >> 7;
    uint32_t argb1555 = b | (g << 5) | (r << 10) | (a << 15);
    dst_rgb[0] = (uint8_t)(argb1555 & 255);
    dst_rgb[1] = (uint8_t)(argb1555 >> 8);
    dst_rgb += 2;
    src_argb += 4;
  }
}
450
451
0
// Packs width 4 byte pixels (B, G, R, A) into 2 byte pixels holding the top
// 4 bits of each channel: B in bits 0..3, G in 4..7, R in 8..11, A in 12..15.
// Each 16 bit value is stored low byte first with byte stores, so the output
// layout does not depend on host byte order and dst_rgb needs no alignment.
// Nothing is written when width <= 0.
void ARGBToARGB4444Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t b = src_argb[0] >> 4;
    uint32_t g = src_argb[1] >> 4;
    uint32_t r = src_argb[2] >> 4;
    uint32_t a = src_argb[3] >> 4;
    uint32_t argb4444 = b | (g << 4) | (r << 8) | (a << 12);
    dst_rgb[0] = (uint8_t)(argb4444 & 255);
    dst_rgb[1] = (uint8_t)(argb4444 >> 8);
    dst_rgb += 2;
    src_argb += 4;
  }
}
478
479
0
// Converts width 4 byte pixels (R, G, B, A) to 32 bit words with blue in
// bits 0..9, green in 10..19, red in 20..29 and alpha in 30..31.
// Each 8 bit channel becomes 10 bits by appending its own top 2 bits; alpha
// keeps only its top 2 bits.
// Words are stored in native byte order via memcpy, so dst_ar30 needs no
// alignment.
void ABGRToAR30Row_C(const uint8_t* src_abgr, uint8_t* dst_ar30, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t r0 = (src_abgr[0] >> 6) | ((uint32_t)(src_abgr[0]) << 2);
    uint32_t g0 = (src_abgr[1] >> 6) | ((uint32_t)(src_abgr[1]) << 2);
    uint32_t b0 = (src_abgr[2] >> 6) | ((uint32_t)(src_abgr[2]) << 2);
    uint32_t a0 = (src_abgr[3] >> 6);
    uint32_t ar30 = b0 | (g0 << 10) | (r0 << 20) | (a0 << 30);
    memcpy(dst_ar30, &ar30, sizeof ar30);
    dst_ar30 += 4;
    src_abgr += 4;
  }
}
492
493
0
// Converts width 4 byte pixels (B, G, R, A) to 32 bit words with blue in
// bits 0..9, green in 10..19, red in 20..29 and alpha in 30..31.
// Each 8 bit channel becomes 10 bits by appending its own top 2 bits; alpha
// keeps only its top 2 bits.
// Words are stored in native byte order via memcpy, so dst_ar30 needs no
// alignment.
void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t b0 = (src_argb[0] >> 6) | ((uint32_t)(src_argb[0]) << 2);
    uint32_t g0 = (src_argb[1] >> 6) | ((uint32_t)(src_argb[1]) << 2);
    uint32_t r0 = (src_argb[2] >> 6) | ((uint32_t)(src_argb[2]) << 2);
    uint32_t a0 = (src_argb[3] >> 6);
    uint32_t ar30 = b0 | (g0 << 10) | (r0 << 20) | (a0 << 30);
    memcpy(dst_ar30, &ar30, sizeof ar30);
    dst_ar30 += 4;
    src_argb += 4;
  }
}
506
507
0
// Widens width pixels of four 8 bit channels to four 16 bit channels in the
// same order.  Multiplying by 0x0101 copies the byte into both halves, so
// 0xff becomes 0xffff.
void ARGBToAR64Row_C(const uint8_t* src_argb, uint16_t* dst_ar64, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint16_t b = src_argb[0] * 0x0101;
    uint16_t g = src_argb[1] * 0x0101;
    uint16_t r = src_argb[2] * 0x0101;
    uint16_t a = src_argb[3] * 0x0101;
    dst_ar64[0] = b;
    dst_ar64[1] = g;
    dst_ar64[2] = r;
    dst_ar64[3] = a;
    dst_ar64 += 4;
    src_argb += 4;
  }
}
522
523
0
// Widens width pixels of four 8 bit channels (B, G, R, A) to four 16 bit
// channels written as R, G, B, A.  Multiplying by 0x0101 copies the byte into
// both halves, so 0xff becomes 0xffff.
void ARGBToAB64Row_C(const uint8_t* src_argb, uint16_t* dst_ab64, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint16_t b = src_argb[0] * 0x0101;
    uint16_t g = src_argb[1] * 0x0101;
    uint16_t r = src_argb[2] * 0x0101;
    uint16_t a = src_argb[3] * 0x0101;
    dst_ab64[0] = r;
    dst_ab64[1] = g;
    dst_ab64[2] = b;
    dst_ab64[3] = a;
    dst_ab64 += 4;
    src_argb += 4;
  }
}
538
539
0
// Narrows width pixels of four 16 bit channels to four 8 bit channels in the
// same order, keeping the top 8 bits of each channel.
void AR64ToARGBRow_C(const uint16_t* src_ar64, uint8_t* dst_argb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t b = src_ar64[0] >> 8;
    uint8_t g = src_ar64[1] >> 8;
    uint8_t r = src_ar64[2] >> 8;
    uint8_t a = src_ar64[3] >> 8;
    dst_argb[0] = b;
    dst_argb[1] = g;
    dst_argb[2] = r;
    dst_argb[3] = a;
    dst_argb += 4;
    src_ar64 += 4;
  }
}
554
555
0
// Narrows width pixels of four 16 bit channels (R, G, B, A) to four 8 bit
// channels written as B, G, R, A, keeping the top 8 bits of each channel.
void AB64ToARGBRow_C(const uint16_t* src_ab64, uint8_t* dst_argb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t r = src_ab64[0] >> 8;
    uint8_t g = src_ab64[1] >> 8;
    uint8_t b = src_ab64[2] >> 8;
    uint8_t a = src_ab64[3] >> 8;
    dst_argb[0] = b;
    dst_argb[1] = g;
    dst_argb[2] = r;
    dst_argb[3] = a;
    dst_argb += 4;
    src_ab64 += 4;
  }
}
570
571
0
// Swaps 16 bit channels 0 and 2 of each 4 channel pixel (B, G, R, A in;
// R, G, B, A out).  Each pixel is fully read before it is written.
void AR64ToAB64Row_C(const uint16_t* src_ar64, uint16_t* dst_ab64, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint16_t b = src_ar64[0];
    uint16_t g = src_ar64[1];
    uint16_t r = src_ar64[2];
    uint16_t a = src_ar64[3];
    dst_ab64[0] = r;
    dst_ab64[1] = g;
    dst_ab64[2] = b;
    dst_ab64[3] = a;
    dst_ab64 += 4;
    src_ar64 += 4;
  }
}
586
587
// TODO(fbarchard): Make shuffle compatible with SIMD versions
588
// Reorders the four 16 bit channels of each pixel.
// shuffler holds byte offsets; only entries 0, 2, 4 and 6 are read, and each
// is halved to give the index of the 16 bit source channel for output
// channels 0..3.
// The loop runs width / 2 times and each iteration moves one 4 channel pixel.
// NOTE(review): both buffers are accessed through uint16_t pointers, so
// callers presumably pass 2 byte aligned memory - confirm.
void AR64ShuffleRow_C(const uint8_t* src_ar64,
                      uint8_t* dst_ar64,
                      const uint8_t* shuffler,
                      int width) {
  const uint16_t* src_ar64_16 = (const uint16_t*)src_ar64;
  uint16_t* dst_ar64_16 = (uint16_t*)dst_ar64;
  int index0 = shuffler[0] / 2;
  int index1 = shuffler[2] / 2;
  int index2 = shuffler[4] / 2;
  int index3 = shuffler[6] / 2;
  // Shuffle a row of AR64.
  int x;
  for (x = 0; x < width / 2; ++x) {
    // To support in-place conversion.
    uint16_t b = src_ar64_16[index0];
    uint16_t g = src_ar64_16[index1];
    uint16_t r = src_ar64_16[index2];
    uint16_t a = src_ar64_16[index3];
    dst_ar64_16[0] = b;
    dst_ar64_16[1] = g;
    dst_ar64_16[2] = r;
    dst_ar64_16[3] = a;
    src_ar64_16 += 4;
    dst_ar64_16 += 4;
  }
}
614
// BT601 8 bit Y:
615
// b 0.114 * 219 = 24.966  = 25
616
// g 0.587 * 219 = 128.553 = 129
617
// r 0.299 * 219 = 65.481  = 66
618
// BT601 8 bit U:
619
// b  0.875  * 128 = 112.0    = 112
620
// g -0.5781 * 128 = −73.9968 = -74
621
// r -0.2969 * 128 = −38.0032 = -38
622
// BT601 8 bit V:
623
// b -0.1406 * 128 = −17.9968 = -18
624
// g -0.7344 * 128 = −94.0032 = -94
625
// r  0.875  * 128 = 112.0    = 112
626
0
static __inline uint8_t RGBToY(uint8_t r, uint8_t g, uint8_t b) {
627
0
  return STATIC_CAST(uint8_t, (66 * r + 129 * g + 25 * b + 0x1080) >> 8);
628
0
}
629
0
static __inline uint8_t RGBToU(uint8_t r, uint8_t g, uint8_t b) {
630
0
  return STATIC_CAST(uint8_t, (112 * b - 74 * g - 38 * r + 0x8000) >> 8);
631
0
}
632
0
static __inline uint8_t RGBToV(uint8_t r, uint8_t g, uint8_t b) {
633
0
  return STATIC_CAST(uint8_t, (112 * r - 94 * g - 18 * b + 0x8000) >> 8);
634
0
}
635
0
#define AVGB(a, b) (((a) + (b) + 1) >> 1)
636
637
// Generates NAME##ToYRow_C and NAME##ToUVRow_C for a packed RGB layout.
// R, G and B are the byte offsets of each channel inside a pixel and BPP is
// the pixel size in bytes.
// ToYRow converts each pixel to one luma byte with RGBToY.
// ToUVRow produces one U and one V byte per 2 pixels: it averages, with
// rounding, a 2x2 group taken from this row and the row src_stride_rgb bytes
// away.  For an odd width the last column averages its 2 rows only.
#define MAKEROWY(NAME, R, G, B, BPP)                                       \
  void NAME##ToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
    int x;                                                                 \
    for (x = 0; x < width; ++x) {                                          \
      dst_y[0] = RGBToY(src_rgb[R], src_rgb[G], src_rgb[B]);               \
      src_rgb += BPP;                                                      \
      dst_y += 1;                                                          \
    }                                                                      \
  }                                                                        \
  void NAME##ToUVRow_C(const uint8_t* src_rgb, int src_stride_rgb,         \
                       uint8_t* dst_u, uint8_t* dst_v, int width) {        \
    const uint8_t* src_rgb1 = src_rgb + src_stride_rgb;                    \
    int x;                                                                 \
    for (x = 0; x < width - 1; x += 2) {                                   \
      uint8_t ab = (src_rgb[B] + src_rgb[B + BPP] + src_rgb1[B] +          \
                    src_rgb1[B + BPP] + 2) >>                              \
                   2;                                                      \
      uint8_t ag = (src_rgb[G] + src_rgb[G + BPP] + src_rgb1[G] +          \
                    src_rgb1[G + BPP] + 2) >>                              \
                   2;                                                      \
      uint8_t ar = (src_rgb[R] + src_rgb[R + BPP] + src_rgb1[R] +          \
                    src_rgb1[R + BPP] + 2) >>                              \
                   2;                                                      \
      dst_u[0] = RGBToU(ar, ag, ab);                                       \
      dst_v[0] = RGBToV(ar, ag, ab);                                       \
      src_rgb += BPP * 2;                                                  \
      src_rgb1 += BPP * 2;                                                 \
      dst_u += 1;                                                          \
      dst_v += 1;                                                          \
    }                                                                      \
    if (width & 1) {                                                       \
      uint8_t ab = (src_rgb[B] + src_rgb1[B] + 1) >> 1;                    \
      uint8_t ag = (src_rgb[G] + src_rgb1[G] + 1) >> 1;                    \
      uint8_t ar = (src_rgb[R] + src_rgb1[R] + 1) >> 1;                    \
      dst_u[0] = RGBToU(ar, ag, ab);                                       \
      dst_v[0] = RGBToV(ar, ag, ab);                                       \
    }                                                                      \
  }

MAKEROWY(ARGB, 2, 1, 0, 4)
MAKEROWY(BGRA, 1, 2, 3, 4)
MAKEROWY(ABGR, 0, 1, 2, 4)
MAKEROWY(RGBA, 3, 2, 1, 4)
MAKEROWY(RGB24, 2, 1, 0, 3)
MAKEROWY(RAW, 0, 1, 2, 3)
#undef MAKEROWY
683
684
// JPeg uses BT.601-1 full range
685
// y =  0.29900 * r + 0.58700 * g + 0.11400 * b
686
// u = -0.16874 * r - 0.33126 * g + 0.50000 * b  + center
687
// v =  0.50000 * r - 0.41869 * g - 0.08131 * b  + center
688
// JPeg 8 bit Y:
689
// b 0.11400 * 256 = 29.184 = 29
690
// g 0.58700 * 256 = 150.272 = 150
691
// r 0.29900 * 256 = 76.544 = 77
692
// JPeg 8 bit U:
693
// b  0.50000 * 256 = 128.0 = 128
694
// g -0.33126 * 256 = −84.80256 = -85
695
// r -0.16874 * 256 = −43.19744 = -43
696
// JPeg 8 bit V:
697
// b -0.08131 * 256 = −20.81536 = -21
698
// g -0.41869 * 256 = −107.18464 = -107
699
// r  0.50000 * 256 = 128.0 = 128
700
701
// 8 bit
702
0
// Full range luma from 8 bit R, G, B using the 77/150/29 weights listed
// above.  128 adds 0.5 before the shift by 8.
static __inline uint8_t RGBToYJ(uint8_t r, uint8_t g, uint8_t b) {
  return (77 * r + 150 * g + 29 * b + 128) >> 8;
}
// Full range U from 8 bit R, G, B.  0x8000 centers the result on 128.
static __inline uint8_t RGBToUJ(uint8_t r, uint8_t g, uint8_t b) {
  return (128 * b - 85 * g - 43 * r + 0x8000) >> 8;
}
// Full range V from 8 bit R, G, B.  0x8000 centers the result on 128.
static __inline uint8_t RGBToVJ(uint8_t r, uint8_t g, uint8_t b) {
  return (128 * r - 107 * g - 21 * b + 0x8000) >> 8;
}
711
712
// ARGBToYJ_C and ARGBToUVJ_C
713
// Generates NAME##ToYJRow_C and NAME##ToUVJRow_C for a packed RGB layout,
// using the full range RGBToYJ / RGBToUJ / RGBToVJ conversions.
// R, G and B are the byte offsets of each channel inside a pixel and BPP is
// the pixel size in bytes.
// ToUVJRow produces one U and one V byte per 2 pixels: it averages, with
// rounding, a 2x2 group taken from this row and the row src_stride_rgb bytes
// away.  For an odd width the last column averages its 2 rows only.
#define MAKEROWYJ(NAME, R, G, B, BPP)                                       \
  void NAME##ToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
    int x;                                                                  \
    for (x = 0; x < width; ++x) {                                           \
      dst_y[0] = RGBToYJ(src_rgb[R], src_rgb[G], src_rgb[B]);               \
      src_rgb += BPP;                                                       \
      dst_y += 1;                                                           \
    }                                                                       \
  }                                                                         \
  void NAME##ToUVJRow_C(const uint8_t* src_rgb, int src_stride_rgb,         \
                        uint8_t* dst_u, uint8_t* dst_v, int width) {        \
    const uint8_t* src_rgb1 = src_rgb + src_stride_rgb;                     \
    int x;                                                                  \
    for (x = 0; x < width - 1; x += 2) {                                    \
      uint8_t ab = (src_rgb[B] + src_rgb[B + BPP] + src_rgb1[B] +           \
                    src_rgb1[B + BPP] + 2) >>                               \
                   2;                                                       \
      uint8_t ag = (src_rgb[G] + src_rgb[G + BPP] + src_rgb1[G] +           \
                    src_rgb1[G + BPP] + 2) >>                               \
                   2;                                                       \
      uint8_t ar = (src_rgb[R] + src_rgb[R + BPP] + src_rgb1[R] +           \
                    src_rgb1[R + BPP] + 2) >>                               \
                   2;                                                       \
      dst_u[0] = RGBToUJ(ar, ag, ab);                                       \
      dst_v[0] = RGBToVJ(ar, ag, ab);                                       \
      src_rgb += BPP * 2;                                                   \
      src_rgb1 += BPP * 2;                                                  \
      dst_u += 1;                                                           \
      dst_v += 1;                                                           \
    }                                                                       \
    if (width & 1) {                                                        \
      uint8_t ab = (src_rgb[B] + src_rgb1[B] + 1) >> 1;                     \
      uint8_t ag = (src_rgb[G] + src_rgb1[G] + 1) >> 1;                     \
      uint8_t ar = (src_rgb[R] + src_rgb1[R] + 1) >> 1;                     \
      dst_u[0] = RGBToUJ(ar, ag, ab);                                       \
      dst_v[0] = RGBToVJ(ar, ag, ab);                                       \
    }                                                                       \
  }

MAKEROWYJ(ARGB, 2, 1, 0, 4)
MAKEROWYJ(ABGR, 0, 1, 2, 4)
MAKEROWYJ(RGBA, 3, 2, 1, 4)
MAKEROWYJ(RGB24, 2, 1, 0, 3)
MAKEROWYJ(RAW, 0, 1, 2, 3)
#undef MAKEROWYJ
758
759
0
void RGB565ToYRow_C(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
760
0
  int x;
761
0
  for (x = 0; x < width; ++x) {
762
0
    uint8_t b = src_rgb565[0] & 0x1f;
763
0
    uint8_t g = STATIC_CAST(
764
0
        uint8_t, (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3));
765
0
    uint8_t r = src_rgb565[1] >> 3;
766
0
    b = STATIC_CAST(uint8_t, (b << 3) | (b >> 2));
767
0
    g = STATIC_CAST(uint8_t, (g << 2) | (g >> 4));
768
0
    r = STATIC_CAST(uint8_t, (r << 3) | (r >> 2));
769
0
    dst_y[0] = RGBToY(r, g, b);
770
0
    src_rgb565 += 2;
771
0
    dst_y += 1;
772
0
  }
773
0
}
774
775
0
void ARGB1555ToYRow_C(const uint8_t* src_argb1555, uint8_t* dst_y, int width) {
776
0
  int x;
777
0
  for (x = 0; x < width; ++x) {
778
0
    uint8_t b = src_argb1555[0] & 0x1f;
779
0
    uint8_t g = STATIC_CAST(
780
0
        uint8_t, (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3));
781
0
    uint8_t r = (src_argb1555[1] & 0x7c) >> 2;
782
0
    b = STATIC_CAST(uint8_t, (b << 3) | (b >> 2));
783
0
    g = STATIC_CAST(uint8_t, (g << 3) | (g >> 2));
784
0
    r = STATIC_CAST(uint8_t, (r << 3) | (r >> 2));
785
0
    dst_y[0] = RGBToY(r, g, b);
786
0
    src_argb1555 += 2;
787
0
    dst_y += 1;
788
0
  }
789
0
}
790
791
0
void ARGB4444ToYRow_C(const uint8_t* src_argb4444, uint8_t* dst_y, int width) {
792
0
  int x;
793
0
  for (x = 0; x < width; ++x) {
794
0
    uint8_t b = src_argb4444[0] & 0x0f;
795
0
    uint8_t g = src_argb4444[0] >> 4;
796
0
    uint8_t r = src_argb4444[1] & 0x0f;
797
0
    b = STATIC_CAST(uint8_t, (b << 4) | b);
798
0
    g = STATIC_CAST(uint8_t, (g << 4) | g);
799
0
    r = STATIC_CAST(uint8_t, (r << 4) | r);
800
0
    dst_y[0] = RGBToY(r, g, b);
801
0
    src_argb4444 += 2;
802
0
    dst_y += 1;
803
0
  }
804
0
}
805
806
void RGB565ToUVRow_C(const uint8_t* src_rgb565,
807
                     int src_stride_rgb565,
808
                     uint8_t* dst_u,
809
                     uint8_t* dst_v,
810
0
                     int width) {
811
0
  const uint8_t* next_rgb565 = src_rgb565 + src_stride_rgb565;
812
0
  int x;
813
0
  for (x = 0; x < width - 1; x += 2) {
814
0
    uint8_t b0 = STATIC_CAST(uint8_t, src_rgb565[0] & 0x1f);
815
0
    uint8_t g0 = STATIC_CAST(
816
0
        uint8_t, (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3));
817
0
    uint8_t r0 = STATIC_CAST(uint8_t, src_rgb565[1] >> 3);
818
0
    uint8_t b1 = STATIC_CAST(uint8_t, src_rgb565[2] & 0x1f);
819
0
    uint8_t g1 = STATIC_CAST(
820
0
        uint8_t, (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3));
821
0
    uint8_t r1 = STATIC_CAST(uint8_t, src_rgb565[3] >> 3);
822
0
    uint8_t b2 = STATIC_CAST(uint8_t, next_rgb565[0] & 0x1f);
823
0
    uint8_t g2 = STATIC_CAST(
824
0
        uint8_t, (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3));
825
0
    uint8_t r2 = STATIC_CAST(uint8_t, next_rgb565[1] >> 3);
826
0
    uint8_t b3 = STATIC_CAST(uint8_t, next_rgb565[2] & 0x1f);
827
0
    uint8_t g3 = STATIC_CAST(
828
0
        uint8_t, (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3));
829
0
    uint8_t r3 = STATIC_CAST(uint8_t, next_rgb565[3] >> 3);
830
831
0
    b0 = STATIC_CAST(uint8_t, (b0 << 3) | (b0 >> 2));
832
0
    g0 = STATIC_CAST(uint8_t, (g0 << 2) | (g0 >> 4));
833
0
    r0 = STATIC_CAST(uint8_t, (r0 << 3) | (r0 >> 2));
834
0
    b1 = STATIC_CAST(uint8_t, (b1 << 3) | (b1 >> 2));
835
0
    g1 = STATIC_CAST(uint8_t, (g1 << 2) | (g1 >> 4));
836
0
    r1 = STATIC_CAST(uint8_t, (r1 << 3) | (r1 >> 2));
837
0
    b2 = STATIC_CAST(uint8_t, (b2 << 3) | (b2 >> 2));
838
0
    g2 = STATIC_CAST(uint8_t, (g2 << 2) | (g2 >> 4));
839
0
    r2 = STATIC_CAST(uint8_t, (r2 << 3) | (r2 >> 2));
840
0
    b3 = STATIC_CAST(uint8_t, (b3 << 3) | (b3 >> 2));
841
0
    g3 = STATIC_CAST(uint8_t, (g3 << 2) | (g3 >> 4));
842
0
    r3 = STATIC_CAST(uint8_t, (r3 << 3) | (r3 >> 2));
843
844
0
    uint8_t b = (b0 + b1 + b2 + b3 + 2) >> 2;
845
0
    uint8_t g = (g0 + g1 + g2 + g3 + 2) >> 2;
846
0
    uint8_t r = (r0 + r1 + r2 + r3 + 2) >> 2;
847
0
    dst_u[0] = RGBToU(r, g, b);
848
0
    dst_v[0] = RGBToV(r, g, b);
849
850
0
    src_rgb565 += 4;
851
0
    next_rgb565 += 4;
852
0
    dst_u += 1;
853
0
    dst_v += 1;
854
0
  }
855
0
  if (width & 1) {
856
0
    uint8_t b0 = STATIC_CAST(uint8_t, src_rgb565[0] & 0x1f);
857
0
    uint8_t g0 = STATIC_CAST(
858
0
        uint8_t, (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3));
859
0
    uint8_t r0 = STATIC_CAST(uint8_t, src_rgb565[1] >> 3);
860
0
    uint8_t b2 = STATIC_CAST(uint8_t, next_rgb565[0] & 0x1f);
861
0
    uint8_t g2 = STATIC_CAST(
862
0
        uint8_t, (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3));
863
0
    uint8_t r2 = STATIC_CAST(uint8_t, next_rgb565[1] >> 3);
864
0
    b0 = STATIC_CAST(uint8_t, (b0 << 3) | (b0 >> 2));
865
0
    g0 = STATIC_CAST(uint8_t, (g0 << 2) | (g0 >> 4));
866
0
    r0 = STATIC_CAST(uint8_t, (r0 << 3) | (r0 >> 2));
867
0
    b2 = STATIC_CAST(uint8_t, (b2 << 3) | (b2 >> 2));
868
0
    g2 = STATIC_CAST(uint8_t, (g2 << 2) | (g2 >> 4));
869
0
    r2 = STATIC_CAST(uint8_t, (r2 << 3) | (r2 >> 2));
870
871
0
    uint8_t ab = AVGB(b0, b2);
872
0
    uint8_t ag = AVGB(g0, g2);
873
0
    uint8_t ar = AVGB(r0, r2);
874
0
    dst_u[0] = RGBToU(ar, ag, ab);
875
0
    dst_v[0] = RGBToV(ar, ag, ab);
876
0
  }
877
0
}
878
879
void ARGB1555ToUVRow_C(const uint8_t* src_argb1555,
880
                       int src_stride_argb1555,
881
                       uint8_t* dst_u,
882
                       uint8_t* dst_v,
883
0
                       int width) {
884
0
  const uint8_t* next_argb1555 = src_argb1555 + src_stride_argb1555;
885
0
  int x;
886
0
  for (x = 0; x < width - 1; x += 2) {
887
0
    uint8_t b0 = STATIC_CAST(uint8_t, src_argb1555[0] & 0x1f);
888
0
    uint8_t g0 = STATIC_CAST(
889
0
        uint8_t, (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3));
890
0
    uint8_t r0 = STATIC_CAST(uint8_t, (src_argb1555[1] & 0x7c) >> 2);
891
0
    uint8_t b1 = STATIC_CAST(uint8_t, src_argb1555[2] & 0x1f);
892
0
    uint8_t g1 = STATIC_CAST(
893
0
        uint8_t, (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3));
894
0
    uint8_t r1 = STATIC_CAST(uint8_t, (src_argb1555[3] & 0x7c) >> 2);
895
0
    uint8_t b2 = STATIC_CAST(uint8_t, next_argb1555[0] & 0x1f);
896
0
    uint8_t g2 = STATIC_CAST(
897
0
        uint8_t, (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3));
898
0
    uint8_t r2 = STATIC_CAST(uint8_t, (next_argb1555[1] & 0x7c) >> 2);
899
0
    uint8_t b3 = STATIC_CAST(uint8_t, next_argb1555[2] & 0x1f);
900
0
    uint8_t g3 = STATIC_CAST(
901
0
        uint8_t, (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3));
902
0
    uint8_t r3 = STATIC_CAST(uint8_t, (next_argb1555[3] & 0x7c) >> 2);
903
904
0
    b0 = STATIC_CAST(uint8_t, (b0 << 3) | (b0 >> 2));
905
0
    g0 = STATIC_CAST(uint8_t, (g0 << 3) | (g0 >> 2));
906
0
    r0 = STATIC_CAST(uint8_t, (r0 << 3) | (r0 >> 2));
907
0
    b1 = STATIC_CAST(uint8_t, (b1 << 3) | (b1 >> 2));
908
0
    g1 = STATIC_CAST(uint8_t, (g1 << 3) | (g1 >> 2));
909
0
    r1 = STATIC_CAST(uint8_t, (r1 << 3) | (r1 >> 2));
910
0
    b2 = STATIC_CAST(uint8_t, (b2 << 3) | (b2 >> 2));
911
0
    g2 = STATIC_CAST(uint8_t, (g2 << 3) | (g2 >> 2));
912
0
    r2 = STATIC_CAST(uint8_t, (r2 << 3) | (r2 >> 2));
913
0
    b3 = STATIC_CAST(uint8_t, (b3 << 3) | (b3 >> 2));
914
0
    g3 = STATIC_CAST(uint8_t, (g3 << 3) | (g3 >> 2));
915
0
    r3 = STATIC_CAST(uint8_t, (r3 << 3) | (r3 >> 2));
916
917
0
    uint8_t b = (b0 + b1 + b2 + b3 + 2) >> 2;
918
0
    uint8_t g = (g0 + g1 + g2 + g3 + 2) >> 2;
919
0
    uint8_t r = (r0 + r1 + r2 + r3 + 2) >> 2;
920
0
    dst_u[0] = RGBToU(r, g, b);
921
0
    dst_v[0] = RGBToV(r, g, b);
922
923
0
    src_argb1555 += 4;
924
0
    next_argb1555 += 4;
925
0
    dst_u += 1;
926
0
    dst_v += 1;
927
0
  }
928
0
  if (width & 1) {
929
0
    uint8_t b0 = STATIC_CAST(uint8_t, src_argb1555[0] & 0x1f);
930
0
    uint8_t g0 = STATIC_CAST(
931
0
        uint8_t, (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3));
932
0
    uint8_t r0 = STATIC_CAST(uint8_t, (src_argb1555[1] & 0x7c) >> 2);
933
0
    uint8_t b2 = STATIC_CAST(uint8_t, next_argb1555[0] & 0x1f);
934
0
    uint8_t g2 = STATIC_CAST(
935
0
        uint8_t, (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3));
936
0
    uint8_t r2 = STATIC_CAST(uint8_t, (next_argb1555[1] & 0x7c) >> 2);
937
938
0
    b0 = STATIC_CAST(uint8_t, (b0 << 3) | (b0 >> 2));
939
0
    g0 = STATIC_CAST(uint8_t, (g0 << 3) | (g0 >> 2));
940
0
    r0 = STATIC_CAST(uint8_t, (r0 << 3) | (r0 >> 2));
941
0
    b2 = STATIC_CAST(uint8_t, (b2 << 3) | (b2 >> 2));
942
0
    g2 = STATIC_CAST(uint8_t, (g2 << 3) | (g2 >> 2));
943
0
    r2 = STATIC_CAST(uint8_t, (r2 << 3) | (r2 >> 2));
944
945
0
    uint8_t ab = AVGB(b0, b2);
946
0
    uint8_t ag = AVGB(g0, g2);
947
0
    uint8_t ar = AVGB(r0, r2);
948
0
    dst_u[0] = RGBToU(ar, ag, ab);
949
0
    dst_v[0] = RGBToV(ar, ag, ab);
950
0
  }
951
0
}
952
953
void ARGB4444ToUVRow_C(const uint8_t* src_argb4444,
954
                       int src_stride_argb4444,
955
                       uint8_t* dst_u,
956
                       uint8_t* dst_v,
957
0
                       int width) {
958
0
  const uint8_t* next_argb4444 = src_argb4444 + src_stride_argb4444;
959
0
  int x;
960
0
  for (x = 0; x < width - 1; x += 2) {
961
0
    uint8_t b0 = src_argb4444[0] & 0x0f;
962
0
    uint8_t g0 = src_argb4444[0] >> 4;
963
0
    uint8_t r0 = src_argb4444[1] & 0x0f;
964
0
    uint8_t b1 = src_argb4444[2] & 0x0f;
965
0
    uint8_t g1 = src_argb4444[2] >> 4;
966
0
    uint8_t r1 = src_argb4444[3] & 0x0f;
967
0
    uint8_t b2 = next_argb4444[0] & 0x0f;
968
0
    uint8_t g2 = next_argb4444[0] >> 4;
969
0
    uint8_t r2 = next_argb4444[1] & 0x0f;
970
0
    uint8_t b3 = next_argb4444[2] & 0x0f;
971
0
    uint8_t g3 = next_argb4444[2] >> 4;
972
0
    uint8_t r3 = next_argb4444[3] & 0x0f;
973
974
0
    b0 = STATIC_CAST(uint8_t, (b0 << 4) | b0);
975
0
    g0 = STATIC_CAST(uint8_t, (g0 << 4) | g0);
976
0
    r0 = STATIC_CAST(uint8_t, (r0 << 4) | r0);
977
0
    b1 = STATIC_CAST(uint8_t, (b1 << 4) | b1);
978
0
    g1 = STATIC_CAST(uint8_t, (g1 << 4) | g1);
979
0
    r1 = STATIC_CAST(uint8_t, (r1 << 4) | r1);
980
0
    b2 = STATIC_CAST(uint8_t, (b2 << 4) | b2);
981
0
    g2 = STATIC_CAST(uint8_t, (g2 << 4) | g2);
982
0
    r2 = STATIC_CAST(uint8_t, (r2 << 4) | r2);
983
0
    b3 = STATIC_CAST(uint8_t, (b3 << 4) | b3);
984
0
    g3 = STATIC_CAST(uint8_t, (g3 << 4) | g3);
985
0
    r3 = STATIC_CAST(uint8_t, (r3 << 4) | r3);
986
987
0
    uint8_t b = (b0 + b1 + b2 + b3 + 2) >> 2;
988
0
    uint8_t g = (g0 + g1 + g2 + g3 + 2) >> 2;
989
0
    uint8_t r = (r0 + r1 + r2 + r3 + 2) >> 2;
990
0
    dst_u[0] = RGBToU(r, g, b);
991
0
    dst_v[0] = RGBToV(r, g, b);
992
993
0
    src_argb4444 += 4;
994
0
    next_argb4444 += 4;
995
0
    dst_u += 1;
996
0
    dst_v += 1;
997
0
  }
998
0
  if (width & 1) {
999
0
    uint8_t b0 = src_argb4444[0] & 0x0f;
1000
0
    uint8_t g0 = src_argb4444[0] >> 4;
1001
0
    uint8_t r0 = src_argb4444[1] & 0x0f;
1002
0
    uint8_t b2 = next_argb4444[0] & 0x0f;
1003
0
    uint8_t g2 = next_argb4444[0] >> 4;
1004
0
    uint8_t r2 = next_argb4444[1] & 0x0f;
1005
1006
0
    b0 = STATIC_CAST(uint8_t, (b0 << 4) | b0);
1007
0
    g0 = STATIC_CAST(uint8_t, (g0 << 4) | g0);
1008
0
    r0 = STATIC_CAST(uint8_t, (r0 << 4) | r0);
1009
0
    b2 = STATIC_CAST(uint8_t, (b2 << 4) | b2);
1010
0
    g2 = STATIC_CAST(uint8_t, (g2 << 4) | g2);
1011
0
    r2 = STATIC_CAST(uint8_t, (r2 << 4) | r2);
1012
1013
0
    uint8_t ab = AVGB(b0, b2);
1014
0
    uint8_t ag = AVGB(g0, g2);
1015
0
    uint8_t ar = AVGB(r0, r2);
1016
0
    dst_u[0] = RGBToU(ar, ag, ab);
1017
0
    dst_v[0] = RGBToV(ar, ag, ab);
1018
0
  }
1019
0
}
1020
1021
// Convert a row of ARGB pixels to one U and one V sample per pixel (no
// subsampling) using RGBToU / RGBToV.  Alpha is ignored.
void ARGBToUV444Row_C(const uint8_t* src_argb,
                      uint8_t* dst_u,
                      uint8_t* dst_v,
                      int width) {
  const uint8_t* px = src_argb;
  int left;
  for (left = width; left > 0; --left) {
    // ARGB is stored in memory as B, G, R, A.
    const uint8_t blue = px[0];
    const uint8_t green = px[1];
    const uint8_t red = px[2];
    *dst_u = RGBToU(red, green, blue);
    *dst_v = RGBToV(red, green, blue);
    px += 4;
    ++dst_u;
    ++dst_v;
  }
}
1037
1038
// Convert a row of ARGB pixels to one U and one V sample per pixel (no
// subsampling) using the J variants RGBToUJ / RGBToVJ.  Alpha is ignored.
void ARGBToUVJ444Row_C(const uint8_t* src_argb,
                       uint8_t* dst_u,
                       uint8_t* dst_v,
                       int width) {
  const uint8_t* px = src_argb;
  int left;
  for (left = width; left > 0; --left) {
    // ARGB is stored in memory as B, G, R, A.
    const uint8_t blue = px[0];
    const uint8_t green = px[1];
    const uint8_t red = px[2];
    *dst_u = RGBToUJ(red, green, blue);
    *dst_v = RGBToVJ(red, green, blue);
    px += 4;
    ++dst_u;
    ++dst_v;
  }
}
1054
1055
0
// Convert a row of ARGB pixels to gray: B, G and R are all replaced by the
// RGBToYJ luma of the source pixel and alpha is copied through.
void ARGBGrayRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
  const uint8_t* in = src_argb;
  uint8_t* out = dst_argb;
  int left;
  for (left = width; left > 0; --left) {
    // Luma is computed before any store to the destination pixel.
    const uint8_t luma = RGBToYJ(in[2], in[1], in[0]);
    out[0] = luma;
    out[1] = luma;
    out[2] = luma;
    out[3] = in[3];
    out += 4;
    in += 4;
  }
}
1065
1066
// Convert a row of image to Sepia tone.
1067
0
void ARGBSepiaRow_C(uint8_t* dst_argb, int width) {
1068
0
  int x;
1069
0
  for (x = 0; x < width; ++x) {
1070
0
    int b = dst_argb[0];
1071
0
    int g = dst_argb[1];
1072
0
    int r = dst_argb[2];
1073
0
    int sb = (b * 17 + g * 68 + r * 35) >> 7;
1074
0
    int sg = (b * 22 + g * 88 + r * 45) >> 7;
1075
0
    int sr = (b * 24 + g * 98 + r * 50) >> 7;
1076
    // b does not over flow. a is preserved from original.
1077
0
    dst_argb[0] = STATIC_CAST(uint8_t, sb);
1078
0
    dst_argb[1] = STATIC_CAST(uint8_t, clamp255(sg));
1079
0
    dst_argb[2] = STATIC_CAST(uint8_t, clamp255(sr));
1080
0
    dst_argb += 4;
1081
0
  }
1082
0
}
1083
1084
// Apply color matrix to a row of image. Matrix is signed.
1085
// TODO(fbarchard): Consider adding rounding (+32).
1086
void ARGBColorMatrixRow_C(const uint8_t* src_argb,
1087
                          uint8_t* dst_argb,
1088
                          const int8_t* matrix_argb,
1089
0
                          int width) {
1090
0
  int x;
1091
0
  for (x = 0; x < width; ++x) {
1092
0
    int b = src_argb[0];
1093
0
    int g = src_argb[1];
1094
0
    int r = src_argb[2];
1095
0
    int a = src_argb[3];
1096
0
    int sb = (b * matrix_argb[0] + g * matrix_argb[1] + r * matrix_argb[2] +
1097
0
              a * matrix_argb[3]) >>
1098
0
             6;
1099
0
    int sg = (b * matrix_argb[4] + g * matrix_argb[5] + r * matrix_argb[6] +
1100
0
              a * matrix_argb[7]) >>
1101
0
             6;
1102
0
    int sr = (b * matrix_argb[8] + g * matrix_argb[9] + r * matrix_argb[10] +
1103
0
              a * matrix_argb[11]) >>
1104
0
             6;
1105
0
    int sa = (b * matrix_argb[12] + g * matrix_argb[13] + r * matrix_argb[14] +
1106
0
              a * matrix_argb[15]) >>
1107
0
             6;
1108
0
    dst_argb[0] = STATIC_CAST(uint8_t, Clamp(sb));
1109
0
    dst_argb[1] = STATIC_CAST(uint8_t, Clamp(sg));
1110
0
    dst_argb[2] = STATIC_CAST(uint8_t, Clamp(sr));
1111
0
    dst_argb[3] = STATIC_CAST(uint8_t, Clamp(sa));
1112
0
    src_argb += 4;
1113
0
    dst_argb += 4;
1114
0
  }
1115
0
}
1116
1117
// Apply color table to a row of image.
// table_argb is indexed as table_argb[value * 4 + channel]; every B, G, R and
// A byte of dst_argb is replaced in place by its table entry.
void ARGBColorTableRow_C(uint8_t* dst_argb,
                         const uint8_t* table_argb,
                         int width) {
  uint8_t* px = dst_argb;
  int left;
  for (left = width; left > 0; --left) {
    // Read the whole pixel first, then store the looked up channels.
    const int blue = px[0];
    const int green = px[1];
    const int red = px[2];
    const int alpha = px[3];
    px[0] = table_argb[blue * 4 + 0];
    px[1] = table_argb[green * 4 + 1];
    px[2] = table_argb[red * 4 + 2];
    px[3] = table_argb[alpha * 4 + 3];
    px += 4;
  }
}
1134
1135
// Apply color table to a row of image.
// Same lookup as the ARGB variant (table_argb[value * 4 + channel]) but only
// B, G and R are replaced; the alpha byte of every pixel is left untouched.
void RGBColorTableRow_C(uint8_t* dst_argb,
                        const uint8_t* table_argb,
                        int width) {
  uint8_t* px = dst_argb;
  int left;
  for (left = width; left > 0; --left) {
    // Read the color channels first, then store the looked up values.
    const int blue = px[0];
    const int green = px[1];
    const int red = px[2];
    px[0] = table_argb[blue * 4 + 0];
    px[1] = table_argb[green * 4 + 1];
    px[2] = table_argb[red * 4 + 2];
    px += 4;
  }
}
1150
1151
void ARGBQuantizeRow_C(uint8_t* dst_argb,
1152
                       int scale,
1153
                       int interval_size,
1154
                       int interval_offset,
1155
0
                       int width) {
1156
0
  int x;
1157
0
  for (x = 0; x < width; ++x) {
1158
0
    int b = dst_argb[0];
1159
0
    int g = dst_argb[1];
1160
0
    int r = dst_argb[2];
1161
0
    dst_argb[0] = STATIC_CAST(
1162
0
        uint8_t, (b * scale >> 16) * interval_size + interval_offset);
1163
0
    dst_argb[1] = STATIC_CAST(
1164
0
        uint8_t, (g * scale >> 16) * interval_size + interval_offset);
1165
0
    dst_argb[2] = STATIC_CAST(
1166
0
        uint8_t, (r * scale >> 16) * interval_size + interval_offset);
1167
0
    dst_argb += 4;
1168
0
  }
1169
0
}
1170
1171
0
// Replicate an 8 bit value into both bytes of a 16 bit value (v * 0x0101).
// Fully parenthesized so it expands safely inside a larger expression.
#define REPEAT8(v) ((v) | ((v) << 8))
// Multiply a replicated 16 bit channel f by a replicated 16 bit scale v and
// keep the top 8 bits of the 32 bit product.
#define SHADE(f, v) (((v) * (f)) >> 24)

// Scale every channel of a row of ARGB pixels by the matching byte of value.
// value packs the per-channel scales as B in bits 0..7, G in bits 8..15, R in
// bits 16..23 and A in bits 24..31.  A scale byte of 255 leaves the channel
// unchanged; smaller scale bytes darken it.
void ARGBShadeRow_C(const uint8_t* src_argb,
                    uint8_t* dst_argb,
                    int width,
                    uint32_t value) {
  const uint32_t b_scale = REPEAT8(value & 0xff);
  const uint32_t g_scale = REPEAT8((value >> 8) & 0xff);
  const uint32_t r_scale = REPEAT8((value >> 16) & 0xff);
  const uint32_t a_scale = REPEAT8(value >> 24);

  int i;
  for (i = 0; i < width; ++i) {
    const uint32_t b = REPEAT8(src_argb[0]);
    const uint32_t g = REPEAT8(src_argb[1]);
    const uint32_t r = REPEAT8(src_argb[2]);
    const uint32_t a = REPEAT8(src_argb[3]);
    // Both factors are at most 0xffff, so the product fits in 32 bits and
    // the result of the shift fits in 8 bits.
    dst_argb[0] = SHADE(b, b_scale);
    dst_argb[1] = SHADE(g, g_scale);
    dst_argb[2] = SHADE(r, r_scale);
    dst_argb[3] = SHADE(a, a_scale);
    src_argb += 4;
    dst_argb += 4;
  }
}
#undef REPEAT8
#undef SHADE
1199
1200
void ARGBMultiplyRow_C(const uint8_t* src_argb,
1201
                       const uint8_t* src_argb1,
1202
                       uint8_t* dst_argb,
1203
0
                       int width) {
1204
0
  int i;
1205
0
  for (i = 0; i < width; ++i) {
1206
0
    const uint32_t b = src_argb[0];
1207
0
    const uint32_t g = src_argb[1];
1208
0
    const uint32_t r = src_argb[2];
1209
0
    const uint32_t a = src_argb[3];
1210
0
    const uint32_t b_scale = src_argb1[0];
1211
0
    const uint32_t g_scale = src_argb1[1];
1212
0
    const uint32_t r_scale = src_argb1[2];
1213
0
    const uint32_t a_scale = src_argb1[3];
1214
0
    dst_argb[0] = STATIC_CAST(uint8_t, (b * b_scale + 128) >> 8);
1215
0
    dst_argb[1] = STATIC_CAST(uint8_t, (g * g_scale + 128) >> 8);
1216
0
    dst_argb[2] = STATIC_CAST(uint8_t, (r * r_scale + 128) >> 8);
1217
0
    dst_argb[3] = STATIC_CAST(uint8_t, (a * a_scale + 128) >> 8);
1218
0
    src_argb += 4;
1219
0
    src_argb1 += 4;
1220
0
    dst_argb += 4;
1221
0
  }
1222
0
}
1223
1224
#define SHADE(f, v) clamp255(v + f)
1225
1226
void ARGBAddRow_C(const uint8_t* src_argb,
1227
                  const uint8_t* src_argb1,
1228
                  uint8_t* dst_argb,
1229
0
                  int width) {
1230
0
  int i;
1231
0
  for (i = 0; i < width; ++i) {
1232
0
    const int b = src_argb[0];
1233
0
    const int g = src_argb[1];
1234
0
    const int r = src_argb[2];
1235
0
    const int a = src_argb[3];
1236
0
    const int b_add = src_argb1[0];
1237
0
    const int g_add = src_argb1[1];
1238
0
    const int r_add = src_argb1[2];
1239
0
    const int a_add = src_argb1[3];
1240
0
    dst_argb[0] = STATIC_CAST(uint8_t, SHADE(b, b_add));
1241
0
    dst_argb[1] = STATIC_CAST(uint8_t, SHADE(g, g_add));
1242
0
    dst_argb[2] = STATIC_CAST(uint8_t, SHADE(r, r_add));
1243
0
    dst_argb[3] = STATIC_CAST(uint8_t, SHADE(a, a_add));
1244
0
    src_argb += 4;
1245
0
    src_argb1 += 4;
1246
0
    dst_argb += 4;
1247
0
  }
1248
0
}
1249
#undef SHADE
1250
1251
#define SHADE(f, v) clamp0(f - v)
1252
1253
void ARGBSubtractRow_C(const uint8_t* src_argb,
1254
                       const uint8_t* src_argb1,
1255
                       uint8_t* dst_argb,
1256
0
                       int width) {
1257
0
  int i;
1258
0
  for (i = 0; i < width; ++i) {
1259
0
    const int b = src_argb[0];
1260
0
    const int g = src_argb[1];
1261
0
    const int r = src_argb[2];
1262
0
    const int a = src_argb[3];
1263
0
    const int b_sub = src_argb1[0];
1264
0
    const int g_sub = src_argb1[1];
1265
0
    const int r_sub = src_argb1[2];
1266
0
    const int a_sub = src_argb1[3];
1267
0
    dst_argb[0] = STATIC_CAST(uint8_t, SHADE(b, b_sub));
1268
0
    dst_argb[1] = STATIC_CAST(uint8_t, SHADE(g, g_sub));
1269
0
    dst_argb[2] = STATIC_CAST(uint8_t, SHADE(r, r_sub));
1270
0
    dst_argb[3] = STATIC_CAST(uint8_t, SHADE(a, a_sub));
1271
0
    src_argb += 4;
1272
0
    src_argb1 += 4;
1273
0
    dst_argb += 4;
1274
0
  }
1275
0
}
1276
#undef SHADE
1277
1278
// Sobel functions which mimic SSSE3.
1279
// Horizontal Sobel filter over 3 rows of 8 bit samples.
// For each output i the column i is compared with column i + 2 on all three
// rows, the middle row is weighted by 2, and the absolute value of the sum is
// passed through clamp255.  Reads up to index width + 1 of every source row.
void SobelXRow_C(const uint8_t* src_y0,
                 const uint8_t* src_y1,
                 const uint8_t* src_y2,
                 uint8_t* dst_sobelx,
                 int width) {
  int col;
  for (col = 0; col < width; ++col) {
    const int top_diff = src_y0[col] - src_y0[col + 2];
    const int mid_diff = src_y1[col] - src_y1[col + 2];
    const int bot_diff = src_y2[col] - src_y2[col + 2];
    const int magnitude = Abs(top_diff + mid_diff * 2 + bot_diff);
    dst_sobelx[col] = (uint8_t)(clamp255(magnitude));
  }
}
1299
1300
// Vertical Sobel filter over 2 rows of 8 bit samples.
// For each output i the columns i, i + 1 and i + 2 of src_y0 are compared
// with the same columns of src_y1, the middle column is weighted by 2, and
// the absolute value of the sum is passed through clamp255.  Reads up to
// index width + 1 of both source rows.
void SobelYRow_C(const uint8_t* src_y0,
                 const uint8_t* src_y1,
                 uint8_t* dst_sobely,
                 int width) {
  int col;
  for (col = 0; col < width; ++col) {
    const int left_diff = src_y0[col + 0] - src_y1[col + 0];
    const int mid_diff = src_y0[col + 1] - src_y1[col + 1];
    const int right_diff = src_y0[col + 2] - src_y1[col + 2];
    const int magnitude = Abs(left_diff + mid_diff * 2 + right_diff);
    dst_sobely[col] = (uint8_t)(clamp255(magnitude));
  }
}
1319
1320
// Combine Sobel X and Sobel Y rows into a gray ARGB row: the sum of both
// inputs, passed through clamp255, is written to B, G and R; alpha is 255.
void SobelRow_C(const uint8_t* src_sobelx,
                const uint8_t* src_sobely,
                uint8_t* dst_argb,
                int width) {
  uint8_t* out = dst_argb;
  int col;
  for (col = 0; col < width; ++col) {
    const int gx = src_sobelx[col];
    const int gy = src_sobely[col];
    const int sum = clamp255(gx + gy);
    out[0] = (uint8_t)(sum);
    out[1] = (uint8_t)(sum);
    out[2] = (uint8_t)(sum);
    out[3] = (uint8_t)(255u);
    out += 4;
  }
}
1336
1337
// Combine Sobel X and Sobel Y rows into a single 8 bit plane row: each output
// is the sum of both inputs passed through clamp255.
void SobelToPlaneRow_C(const uint8_t* src_sobelx,
                       const uint8_t* src_sobely,
                       uint8_t* dst_y,
                       int width) {
  int col;
  for (col = 0; col < width; ++col) {
    const int gx = src_sobelx[col];
    const int gy = src_sobely[col];
    const int sum = clamp255(gx + gy);
    dst_y[col] = (uint8_t)(sum);
  }
}
1349
1350
// Combine Sobel X and Sobel Y rows into a color ARGB row: B receives Sobel Y,
// R receives Sobel X, G receives their sum passed through clamp255, and
// alpha is 255.
void SobelXYRow_C(const uint8_t* src_sobelx,
                  const uint8_t* src_sobely,
                  uint8_t* dst_argb,
                  int width) {
  uint8_t* out = dst_argb;
  int col;
  for (col = 0; col < width; ++col) {
    const int gx = src_sobelx[col];
    const int gy = src_sobely[col];
    const int sum = clamp255(gx + gy);
    out[0] = (uint8_t)(gy);
    out[1] = (uint8_t)(sum);
    out[2] = (uint8_t)(gx);
    out[3] = (uint8_t)(255u);
    out += 4;
  }
}
1366
1367
0
// Expand a row of 8 bit gray samples to ARGB: every sample is copied into
// B, G and R and alpha is set to 255.
void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width) {
  const uint8_t* in = src_y;
  uint8_t* out = dst_argb;
  int left;
  for (left = width; left > 0; --left) {
    const uint8_t gray = *in;
    out[0] = gray;
    out[1] = gray;
    out[2] = gray;
    out[3] = 255u;
    out += 4;
    ++in;
  }
}
1378
1379
// Macros to create SIMD specific yuv to rgb conversion constants.
1380
1381
// clang-format off
1382
1383
#if defined(__aarch64__) || defined(__arm__) || defined(__riscv)
1384
// Bias values include subtract 128 from U and V, bias from Y and rounding.
1385
// For B and R bias is negative. For G bias is positive.
1386
#define YUVCONSTANTSBODY(YG, YB, UB, UG, VG, VR)                             \
1387
  {{UB, VR, UG, VG, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},                     \
1388
   {YG, (UB * 128 - YB), (UG * 128 + VG * 128 + YB), (VR * 128 - YB), YB, 0, \
1389
    0, 0}}
1390
#else
1391
#define YUVCONSTANTSBODY(YG, YB, UB, UG, VG, VR)                     \
1392
  {{UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,          \
1393
    UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0},         \
1394
   {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,  \
1395
    UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG}, \
1396
   {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,          \
1397
    0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR},         \
1398
   {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}, \
1399
   {YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB}}
1400
#endif
1401
1402
// clang-format on
1403
1404
#define MAKEYUVCONSTANTS(name, YG, YB, UB, UG, VG, VR)            \
1405
  const struct YuvConstants SIMD_ALIGNED(kYuv##name##Constants) = \
1406
      YUVCONSTANTSBODY(YG, YB, UB, UG, VG, VR);                   \
1407
  const struct YuvConstants SIMD_ALIGNED(kYvu##name##Constants) = \
1408
      YUVCONSTANTSBODY(YG, YB, VR, VG, UG, UB);
1409
1410
// TODO(fbarchard): Generate SIMD structures from float matrix.
1411
1412
// BT.601 limited range YUV to RGB reference
1413
//  R = (Y - 16) * 1.164             + V * 1.596
1414
//  G = (Y - 16) * 1.164 - U * 0.391 - V * 0.813
1415
//  B = (Y - 16) * 1.164 + U * 2.018
1416
// KR = 0.299; KB = 0.114
1417
1418
// U and V contributions to R,G,B.
1419
#if defined(LIBYUV_UNLIMITED_DATA) || defined(LIBYUV_UNLIMITED_BT601)
1420
#define UB 129 /* round(2.018 * 64) */
1421
#else
1422
#define UB 128 /* min(128, round(2.018 * 64)) */
1423
#endif
1424
#define UG 25  /* round(0.391 * 64) */
1425
#define VG 52  /* round(0.813 * 64) */
1426
#define VR 102 /* round(1.596 * 64) */
1427
1428
// Y contribution to R,G,B.  Scale and bias.
1429
#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
1430
#define YB -1160 /* 1.164 * 64 * -16 + 64 / 2 */
1431
1432
MAKEYUVCONSTANTS(I601, YG, YB, UB, UG, VG, VR)
1433
1434
#undef YG
1435
#undef YB
1436
#undef UB
1437
#undef UG
1438
#undef VG
1439
#undef VR
1440
1441
// BT.601 full range YUV to RGB reference (aka JPEG)
1442
// *  R = Y               + V * 1.40200
1443
// *  G = Y - U * 0.34414 - V * 0.71414
1444
// *  B = Y + U * 1.77200
1445
// KR = 0.299; KB = 0.114
1446
1447
// U and V contributions to R,G,B.
1448
#define UB 113 /* round(1.77200 * 64) */
1449
#define UG 22  /* round(0.34414 * 64) */
1450
#define VG 46  /* round(0.71414 * 64) */
1451
#define VR 90  /* round(1.40200 * 64) */
1452
1453
// Y contribution to R,G,B.  Scale and bias.
1454
#define YG 16320 /* round(1.000 * 64 * 256 * 256 / 257) */
1455
#define YB 32    /* 64 / 2 */
1456
1457
MAKEYUVCONSTANTS(JPEG, YG, YB, UB, UG, VG, VR)
1458
1459
#undef YG
1460
#undef YB
1461
#undef UB
1462
#undef UG
1463
#undef VG
1464
#undef VR
1465
1466
// BT.709 limited range YUV to RGB reference
1467
//  R = (Y - 16) * 1.164             + V * 1.793
1468
//  G = (Y - 16) * 1.164 - U * 0.213 - V * 0.533
1469
//  B = (Y - 16) * 1.164 + U * 2.112
1470
//  KR = 0.2126, KB = 0.0722
1471
1472
// U and V contributions to R,G,B.
1473
#if defined(LIBYUV_UNLIMITED_DATA) || defined(LIBYUV_UNLIMITED_BT709)
1474
#define UB 135 /* round(2.112 * 64) */
1475
#else
1476
#define UB 128 /* min(128, round(2.112 * 64)) */
1477
#endif
1478
#define UG 14  /* round(0.213 * 64) */
1479
#define VG 34  /* round(0.533 * 64) */
1480
#define VR 115 /* round(1.793 * 64) */
1481
1482
// Y contribution to R,G,B.  Scale and bias.
1483
#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
1484
#define YB -1160 /* 1.164 * 64 * -16 + 64 / 2 */
1485
1486
MAKEYUVCONSTANTS(H709, YG, YB, UB, UG, VG, VR)
1487
1488
#undef YG
1489
#undef YB
1490
#undef UB
1491
#undef UG
1492
#undef VG
1493
#undef VR
1494
1495
// BT.709 full range YUV to RGB reference
1496
//  R = Y               + V * 1.5748
1497
//  G = Y - U * 0.18732 - V * 0.46812
1498
//  B = Y + U * 1.8556
1499
//  KR = 0.2126, KB = 0.0722
1500
1501
// U and V contributions to R,G,B.
1502
#define UB 119 /* round(1.8556 * 64) */
1503
#define UG 12  /* round(0.18732 * 64) */
1504
#define VG 30  /* round(0.46812 * 64) */
1505
#define VR 101 /* round(1.5748 * 64) */
1506
1507
// Y contribution to R,G,B.  Scale and bias.  (same as jpeg)
1508
#define YG 16320 /* round(1 * 64 * 256 * 256 / 257) */
1509
#define YB 32    /* 64 / 2 */
1510
1511
MAKEYUVCONSTANTS(F709, YG, YB, UB, UG, VG, VR)
1512
1513
#undef YG
1514
#undef YB
1515
#undef UB
1516
#undef UG
1517
#undef VG
1518
#undef VR
1519
1520
// BT.2020 limited range YUV to RGB reference
1521
//  R = (Y - 16) * 1.164384                + V * 1.67867
1522
//  G = (Y - 16) * 1.164384 - U * 0.187326 - V * 0.65042
1523
//  B = (Y - 16) * 1.164384 + U * 2.14177
1524
// KR = 0.2627; KB = 0.0593
1525
1526
// U and V contributions to R,G,B.
1527
#if defined(LIBYUV_UNLIMITED_DATA) || defined(LIBYUV_UNLIMITED_BT2020)
1528
#define UB 137 /* round(2.142 * 64) */
1529
#else
1530
#define UB 128 /* min(128, round(2.142 * 64)) */
1531
#endif
1532
#define UG 12  /* round(0.187326 * 64) */
1533
#define VG 42  /* round(0.65042 * 64) */
1534
#define VR 107 /* round(1.67867 * 64) */
1535
1536
// Y contribution to R,G,B.  Scale and bias.
1537
#define YG 19003 /* round(1.164384 * 64 * 256 * 256 / 257) */
1538
#define YB -1160 /* 1.164384 * 64 * -16 + 64 / 2 */
1539
1540
MAKEYUVCONSTANTS(2020, YG, YB, UB, UG, VG, VR)
1541
1542
#undef YG
1543
#undef YB
1544
#undef UB
1545
#undef UG
1546
#undef VG
1547
#undef VR
1548
1549
// BT.2020 full range YUV to RGB reference
1550
//  R = Y                + V * 1.474600
1551
//  G = Y - U * 0.164553 - V * 0.571353
1552
//  B = Y + U * 1.881400
1553
// KR = 0.2627; KB = 0.0593
1554
1555
#define UB 120 /* round(1.881400 * 64) */
1556
#define UG 11  /* round(0.164553 * 64) */
1557
#define VG 37  /* round(0.571353 * 64) */
1558
#define VR 94  /* round(1.474600 * 64) */
1559
1560
// Y contribution to R,G,B.  Scale and bias.  (same as jpeg)
1561
#define YG 16320 /* round(1 * 64 * 256 * 256 / 257) */
1562
#define YB 32    /* 64 / 2 */
1563
1564
MAKEYUVCONSTANTS(V2020, YG, YB, UB, UG, VG, VR)
1565
1566
#undef YG
1567
#undef YB
1568
#undef UB
1569
#undef UG
1570
#undef VG
1571
#undef VR
1572
1573
#undef BB
1574
#undef BG
1575
#undef BR
1576
1577
#undef MAKEYUVCONSTANTS
1578
1579
// LOAD_YUV_CONSTANTS and CALC_RGB16 are statement macros shared by the
// YuvPixel* helpers below. LOAD_YUV_CONSTANTS expects a pointer named
// `yuvconstants` in scope and declares local coefficient variables.
// CALC_RGB16 expects `y32`, `u` and `v` in scope and declares `y1`, `b16`,
// `g16` and `r16`. The two variants read differently named members of
// struct YuvConstants depending on the target architecture.
#if defined(__aarch64__) || defined(__arm__) || defined(__riscv)
1580
#define LOAD_YUV_CONSTANTS                 \
1581
  int ub = yuvconstants->kUVCoeff[0];      \
1582
  int vr = yuvconstants->kUVCoeff[1];      \
1583
  int ug = yuvconstants->kUVCoeff[2];      \
1584
  int vg = yuvconstants->kUVCoeff[3];      \
1585
  int yg = yuvconstants->kRGBCoeffBias[0]; \
1586
  int bb = yuvconstants->kRGBCoeffBias[1]; \
1587
  int bg = yuvconstants->kRGBCoeffBias[2]; \
1588
  int br = yuvconstants->kRGBCoeffBias[3]
1589
1590
// This variant uses u and v as given and applies the per-channel biases
// bb, bg and br loaded above.
#define CALC_RGB16                         \
1591
  int32_t y1 = (uint32_t)(y32 * yg) >> 16; \
1592
  int b16 = y1 + (u * ub) - bb;            \
1593
  int g16 = y1 + bg - (u * ug + v * vg);   \
1594
  int r16 = y1 + (v * vr) - br
1595
#else
// Other targets: coefficients come from kUVToB/kUVToG/kUVToR/kYToRgb and a
// single Y bias (kYBiasToRgb[0]) that is added to y1.
1596
#define LOAD_YUV_CONSTANTS           \
1597
0
  int ub = yuvconstants->kUVToB[0];  \
1598
0
  int ug = yuvconstants->kUVToG[0];  \
1599
0
  int vg = yuvconstants->kUVToG[1];  \
1600
0
  int vr = yuvconstants->kUVToR[1];  \
1601
0
  int yg = yuvconstants->kYToRgb[0]; \
1602
0
  int yb = yuvconstants->kYBiasToRgb[0]
1603
1604
// This variant narrows u and v to int8_t and subtracts 0x80 from each before
// multiplying by the coefficients.
#define CALC_RGB16                                \
1605
0
  int32_t y1 = ((uint32_t)(y32 * yg) >> 16) + yb; \
1606
0
  int8_t ui = (int8_t)u;                          \
1607
0
  int8_t vi = (int8_t)v;                          \
1608
0
  ui -= 0x80;                                     \
1609
0
  vi -= 0x80;                                     \
1610
0
  int b16 = y1 + (ui * ub);                       \
1611
0
  int g16 = y1 - (ui * ug + vi * vg);             \
1612
0
  int r16 = y1 + (vi * vr)
1613
#endif
1614
1615
// C reference code that mimics the YUV assembly.
1616
// Reads 8 bit YUV and leaves result as 8 bit B, G and R.
// y is widened to 16 bits by multiplying by 0x0101, CALC_RGB16 produces the
// intermediate b16/g16/r16, and each is shifted right by 6 and passed through
// Clamp() before being stored as uint8_t.
1617
static __inline void YuvPixel(uint8_t y,
1618
                              uint8_t u,
1619
                              uint8_t v,
1620
                              uint8_t* b,
1621
                              uint8_t* g,
1622
                              uint8_t* r,
1623
0
                              const struct YuvConstants* yuvconstants) {
1624
0
  LOAD_YUV_CONSTANTS;
1625
0
  uint32_t y32 = y * 0x0101;
1626
0
  CALC_RGB16;
1627
0
  *b = STATIC_CAST(uint8_t, Clamp((int32_t)(b16) >> 6));
1628
0
  *g = STATIC_CAST(uint8_t, Clamp((int32_t)(g16) >> 6));
1629
0
  *r = STATIC_CAST(uint8_t, Clamp((int32_t)(r16) >> 6));
1630
0
}
1631
1632
// Reads 8 bit YUV and leaves result as 16 bit.
// Same arithmetic as YuvPixel, but the intermediate b16/g16/r16 values are
// returned through the int pointers without the >> 6 shift or clamping.
1633
static __inline void YuvPixel8_16(uint8_t y,
1634
                                  uint8_t u,
1635
                                  uint8_t v,
1636
                                  int* b,
1637
                                  int* g,
1638
                                  int* r,
1639
0
                                  const struct YuvConstants* yuvconstants) {
1640
0
  LOAD_YUV_CONSTANTS;
1641
0
  uint32_t y32 = y * 0x0101;
1642
0
  CALC_RGB16;
1643
0
  *b = b16;
1644
0
  *g = g16;
1645
0
  *r = r16;
1646
0
}
1647
1648
// C reference code that mimics the YUV 16 bit assembly.
1649
// Reads 10 bit YUV and leaves result as 16 bit.
// y is widened to 16 bits as (y << 6) | (y >> 4); u and v are reduced to
// 8 bits (>> 2, then clamp255) before CALC_RGB16. The unshifted, unclamped
// b16/g16/r16 are returned through the int pointers.
1650
static __inline void YuvPixel10_16(uint16_t y,
1651
                                   uint16_t u,
1652
                                   uint16_t v,
1653
                                   int* b,
1654
                                   int* g,
1655
                                   int* r,
1656
0
                                   const struct YuvConstants* yuvconstants) {
1657
0
  LOAD_YUV_CONSTANTS;
1658
0
  uint32_t y32 = (y << 6) | (y >> 4);
1659
0
  u = STATIC_CAST(uint8_t, clamp255(u >> 2));
1660
0
  v = STATIC_CAST(uint8_t, clamp255(v >> 2));
1661
0
  CALC_RGB16;
1662
0
  *b = b16;
1663
0
  *g = g16;
1664
0
  *r = r16;
1665
0
}
1666
1667
// C reference code that mimics the YUV 16 bit assembly.
1668
// Reads 12 bit YUV and leaves result as 16 bit.
1669
static __inline void YuvPixel12_16(int16_t y,
1670
                                   int16_t u,
1671
                                   int16_t v,
1672
                                   int* b,
1673
                                   int* g,
1674
                                   int* r,
1675
0
                                   const struct YuvConstants* yuvconstants) {
1676
0
  LOAD_YUV_CONSTANTS;
1677
0
  uint32_t y32 = (y << 4) | (y >> 8);
1678
0
  u = STATIC_CAST(uint8_t, clamp255(u >> 4));
1679
0
  v = STATIC_CAST(uint8_t, clamp255(v >> 4));
1680
0
  CALC_RGB16;
1681
0
  *b = b16;
1682
0
  *g = g16;
1683
0
  *r = r16;
1684
0
}
1685
1686
// C reference code that mimics the YUV 10 bit assembly.
1687
// Reads 10 bit YUV and clamps down to 8 bit RGB.
// Delegates the arithmetic to YuvPixel10_16, then shifts each result right by
// 6 and passes it through Clamp() before storing it as uint8_t.
1688
static __inline void YuvPixel10(uint16_t y,
1689
                                uint16_t u,
1690
                                uint16_t v,
1691
                                uint8_t* b,
1692
                                uint8_t* g,
1693
                                uint8_t* r,
1694
0
                                const struct YuvConstants* yuvconstants) {
1695
0
  int b16;
1696
0
  int g16;
1697
0
  int r16;
1698
0
  YuvPixel10_16(y, u, v, &b16, &g16, &r16, yuvconstants);
1699
0
  *b = STATIC_CAST(uint8_t, Clamp(b16 >> 6));
1700
0
  *g = STATIC_CAST(uint8_t, Clamp(g16 >> 6));
1701
0
  *r = STATIC_CAST(uint8_t, Clamp(r16 >> 6));
1702
0
}
1703
1704
// C reference code that mimics the YUV 12 bit assembly.
1705
// Reads 12 bit YUV and clamps down to 8 bit RGB.
// Delegates the arithmetic to YuvPixel12_16, then shifts each result right by
// 6 and passes it through Clamp() before storing it as uint8_t.
// NOTE(review): y, u and v are uint16_t here but YuvPixel12_16 declares them
// as int16_t, so values >= 0x8000 change sign at the call.
1706
static __inline void YuvPixel12(uint16_t y,
1707
                                uint16_t u,
1708
                                uint16_t v,
1709
                                uint8_t* b,
1710
                                uint8_t* g,
1711
                                uint8_t* r,
1712
0
                                const struct YuvConstants* yuvconstants) {
1713
0
  int b16;
1714
0
  int g16;
1715
0
  int r16;
1716
0
  YuvPixel12_16(y, u, v, &b16, &g16, &r16, yuvconstants);
1717
0
  *b = STATIC_CAST(uint8_t, Clamp(b16 >> 6));
1718
0
  *g = STATIC_CAST(uint8_t, Clamp(g16 >> 6));
1719
0
  *r = STATIC_CAST(uint8_t, Clamp(r16 >> 6));
1720
0
}
1721
1722
// C reference code that mimics the YUV 16 bit assembly.
1723
// Reads 16 bit YUV and leaves result as 8 bit.
// y is used as the 16 bit luma value directly; u and v are reduced to 8 bits
// (>> 8, then clamp255). Each CALC_RGB16 result is shifted right by 6 and
// passed through Clamp() before being stored as uint8_t.
1724
static __inline void YuvPixel16_8(uint16_t y,
1725
                                  uint16_t u,
1726
                                  uint16_t v,
1727
                                  uint8_t* b,
1728
                                  uint8_t* g,
1729
                                  uint8_t* r,
1730
0
                                  const struct YuvConstants* yuvconstants) {
1731
0
  LOAD_YUV_CONSTANTS;
1732
0
  uint32_t y32 = y;
1733
0
  u = STATIC_CAST(uint16_t, clamp255(u >> 8));
1734
0
  v = STATIC_CAST(uint16_t, clamp255(v >> 8));
1735
0
  CALC_RGB16;
1736
0
  *b = STATIC_CAST(uint8_t, Clamp((int32_t)(b16) >> 6));
1737
0
  *g = STATIC_CAST(uint8_t, Clamp((int32_t)(g16) >> 6));
1738
0
  *r = STATIC_CAST(uint8_t, Clamp((int32_t)(r16) >> 6));
1739
0
}
1740
1741
// C reference code that mimics the YUV 16 bit assembly.
1742
// Reads 16 bit YUV and leaves result as 16 bit.
// Same input handling as YuvPixel16_8 (y used directly, u and v reduced with
// >> 8 and clamp255), but the CALC_RGB16 results are returned through the int
// pointers without shifting or clamping.
1743
static __inline void YuvPixel16_16(uint16_t y,
1744
                                   uint16_t u,
1745
                                   uint16_t v,
1746
                                   int* b,
1747
                                   int* g,
1748
                                   int* r,
1749
0
                                   const struct YuvConstants* yuvconstants) {
1750
0
  LOAD_YUV_CONSTANTS;
1751
0
  uint32_t y32 = y;
1752
0
  u = STATIC_CAST(uint16_t, clamp255(u >> 8));
1753
0
  v = STATIC_CAST(uint16_t, clamp255(v >> 8));
1754
0
  CALC_RGB16;
1755
0
  *b = b16;
1756
0
  *g = g16;
1757
0
  *r = r16;
1758
0
}
1759
1760
// C reference code that mimics the YUV assembly.
1761
// Reads an 8 bit Y sample only (no U or V) and leaves result as 8 bit.
// The scaled luma ((y * 0x0101 * yg) >> 16) plus the bias ygb is shifted right
// by 6, passed through Clamp(), and the same value is written to b, g and r.
// The scale and bias come from different YuvConstants members depending on the
// target architecture.
1762
static __inline void YPixel(uint8_t y,
1763
                            uint8_t* b,
1764
                            uint8_t* g,
1765
                            uint8_t* r,
1766
0
                            const struct YuvConstants* yuvconstants) {
1767
#if defined(__aarch64__) || defined(__arm__) || defined(__riscv)
1768
  int yg = yuvconstants->kRGBCoeffBias[0];
1769
  int ygb = yuvconstants->kRGBCoeffBias[4];
1770
#else
1771
0
  int ygb = yuvconstants->kYBiasToRgb[0];
1772
0
  int yg = yuvconstants->kYToRgb[0];
1773
0
#endif
1774
0
  uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16;
1775
0
  uint8_t b8 = STATIC_CAST(uint8_t, Clamp(((int32_t)(y1) + ygb) >> 6));
1776
0
  *b = b8;
1777
0
  *g = b8;
1778
0
  *r = b8;
1779
0
}
1780
1781
// Converts `width` pixels with one 8 bit Y, U and V sample per pixel.
// Each output pixel is 4 bytes: B, G, R from YuvPixel followed by 255.
void I444ToARGBRow_C(const uint8_t* src_y,
1782
                     const uint8_t* src_u,
1783
                     const uint8_t* src_v,
1784
                     uint8_t* rgb_buf,
1785
                     const struct YuvConstants* yuvconstants,
1786
0
                     int width) {
1787
0
  int x;
1788
0
  for (x = 0; x < width; ++x) {
1789
0
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
1790
0
             rgb_buf + 2, yuvconstants);
1791
0
    rgb_buf[3] = 255;
1792
0
    src_y += 1;
1793
0
    src_u += 1;
1794
0
    src_v += 1;
1795
0
    rgb_buf += 4;  // Advance 1 pixel.
1796
0
  }
1797
0
}
1798
1799
// Converts `width` pixels with one 8 bit Y, U and V sample per pixel.
// Each output pixel is 3 bytes: B, G, R from YuvPixel (no alpha byte).
void I444ToRGB24Row_C(const uint8_t* src_y,
1800
                      const uint8_t* src_u,
1801
                      const uint8_t* src_v,
1802
                      uint8_t* rgb_buf,
1803
                      const struct YuvConstants* yuvconstants,
1804
0
                      int width) {
1805
0
  int x;
1806
0
  for (x = 0; x < width; ++x) {
1807
0
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
1808
0
             rgb_buf + 2, yuvconstants);
1809
0
    src_y += 1;
1810
0
    src_u += 1;
1811
0
    src_v += 1;
1812
0
    rgb_buf += 3;  // Advance 1 pixel.
1813
0
  }
1814
0
}
1815
1816
// Also used for 420
// Converts a row where each U/V sample is shared by two adjacent Y samples.
// The main loop handles two pixels per iteration; a trailing odd pixel is
// converted separately. Each output pixel is 4 bytes: B, G, R, 255.
1817
void I422ToARGBRow_C(const uint8_t* src_y,
1818
                     const uint8_t* src_u,
1819
                     const uint8_t* src_v,
1820
                     uint8_t* rgb_buf,
1821
                     const struct YuvConstants* yuvconstants,
1822
0
                     int width) {
1823
0
  int x;
1824
0
  for (x = 0; x < width - 1; x += 2) {
1825
0
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
1826
0
             rgb_buf + 2, yuvconstants);
1827
0
    rgb_buf[3] = 255;
1828
0
    YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
1829
0
             rgb_buf + 6, yuvconstants);
1830
0
    rgb_buf[7] = 255;
1831
0
    src_y += 2;
1832
0
    src_u += 1;
1833
0
    src_v += 1;
1834
0
    rgb_buf += 8;  // Advance 2 pixels.
1835
0
  }
1836
0
  if (width & 1) {
1837
0
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
1838
0
             rgb_buf + 2, yuvconstants);
1839
0
    rgb_buf[3] = 255;
1840
0
  }
1841
0
}
1842
1843
// 10 bit YUV to ARGB
// Each U/V sample is shared by two adjacent Y samples. Pixels are converted
// with YuvPixel10 and written as 4 bytes: B, G, R, 255. A trailing odd pixel
// is converted separately after the two-pixel loop.
1844
void I210ToARGBRow_C(const uint16_t* src_y,
1845
                     const uint16_t* src_u,
1846
                     const uint16_t* src_v,
1847
                     uint8_t* rgb_buf,
1848
                     const struct YuvConstants* yuvconstants,
1849
0
                     int width) {
1850
0
  int x;
1851
0
  for (x = 0; x < width - 1; x += 2) {
1852
0
    YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
1853
0
               rgb_buf + 2, yuvconstants);
1854
0
    rgb_buf[3] = 255;
1855
0
    YuvPixel10(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
1856
0
               rgb_buf + 6, yuvconstants);
1857
0
    rgb_buf[7] = 255;
1858
0
    src_y += 2;
1859
0
    src_u += 1;
1860
0
    src_v += 1;
1861
0
    rgb_buf += 8;  // Advance 2 pixels.
1862
0
  }
1863
0
  if (width & 1) {
1864
0
    YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
1865
0
               rgb_buf + 2, yuvconstants);
1866
0
    rgb_buf[3] = 255;
1867
0
  }
1868
0
}
1869
1870
// Converts `width` pixels with one 16 bit Y, U and V element per pixel using
// YuvPixel10. Each output pixel is 4 bytes: B, G, R, 255.
void I410ToARGBRow_C(const uint16_t* src_y,
1871
                     const uint16_t* src_u,
1872
                     const uint16_t* src_v,
1873
                     uint8_t* rgb_buf,
1874
                     const struct YuvConstants* yuvconstants,
1875
0
                     int width) {
1876
0
  int x;
1877
0
  for (x = 0; x < width; ++x) {
1878
0
    YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
1879
0
               rgb_buf + 2, yuvconstants);
1880
0
    rgb_buf[3] = 255;
1881
0
    src_y += 1;
1882
0
    src_u += 1;
1883
0
    src_v += 1;
1884
0
    rgb_buf += 4;  // Advance 1 pixel.
1885
0
  }
1886
0
}
1887
1888
// Like I210ToARGBRow_C, but the fourth output byte comes from src_a: each
// alpha sample is shifted right by 2 and passed through clamp255. There is
// one alpha sample per pixel; U/V samples are shared by two pixels.
void I210AlphaToARGBRow_C(const uint16_t* src_y,
1889
                          const uint16_t* src_u,
1890
                          const uint16_t* src_v,
1891
                          const uint16_t* src_a,
1892
                          uint8_t* rgb_buf,
1893
                          const struct YuvConstants* yuvconstants,
1894
0
                          int width) {
1895
0
  int x;
1896
0
  for (x = 0; x < width - 1; x += 2) {
1897
0
    YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
1898
0
               rgb_buf + 2, yuvconstants);
1899
0
    rgb_buf[3] = STATIC_CAST(uint8_t, clamp255(src_a[0] >> 2));
1900
0
    YuvPixel10(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
1901
0
               rgb_buf + 6, yuvconstants);
1902
0
    rgb_buf[7] = STATIC_CAST(uint8_t, clamp255(src_a[1] >> 2));
1903
0
    src_y += 2;
1904
0
    src_u += 1;
1905
0
    src_v += 1;
1906
0
    src_a += 2;
1907
0
    rgb_buf += 8;  // Advance 2 pixels.
1908
0
  }
1909
0
  if (width & 1) {
1910
0
    YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
1911
0
               rgb_buf + 2, yuvconstants);
1912
0
    rgb_buf[3] = STATIC_CAST(uint8_t, clamp255(src_a[0] >> 2));
1913
0
  }
1914
0
}
1915
1916
// Like I410ToARGBRow_C, but the fourth output byte comes from src_a: each
// alpha sample is shifted right by 2 and passed through clamp255. Y, U, V and
// alpha all advance by one element per pixel.
void I410AlphaToARGBRow_C(const uint16_t* src_y,
1917
                          const uint16_t* src_u,
1918
                          const uint16_t* src_v,
1919
                          const uint16_t* src_a,
1920
                          uint8_t* rgb_buf,
1921
                          const struct YuvConstants* yuvconstants,
1922
0
                          int width) {
1923
0
  int x;
1924
0
  for (x = 0; x < width; ++x) {
1925
0
    YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
1926
0
               rgb_buf + 2, yuvconstants);
1927
0
    rgb_buf[3] = STATIC_CAST(uint8_t, clamp255(src_a[0] >> 2));
1928
0
    src_y += 1;
1929
0
    src_u += 1;
1930
0
    src_v += 1;
1931
0
    src_a += 1;
1932
0
    rgb_buf += 4;  // Advance 1 pixel.
1933
0
  }
1934
0
}
1935
1936
// 12 bit YUV to ARGB
// Each U/V sample is shared by two adjacent Y samples. Pixels are converted
// with YuvPixel12 and written as 4 bytes: B, G, R, 255. A trailing odd pixel
// is converted separately after the two-pixel loop.
1937
void I212ToARGBRow_C(const uint16_t* src_y,
1938
                     const uint16_t* src_u,
1939
                     const uint16_t* src_v,
1940
                     uint8_t* rgb_buf,
1941
                     const struct YuvConstants* yuvconstants,
1942
0
                     int width) {
1943
0
  int x;
1944
0
  for (x = 0; x < width - 1; x += 2) {
1945
0
    YuvPixel12(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
1946
0
               rgb_buf + 2, yuvconstants);
1947
0
    rgb_buf[3] = 255;
1948
0
    YuvPixel12(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
1949
0
               rgb_buf + 6, yuvconstants);
1950
0
    rgb_buf[7] = 255;
1951
0
    src_y += 2;
1952
0
    src_u += 1;
1953
0
    src_v += 1;
1954
0
    rgb_buf += 8;  // Advance 2 pixels.
1955
0
  }
1956
0
  if (width & 1) {
1957
0
    YuvPixel12(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
1958
0
               rgb_buf + 2, yuvconstants);
1959
0
    rgb_buf[3] = 255;
1960
0
  }
1961
0
}
1962
1963
0
// Packs one pixel into a 32 bit AR30 word and stores it at rgb_buf.
// b, g and r are the unshifted intermediate values produced by the
// YuvPixel*_16 helpers. The 8 bit paths shift those values right by 6; here
// they are shifted right by 4 instead, which keeps two more bits, and then
// passed through Clamp10(). The word is laid out as
// b | g << 10 | r << 20 with the top two bits set.
// The word is written in native byte order with memcpy, so rgb_buf does not
// need to be 4-byte aligned.
static void StoreAR30(uint8_t* rgb_buf, int b, int g, int r) {
  uint32_t ar30;
  b = b >> 4;
  g = g >> 4;
  r = r >> 4;
  b = Clamp10(b);
  g = Clamp10(g);
  r = Clamp10(r);
  ar30 = b | ((uint32_t)g << 10) | ((uint32_t)r << 20) | 0xc0000000;
  memcpy(rgb_buf, &ar30, sizeof(ar30));
}
1974
1975
// 10 bit YUV to 10 bit AR30
// Each U/V sample is shared by two adjacent Y samples. YuvPixel10_16 yields
// the intermediate B, G, R values and StoreAR30 packs them into 4 output bytes
// per pixel. A trailing odd pixel is converted after the two-pixel loop.
1976
void I210ToAR30Row_C(const uint16_t* src_y,
1977
                     const uint16_t* src_u,
1978
                     const uint16_t* src_v,
1979
                     uint8_t* rgb_buf,
1980
                     const struct YuvConstants* yuvconstants,
1981
0
                     int width) {
1982
0
  int x;
1983
0
  int b;
1984
0
  int g;
1985
0
  int r;
1986
0
  for (x = 0; x < width - 1; x += 2) {
1987
0
    YuvPixel10_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
1988
0
    StoreAR30(rgb_buf, b, g, r);
1989
0
    YuvPixel10_16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
1990
0
    StoreAR30(rgb_buf + 4, b, g, r);
1991
0
    src_y += 2;
1992
0
    src_u += 1;
1993
0
    src_v += 1;
1994
0
    rgb_buf += 8;  // Advance 2 pixels.
1995
0
  }
1996
0
  if (width & 1) {
1997
0
    YuvPixel10_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
1998
0
    StoreAR30(rgb_buf, b, g, r);
1999
0
  }
2000
0
}
2001
2002
// 12 bit YUV to 10 bit AR30
// Each U/V sample is shared by two adjacent Y samples. YuvPixel12_16 yields
// the intermediate B, G, R values and StoreAR30 packs them into 4 output bytes
// per pixel. A trailing odd pixel is converted after the two-pixel loop.
2003
void I212ToAR30Row_C(const uint16_t* src_y,
2004
                     const uint16_t* src_u,
2005
                     const uint16_t* src_v,
2006
                     uint8_t* rgb_buf,
2007
                     const struct YuvConstants* yuvconstants,
2008
0
                     int width) {
2009
0
  int x;
2010
0
  int b;
2011
0
  int g;
2012
0
  int r;
2013
0
  for (x = 0; x < width - 1; x += 2) {
2014
0
    YuvPixel12_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
2015
0
    StoreAR30(rgb_buf, b, g, r);
2016
0
    YuvPixel12_16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
2017
0
    StoreAR30(rgb_buf + 4, b, g, r);
2018
0
    src_y += 2;
2019
0
    src_u += 1;
2020
0
    src_v += 1;
2021
0
    rgb_buf += 8;  // Advance 2 pixels.
2022
0
  }
2023
0
  if (width & 1) {
2024
0
    YuvPixel12_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
2025
0
    StoreAR30(rgb_buf, b, g, r);
2026
0
  }
2027
0
}
2028
2029
// Converts `width` pixels with one 16 bit Y, U and V element per pixel.
// YuvPixel10_16 yields the intermediate B, G, R values and StoreAR30 packs
// them into 4 output bytes per pixel.
void I410ToAR30Row_C(const uint16_t* src_y,
2030
                     const uint16_t* src_u,
2031
                     const uint16_t* src_v,
2032
                     uint8_t* rgb_buf,
2033
                     const struct YuvConstants* yuvconstants,
2034
0
                     int width) {
2035
0
  int x;
2036
0
  int b;
2037
0
  int g;
2038
0
  int r;
2039
0
  for (x = 0; x < width; ++x) {
2040
0
    YuvPixel10_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
2041
0
    StoreAR30(rgb_buf, b, g, r);
2042
0
    src_y += 1;
2043
0
    src_u += 1;
2044
0
    src_v += 1;
2045
0
    rgb_buf += 4;  // Advance 1 pixel.
2046
0
  }
2047
0
}
2048
2049
// P210 has 10 bits in msb of 16 bit NV12 style layout.
// U is read from src_uv[0] and V from src_uv[1]; each U/V pair is shared by
// two adjacent Y samples. Pixels are converted with YuvPixel16_8 and written
// as 4 bytes: B, G, R, 255. A trailing odd pixel is converted separately.
2050
void P210ToARGBRow_C(const uint16_t* src_y,
2051
                     const uint16_t* src_uv,
2052
                     uint8_t* dst_argb,
2053
                     const struct YuvConstants* yuvconstants,
2054
0
                     int width) {
2055
0
  int x;
2056
0
  for (x = 0; x < width - 1; x += 2) {
2057
0
    YuvPixel16_8(src_y[0], src_uv[0], src_uv[1], dst_argb + 0, dst_argb + 1,
2058
0
                 dst_argb + 2, yuvconstants);
2059
0
    dst_argb[3] = 255;
2060
0
    YuvPixel16_8(src_y[1], src_uv[0], src_uv[1], dst_argb + 4, dst_argb + 5,
2061
0
                 dst_argb + 6, yuvconstants);
2062
0
    dst_argb[7] = 255;
2063
0
    src_y += 2;
2064
0
    src_uv += 2;
2065
0
    dst_argb += 8;  // Advance 2 pixels.
2066
0
  }
2067
0
  if (width & 1) {
2068
0
    YuvPixel16_8(src_y[0], src_uv[0], src_uv[1], dst_argb + 0, dst_argb + 1,
2069
0
                 dst_argb + 2, yuvconstants);
2070
0
    dst_argb[3] = 255;
2071
0
  }
2072
0
}
2073
2074
// Converts `width` pixels where every pixel has its own interleaved U/V pair
// (U at src_uv[0], V at src_uv[1]). Pixels are converted with YuvPixel16_8 and
// written as 4 bytes: B, G, R, 255.
void P410ToARGBRow_C(const uint16_t* src_y,
2075
                     const uint16_t* src_uv,
2076
                     uint8_t* dst_argb,
2077
                     const struct YuvConstants* yuvconstants,
2078
0
                     int width) {
2079
0
  int x;
2080
0
  for (x = 0; x < width; ++x) {
2081
0
    YuvPixel16_8(src_y[0], src_uv[0], src_uv[1], dst_argb + 0, dst_argb + 1,
2082
0
                 dst_argb + 2, yuvconstants);
2083
0
    dst_argb[3] = 255;
2084
0
    src_y += 1;
2085
0
    src_uv += 2;
2086
0
    dst_argb += 4;  // Advance 1 pixel.
2087
0
  }
2088
0
}
2089
2090
// Converts a row with interleaved U/V (U at src_uv[0], V at src_uv[1]) where
// each U/V pair is shared by two adjacent Y samples. YuvPixel16_16 yields the
// intermediate B, G, R values and StoreAR30 packs them into 4 output bytes per
// pixel. A trailing odd pixel is converted after the two-pixel loop.
void P210ToAR30Row_C(const uint16_t* src_y,
2091
                     const uint16_t* src_uv,
2092
                     uint8_t* dst_ar30,
2093
                     const struct YuvConstants* yuvconstants,
2094
0
                     int width) {
2095
0
  int x;
2096
0
  int b;
2097
0
  int g;
2098
0
  int r;
2099
0
  for (x = 0; x < width - 1; x += 2) {
2100
0
    YuvPixel16_16(src_y[0], src_uv[0], src_uv[1], &b, &g, &r, yuvconstants);
2101
0
    StoreAR30(dst_ar30, b, g, r);
2102
0
    YuvPixel16_16(src_y[1], src_uv[0], src_uv[1], &b, &g, &r, yuvconstants);
2103
0
    StoreAR30(dst_ar30 + 4, b, g, r);
2104
0
    src_y += 2;
2105
0
    src_uv += 2;
2106
0
    dst_ar30 += 8;  // Advance 2 pixels.
2107
0
  }
2108
0
  if (width & 1) {
2109
0
    YuvPixel16_16(src_y[0], src_uv[0], src_uv[1], &b, &g, &r, yuvconstants);
2110
0
    StoreAR30(dst_ar30, b, g, r);
2111
0
  }
2112
0
}
2113
2114
// Converts `width` pixels where every pixel has its own interleaved U/V pair
// (U at src_uv[0], V at src_uv[1]). YuvPixel16_16 yields the intermediate
// B, G, R values and StoreAR30 packs them into 4 output bytes per pixel.
void P410ToAR30Row_C(const uint16_t* src_y,
2115
                     const uint16_t* src_uv,
2116
                     uint8_t* dst_ar30,
2117
                     const struct YuvConstants* yuvconstants,
2118
0
                     int width) {
2119
0
  int x;
2120
0
  int b;
2121
0
  int g;
2122
0
  int r;
2123
0
  for (x = 0; x < width; ++x) {
2124
0
    YuvPixel16_16(src_y[0], src_uv[0], src_uv[1], &b, &g, &r, yuvconstants);
2125
0
    StoreAR30(dst_ar30, b, g, r);
2126
0
    src_y += 1;
2127
0
    src_uv += 2;
2128
0
    dst_ar30 += 4;  // Advance 1 pixel.
2129
0
  }
2130
0
}
2131
2132
// 8 bit YUV to 10 bit AR30
2133
// YuvPixel8_16 yields the unshifted intermediate B, G, R values for 8 bit
// input, and StoreAR30 reduces them to 10 bits per channel and packs them.
// Each U/V sample is shared by two adjacent Y samples; a trailing odd pixel is
// converted after the two-pixel loop.
2134
void I422ToAR30Row_C(const uint8_t* src_y,
2135
                     const uint8_t* src_u,
2136
                     const uint8_t* src_v,
2137
                     uint8_t* rgb_buf,
2138
                     const struct YuvConstants* yuvconstants,
2139
0
                     int width) {
2140
0
  int x;
2141
0
  int b;
2142
0
  int g;
2143
0
  int r;
2144
0
  for (x = 0; x < width - 1; x += 2) {
2145
0
    YuvPixel8_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
2146
0
    StoreAR30(rgb_buf, b, g, r);
2147
0
    YuvPixel8_16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
2148
0
    StoreAR30(rgb_buf + 4, b, g, r);
2149
0
    src_y += 2;
2150
0
    src_u += 1;
2151
0
    src_v += 1;
2152
0
    rgb_buf += 8;  // Advance 2 pixels.
2153
0
  }
2154
0
  if (width & 1) {
2155
0
    YuvPixel8_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
2156
0
    StoreAR30(rgb_buf, b, g, r);
2157
0
  }
2158
0
}
2159
2160
// Converts `width` pixels with one 8 bit Y, U, V and alpha sample per pixel.
// Each output pixel is 4 bytes: B, G, R from YuvPixel followed by the alpha
// byte copied unchanged from src_a.
void I444AlphaToARGBRow_C(const uint8_t* src_y,
2161
                          const uint8_t* src_u,
2162
                          const uint8_t* src_v,
2163
                          const uint8_t* src_a,
2164
                          uint8_t* rgb_buf,
2165
                          const struct YuvConstants* yuvconstants,
2166
0
                          int width) {
2167
0
  int x;
2168
0
  for (x = 0; x < width; ++x) {
2169
0
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
2170
0
             rgb_buf + 2, yuvconstants);
2171
0
    rgb_buf[3] = src_a[0];
2172
0
    src_y += 1;
2173
0
    src_u += 1;
2174
0
    src_v += 1;
2175
0
    src_a += 1;
2176
0
    rgb_buf += 4;  // Advance 1 pixel.
2177
0
  }
2178
0
}
2179
2180
// Converts a row where each U/V sample is shared by two adjacent Y samples
// and there is one alpha sample per pixel. Each output pixel is 4 bytes:
// B, G, R from YuvPixel followed by the alpha byte copied unchanged from
// src_a. A trailing odd pixel is converted after the two-pixel loop.
void I422AlphaToARGBRow_C(const uint8_t* src_y,
2181
                          const uint8_t* src_u,
2182
                          const uint8_t* src_v,
2183
                          const uint8_t* src_a,
2184
                          uint8_t* rgb_buf,
2185
                          const struct YuvConstants* yuvconstants,
2186
0
                          int width) {
2187
0
  int x;
2188
0
  for (x = 0; x < width - 1; x += 2) {
2189
0
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
2190
0
             rgb_buf + 2, yuvconstants);
2191
0
    rgb_buf[3] = src_a[0];
2192
0
    YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
2193
0
             rgb_buf + 6, yuvconstants);
2194
0
    rgb_buf[7] = src_a[1];
2195
0
    src_y += 2;
2196
0
    src_u += 1;
2197
0
    src_v += 1;
2198
0
    src_a += 2;
2199
0
    rgb_buf += 8;  // Advance 2 pixels.
2200
0
  }
2201
0
  if (width & 1) {
2202
0
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
2203
0
             rgb_buf + 2, yuvconstants);
2204
0
    rgb_buf[3] = src_a[0];
2205
0
  }
2206
0
}
2207
2208
// Converts a row where each U/V sample is shared by two adjacent Y samples.
// Each output pixel is 3 bytes: B, G, R from YuvPixel (no alpha byte), so the
// two-pixel loop advances rgb_buf by 6. A trailing odd pixel is converted
// after the loop.
void I422ToRGB24Row_C(const uint8_t* src_y,
2209
                      const uint8_t* src_u,
2210
                      const uint8_t* src_v,
2211
                      uint8_t* rgb_buf,
2212
                      const struct YuvConstants* yuvconstants,
2213
0
                      int width) {
2214
0
  int x;
2215
0
  for (x = 0; x < width - 1; x += 2) {
2216
0
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
2217
0
             rgb_buf + 2, yuvconstants);
2218
0
    YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 3, rgb_buf + 4,
2219
0
             rgb_buf + 5, yuvconstants);
2220
0
    src_y += 2;
2221
0
    src_u += 1;
2222
0
    src_v += 1;
2223
0
    rgb_buf += 6;  // Advance 2 pixels.
2224
0
  }
2225
0
  if (width & 1) {
2226
0
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
2227
0
             rgb_buf + 2, yuvconstants);
2228
0
  }
2229
0
}
2230
2231
void I422ToARGB4444Row_C(const uint8_t* src_y,
2232
                         const uint8_t* src_u,
2233
                         const uint8_t* src_v,
2234
                         uint8_t* dst_argb4444,
2235
                         const struct YuvConstants* yuvconstants,
2236
0
                         int width) {
2237
0
  uint8_t b0;
2238
0
  uint8_t g0;
2239
0
  uint8_t r0;
2240
0
  uint8_t b1;
2241
0
  uint8_t g1;
2242
0
  uint8_t r1;
2243
0
  int x;
2244
0
  for (x = 0; x < width - 1; x += 2) {
2245
0
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
2246
0
    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
2247
0
    b0 = b0 >> 4;
2248
0
    g0 = g0 >> 4;
2249
0
    r0 = r0 >> 4;
2250
0
    b1 = b1 >> 4;
2251
0
    g1 = g1 >> 4;
2252
0
    r1 = r1 >> 4;
2253
0
    *(uint16_t*)(dst_argb4444 + 0) =
2254
0
        STATIC_CAST(uint16_t, b0 | (g0 << 4) | (r0 << 8) | 0xf000);
2255
0
    *(uint16_t*)(dst_argb4444 + 2) =
2256
0
        STATIC_CAST(uint16_t, b1 | (g1 << 4) | (r1 << 8) | 0xf000);
2257
0
    src_y += 2;
2258
0
    src_u += 1;
2259
0
    src_v += 1;
2260
0
    dst_argb4444 += 4;  // Advance 2 pixels.
2261
0
  }
2262
0
  if (width & 1) {
2263
0
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
2264
0
    b0 = b0 >> 4;
2265
0
    g0 = g0 >> 4;
2266
0
    r0 = r0 >> 4;
2267
0
    *(uint16_t*)(dst_argb4444) =
2268
0
        STATIC_CAST(uint16_t, b0 | (g0 << 4) | (r0 << 8) | 0xf000);
2269
0
  }
2270
0
}
2271
2272
void I422ToARGB1555Row_C(const uint8_t* src_y,
2273
                         const uint8_t* src_u,
2274
                         const uint8_t* src_v,
2275
                         uint8_t* dst_argb1555,
2276
                         const struct YuvConstants* yuvconstants,
2277
0
                         int width) {
2278
0
  uint8_t b0;
2279
0
  uint8_t g0;
2280
0
  uint8_t r0;
2281
0
  uint8_t b1;
2282
0
  uint8_t g1;
2283
0
  uint8_t r1;
2284
0
  int x;
2285
0
  for (x = 0; x < width - 1; x += 2) {
2286
0
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
2287
0
    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
2288
0
    b0 = b0 >> 3;
2289
0
    g0 = g0 >> 3;
2290
0
    r0 = r0 >> 3;
2291
0
    b1 = b1 >> 3;
2292
0
    g1 = g1 >> 3;
2293
0
    r1 = r1 >> 3;
2294
0
    *(uint16_t*)(dst_argb1555 + 0) =
2295
0
        STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 10) | 0x8000);
2296
0
    *(uint16_t*)(dst_argb1555 + 2) =
2297
0
        STATIC_CAST(uint16_t, b1 | (g1 << 5) | (r1 << 10) | 0x8000);
2298
0
    src_y += 2;
2299
0
    src_u += 1;
2300
0
    src_v += 1;
2301
0
    dst_argb1555 += 4;  // Advance 2 pixels.
2302
0
  }
2303
0
  if (width & 1) {
2304
0
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
2305
0
    b0 = b0 >> 3;
2306
0
    g0 = g0 >> 3;
2307
0
    r0 = r0 >> 3;
2308
0
    *(uint16_t*)(dst_argb1555) =
2309
0
        STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 10) | 0x8000);
2310
0
  }
2311
0
}
2312
2313
void I422ToRGB565Row_C(const uint8_t* src_y,
2314
                       const uint8_t* src_u,
2315
                       const uint8_t* src_v,
2316
                       uint8_t* dst_rgb565,
2317
                       const struct YuvConstants* yuvconstants,
2318
0
                       int width) {
2319
0
  uint8_t b0;
2320
0
  uint8_t g0;
2321
0
  uint8_t r0;
2322
0
  uint8_t b1;
2323
0
  uint8_t g1;
2324
0
  uint8_t r1;
2325
0
  int x;
2326
0
  for (x = 0; x < width - 1; x += 2) {
2327
0
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
2328
0
    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
2329
0
    b0 = b0 >> 3;
2330
0
    g0 = g0 >> 2;
2331
0
    r0 = r0 >> 3;
2332
0
    b1 = b1 >> 3;
2333
0
    g1 = g1 >> 2;
2334
0
    r1 = r1 >> 3;
2335
0
    *(uint16_t*)(dst_rgb565 + 0) =
2336
0
        STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 11));
2337
0
    *(uint16_t*)(dst_rgb565 + 2) =
2338
0
        STATIC_CAST(uint16_t, b1 | (g1 << 5) | (r1 << 11));
2339
0
    src_y += 2;
2340
0
    src_u += 1;
2341
0
    src_v += 1;
2342
0
    dst_rgb565 += 4;  // Advance 2 pixels.
2343
0
  }
2344
0
  if (width & 1) {
2345
0
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
2346
0
    b0 = b0 >> 3;
2347
0
    g0 = g0 >> 2;
2348
0
    r0 = r0 >> 3;
2349
0
    *(uint16_t*)(dst_rgb565 + 0) =
2350
0
        STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 11));
2351
0
  }
2352
0
}
2353
2354
// Converts a row with 8 bit Y and interleaved chroma where U is src_uv[0] and
// V is src_uv[1]; each U/V pair is shared by two adjacent Y samples.
// Each output pixel is 4 bytes: B, G, R from YuvPixel followed by 255.
// A trailing odd pixel is converted after the two-pixel loop.
void NV12ToARGBRow_C(const uint8_t* src_y,
2355
                     const uint8_t* src_uv,
2356
                     uint8_t* rgb_buf,
2357
                     const struct YuvConstants* yuvconstants,
2358
0
                     int width) {
2359
0
  int x;
2360
0
  for (x = 0; x < width - 1; x += 2) {
2361
0
    YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
2362
0
             rgb_buf + 2, yuvconstants);
2363
0
    rgb_buf[3] = 255;
2364
0
    YuvPixel(src_y[1], src_uv[0], src_uv[1], rgb_buf + 4, rgb_buf + 5,
2365
0
             rgb_buf + 6, yuvconstants);
2366
0
    rgb_buf[7] = 255;
2367
0
    src_y += 2;
2368
0
    src_uv += 2;
2369
0
    rgb_buf += 8;  // Advance 2 pixels.
2370
0
  }
2371
0
  if (width & 1) {
2372
0
    YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
2373
0
             rgb_buf + 2, yuvconstants);
2374
0
    rgb_buf[3] = 255;
2375
0
  }
2376
0
}
2377
2378
// Same as NV12ToARGBRow_C except for the chroma order: V is src_vu[0] and U is
// src_vu[1], so the two are swapped when passed to YuvPixel.
// Each output pixel is 4 bytes: B, G, R from YuvPixel followed by 255.
// A trailing odd pixel is converted after the two-pixel loop.
void NV21ToARGBRow_C(const uint8_t* src_y,
2379
                     const uint8_t* src_vu,
2380
                     uint8_t* rgb_buf,
2381
                     const struct YuvConstants* yuvconstants,
2382
0
                     int width) {
2383
0
  int x;
2384
0
  for (x = 0; x < width - 1; x += 2) {
2385
0
    YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
2386
0
             rgb_buf + 2, yuvconstants);
2387
0
    rgb_buf[3] = 255;
2388
0
    YuvPixel(src_y[1], src_vu[1], src_vu[0], rgb_buf + 4, rgb_buf + 5,
2389
0
             rgb_buf + 6, yuvconstants);
2390
0
    rgb_buf[7] = 255;
2391
0
    src_y += 2;
2392
0
    src_vu += 2;
2393
0
    rgb_buf += 8;  // Advance 2 pixels.
2394
0
  }
2395
0
  if (width & 1) {
2396
0
    YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
2397
0
             rgb_buf + 2, yuvconstants);
2398
0
    rgb_buf[3] = 255;
2399
0
  }
2400
0
}
2401
2402
// Converts a row with 8 bit Y and interleaved chroma where U is src_uv[0] and
// V is src_uv[1]; each U/V pair is shared by two adjacent Y samples.
// Each output pixel is 3 bytes: B, G, R from YuvPixel (no alpha byte).
// A trailing odd pixel is converted after the two-pixel loop.
void NV12ToRGB24Row_C(const uint8_t* src_y,
2403
                      const uint8_t* src_uv,
2404
                      uint8_t* rgb_buf,
2405
                      const struct YuvConstants* yuvconstants,
2406
0
                      int width) {
2407
0
  int x;
2408
0
  for (x = 0; x < width - 1; x += 2) {
2409
0
    YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
2410
0
             rgb_buf + 2, yuvconstants);
2411
0
    YuvPixel(src_y[1], src_uv[0], src_uv[1], rgb_buf + 3, rgb_buf + 4,
2412
0
             rgb_buf + 5, yuvconstants);
2413
0
    src_y += 2;
2414
0
    src_uv += 2;
2415
0
    rgb_buf += 6;  // Advance 2 pixels.
2416
0
  }
2417
0
  if (width & 1) {
2418
0
    YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
2419
0
             rgb_buf + 2, yuvconstants);
2420
0
  }
2421
0
}
2422
2423
// Converts a row of NV21 (Y plane plus interleaved VU plane) into 3-byte
// pixels. Two adjacent pixels share one VU sample: src_vu[1] is handed to
// YuvPixel as its second argument and src_vu[0] as its third. YuvPixel fills
// all three bytes of each output pixel.
void NV21ToRGB24Row_C(const uint8_t* src_y,
                      const uint8_t* src_vu,
                      uint8_t* rgb_buf,
                      const struct YuvConstants* yuvconstants,
                      int width) {
  int col = 0;
  // Full pixel pairs.
  while (col < width - 1) {
    uint8_t* second = rgb_buf + 3;
    YuvPixel(src_y[0], src_vu[1], src_vu[0], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    YuvPixel(src_y[1], src_vu[1], src_vu[0], &second[0], &second[1],
             &second[2], yuvconstants);
    src_y += 2;
    src_vu += 2;
    rgb_buf += 6;  // Two 3-byte pixels were written.
    col += 2;
  }
  // Odd width: one pixel is left and uses the next VU sample.
  if (width & 1) {
    YuvPixel(src_y[0], src_vu[1], src_vu[0], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
  }
}
2443
2444
void NV12ToRGB565Row_C(const uint8_t* src_y,
2445
                       const uint8_t* src_uv,
2446
                       uint8_t* dst_rgb565,
2447
                       const struct YuvConstants* yuvconstants,
2448
0
                       int width) {
2449
0
  uint8_t b0;
2450
0
  uint8_t g0;
2451
0
  uint8_t r0;
2452
0
  uint8_t b1;
2453
0
  uint8_t g1;
2454
0
  uint8_t r1;
2455
0
  int x;
2456
0
  for (x = 0; x < width - 1; x += 2) {
2457
0
    YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
2458
0
    YuvPixel(src_y[1], src_uv[0], src_uv[1], &b1, &g1, &r1, yuvconstants);
2459
0
    b0 = b0 >> 3;
2460
0
    g0 = g0 >> 2;
2461
0
    r0 = r0 >> 3;
2462
0
    b1 = b1 >> 3;
2463
0
    g1 = g1 >> 2;
2464
0
    r1 = r1 >> 3;
2465
0
    *(uint16_t*)(dst_rgb565 + 0) = STATIC_CAST(uint16_t, b0) |
2466
0
                                   STATIC_CAST(uint16_t, g0 << 5) |
2467
0
                                   STATIC_CAST(uint16_t, r0 << 11);
2468
0
    *(uint16_t*)(dst_rgb565 + 2) = STATIC_CAST(uint16_t, b1) |
2469
0
                                   STATIC_CAST(uint16_t, g1 << 5) |
2470
0
                                   STATIC_CAST(uint16_t, r1 << 11);
2471
0
    src_y += 2;
2472
0
    src_uv += 2;
2473
0
    dst_rgb565 += 4;  // Advance 2 pixels.
2474
0
  }
2475
0
  if (width & 1) {
2476
0
    YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
2477
0
    b0 = b0 >> 3;
2478
0
    g0 = g0 >> 2;
2479
0
    r0 = r0 >> 3;
2480
0
    *(uint16_t*)(dst_rgb565) = STATIC_CAST(uint16_t, b0) |
2481
0
                               STATIC_CAST(uint16_t, g0 << 5) |
2482
0
                               STATIC_CAST(uint16_t, r0 << 11);
2483
0
  }
2484
0
}
2485
2486
// Converts a row of packed YUY2 into 4-byte pixels. Each 4-byte source group
// holds two Y samples at offsets 0 and 2; offsets 1 and 3 are handed to
// YuvPixel as its second and third arguments for both pixels. YuvPixel fills
// bytes 0..2 of each output pixel and byte 3 is set to 255.
void YUY2ToARGBRow_C(const uint8_t* src_yuy2,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int col = 0;
  // Full pixel pairs: one 4-byte source group each.
  while (col < width - 1) {
    uint8_t* second = rgb_buf + 4;
    YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    rgb_buf[3] = 255;
    YuvPixel(src_yuy2[2], src_yuy2[1], src_yuy2[3], &second[0], &second[1],
             &second[2], yuvconstants);
    second[3] = 255;
    src_yuy2 += 4;
    rgb_buf += 8;  // Two 4-byte pixels were written.
    col += 2;
  }
  // Odd width: the last pixel still reads offsets 1 and 3 of its group.
  if (width & 1) {
    YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    rgb_buf[3] = 255;
  }
}
2507
2508
// Converts a row of packed UYVY into 4-byte pixels. Each 4-byte source group
// holds two Y samples at offsets 1 and 3; offsets 0 and 2 are handed to
// YuvPixel as its second and third arguments for both pixels. YuvPixel fills
// bytes 0..2 of each output pixel and byte 3 is set to 255.
void UYVYToARGBRow_C(const uint8_t* src_uyvy,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int col = 0;
  // Full pixel pairs: one 4-byte source group each.
  while (col < width - 1) {
    uint8_t* second = rgb_buf + 4;
    YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    rgb_buf[3] = 255;
    YuvPixel(src_uyvy[3], src_uyvy[0], src_uyvy[2], &second[0], &second[1],
             &second[2], yuvconstants);
    second[3] = 255;
    src_uyvy += 4;
    rgb_buf += 8;  // Two 4-byte pixels were written.
    col += 2;
  }
  // Odd width: the last pixel still reads offsets 0 and 2 of its group.
  if (width & 1) {
    YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    rgb_buf[3] = 255;
  }
}
2529
2530
// Converts a row of planar Y, U and V samples into 4-byte pixels whose first
// byte is set to 255 and whose bytes 1..3 are filled by YuvPixel. One U and
// one V sample are shared by two adjacent pixels.
void I422ToRGBARow_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int col = 0;
  // Full pixel pairs.
  while (col < width - 1) {
    uint8_t* second = rgb_buf + 4;
    YuvPixel(src_y[0], src_u[0], src_v[0], &rgb_buf[1], &rgb_buf[2],
             &rgb_buf[3], yuvconstants);
    rgb_buf[0] = 255;
    YuvPixel(src_y[1], src_u[0], src_v[0], &second[1], &second[2],
             &second[3], yuvconstants);
    second[0] = 255;
    src_y += 2;
    ++src_u;
    ++src_v;
    rgb_buf += 8;  // Two 4-byte pixels were written.
    col += 2;
  }
  // Odd width: one pixel is left and uses the next U/V samples.
  if (width & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &rgb_buf[1], &rgb_buf[2],
             &rgb_buf[3], yuvconstants);
    rgb_buf[0] = 255;
  }
}
2555
2556
// Converts a row of Y-only samples into 4-byte pixels. YPixel fills bytes
// 0..2 of each output pixel from a single Y sample and byte 3 is set to 255.
void I400ToARGBRow_C(const uint8_t* src_y,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int col = 0;
  // Two pixels per iteration.
  while (col < width - 1) {
    uint8_t* second = rgb_buf + 4;
    YPixel(src_y[0], &rgb_buf[0], &rgb_buf[1], &rgb_buf[2], yuvconstants);
    rgb_buf[3] = 255;
    YPixel(src_y[1], &second[0], &second[1], &second[2], yuvconstants);
    second[3] = 255;
    src_y += 2;
    rgb_buf += 8;  // Two 4-byte pixels were written.
    col += 2;
  }
  // Odd width: one pixel is left.
  if (width & 1) {
    YPixel(src_y[0], &rgb_buf[0], &rgb_buf[1], &rgb_buf[2], yuvconstants);
    rgb_buf[3] = 255;
  }
}
2574
2575
0
// Writes the `width` bytes of src into dst in reverse order.
void MirrorRow_C(const uint8_t* src, uint8_t* dst, int width) {
  const uint8_t* tail = src + (width - 1);  // Last source byte.
  int x = 0;
  // Two bytes per iteration, walking backwards from the tail.
  while (x < width - 1) {
    dst[x] = tail[-x];
    dst[x + 1] = tail[-x - 1];
    x += 2;
  }
  // Odd width: the remaining byte is the first source byte.
  if (width & 1) {
    dst[width - 1] = tail[-x];
  }
}
2587
2588
0
// Writes the `width` 16-bit samples of src into dst in reverse order.
void MirrorRow_16_C(const uint16_t* src, uint16_t* dst, int width) {
  const uint16_t* tail = src + (width - 1);  // Last source sample.
  int x = 0;
  // Two samples per iteration, walking backwards from the tail.
  while (x < width - 1) {
    dst[x] = tail[-x];
    dst[x + 1] = tail[-x - 1];
    x += 2;
  }
  // Odd width: the remaining sample is the first source sample.
  if (width & 1) {
    dst[width - 1] = tail[-x];
  }
}
2600
2601
0
// Reverses the order of `width` two-byte pairs from src_uv into dst_uv. The
// two bytes inside each pair keep their order.
//
// Source positions are computed per element inside the loop, so width == 0
// never left-shifts a negative value and never forms a pointer in front of
// src_uv. A width <= 0 writes nothing.
void MirrorUVRow_C(const uint8_t* src_uv, uint8_t* dst_uv, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const uint8_t* in = src_uv + 2 * (width - 1 - x);
    uint8_t* out = dst_uv + 2 * x;
    out[0] = in[0];
    out[1] = in[1];
  }
}
2611
2612
// Reverses the order of `width` two-byte pairs from src_uv and splits them:
// byte 0 of each pair goes to dst_u and byte 1 goes to dst_v.
//
// Source positions are computed per element inside the loop, so width == 0
// never left-shifts a negative value and never forms a pointer in front of
// src_uv. A width <= 0 writes nothing.
void MirrorSplitUVRow_C(const uint8_t* src_uv,
                        uint8_t* dst_u,
                        uint8_t* dst_v,
                        int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const uint8_t* in = src_uv + 2 * (width - 1 - x);
    dst_u[x] = in[0];
    dst_v[x] = in[1];
  }
}
2630
2631
0
// Reverses the order of `width` 4-byte pixels from src into dst. The four
// bytes inside each pixel keep their order.
//
// Pixels are moved with memcpy through a temporary rather than through
// `uint32_t*` casts, because src and dst are byte pointers with no alignment
// guarantee. A width <= 0 writes nothing.
void ARGBMirrorRow_C(const uint8_t* src, uint8_t* dst, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t pixel;
    // Both indices are non-negative inside the loop.
    memcpy(&pixel, src + (size_t)(width - 1 - x) * 4, sizeof(pixel));
    memcpy(dst + (size_t)x * 4, &pixel, sizeof(pixel));
  }
}
2645
2646
0
// Reverses the order of `width` 3-byte pixels from src_rgb24 into dst_rgb24.
// The three bytes inside each pixel keep their order.
void RGB24MirrorRow_C(const uint8_t* src_rgb24, uint8_t* dst_rgb24, int width) {
  const uint8_t* in = src_rgb24 + (width * 3 - 3);  // Last source pixel.
  uint8_t* out = dst_rgb24;
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    // Read the whole pixel before writing it.
    const uint8_t c0 = in[0];
    const uint8_t c1 = in[1];
    const uint8_t c2 = in[2];
    out[0] = c0;
    out[1] = c1;
    out[2] = c2;
    in -= 3;
    out += 3;
  }
}
2660
2661
// De-interleaves `width` two-byte pairs: byte 0 of each pair goes to dst_u
// and byte 1 goes to dst_v.
void SplitUVRow_C(const uint8_t* src_uv,
                  uint8_t* dst_u,
                  uint8_t* dst_v,
                  int width) {
  int x = 0;
  // Two pairs per iteration.
  while (x < width - 1) {
    const uint8_t* in = src_uv + 2 * x;
    dst_u[x] = in[0];
    dst_u[x + 1] = in[2];
    dst_v[x] = in[1];
    dst_v[x + 1] = in[3];
    x += 2;
  }
  // Odd width: one pair is left.
  if (width & 1) {
    const uint8_t* in = src_uv + 2 * x;
    dst_u[width - 1] = in[0];
    dst_v[width - 1] = in[1];
  }
}
2678
2679
// Interleaves `width` samples of src_u and src_v into dst_uv as pairs with
// the src_u byte first.
void MergeUVRow_C(const uint8_t* src_u,
                  const uint8_t* src_v,
                  uint8_t* dst_uv,
                  int width) {
  int x = 0;
  // Two pairs per iteration.
  while (x < width - 1) {
    uint8_t* out = dst_uv + 2 * x;
    out[0] = src_u[x];
    out[1] = src_v[x];
    out[2] = src_u[x + 1];
    out[3] = src_v[x + 1];
    x += 2;
  }
  // Odd width: one pair is left.
  if (width & 1) {
    uint8_t* out = dst_uv + 2 * x;
    out[0] = src_u[width - 1];
    out[1] = src_v[width - 1];
  }
}
2696
2697
// Copies one row out of 16-byte-wide tiles into a linear row. Each full
// group of 16 output bytes comes from the current tile, after which src
// advances by src_tile_stride bytes. A remainder of `width & 15` bytes is
// copied from the start of the next tile.
void DetileRow_C(const uint8_t* src,
                 ptrdiff_t src_tile_stride,
                 uint8_t* dst,
                 int width) {
  int done = 0;
  while (done < width - 15) {
    memcpy(dst, src, 16);
    src += src_tile_stride;
    dst += 16;
    done += 16;
  }
  const int rest = width & 15;
  if (rest) {
    memcpy(dst, src, rest);
  }
}
2711
2712
// 16-bit variant of the row detiler. Each full group of 16 output samples
// comes from the current tile, after which src advances by src_tile_stride
// elements. A remainder of `width & 15` samples is copied from the start of
// the next tile.
void DetileRow_16_C(const uint16_t* src,
                    ptrdiff_t src_tile_stride,
                    uint16_t* dst,
                    int width) {
  int done = 0;
  while (done < width - 15) {
    memcpy(dst, src, 16 * sizeof(uint16_t));
    src += src_tile_stride;
    dst += 16;
    done += 16;
  }
  const int rest = width & 15;
  if (rest) {
    memcpy(dst, src, rest * sizeof(uint16_t));
  }
}
2726
2727
// Detiles one row of interleaved two-byte pairs and splits it into dst_u and
// dst_v via SplitUVRow_C. Each 16-byte tile row yields 8 pairs, after which
// src_uv advances by src_tile_stride bytes. A remainder of `width & 15`
// bytes is split as ((width & 15) + 1) / 2 pairs.
void DetileSplitUVRow_C(const uint8_t* src_uv,
                        ptrdiff_t src_tile_stride,
                        uint8_t* dst_u,
                        uint8_t* dst_v,
                        int width) {
  int done = 0;
  while (done < width - 15) {
    SplitUVRow_C(src_uv, dst_u, dst_v, 8);
    src_uv += src_tile_stride;
    dst_u += 8;
    dst_v += 8;
    done += 16;
  }
  const int rest = width & 15;
  if (rest) {
    SplitUVRow_C(src_uv, dst_u, dst_v, (rest + 1) / 2);
  }
}
2743
2744
// Interleaves one row of tiled Y and tiled two-byte UV pairs into 4-byte
// groups of (Y0, src_uv[0], Y1, src_uv[1]). Every 16 pixels consume 16 bytes
// from each source, after which the sources advance to the next tile by
// their tile strides.
// Only full groups of 16 pixels are converted; a remainder of `width & 15`
// pixels is not written.
void DetileToYUY2_C(const uint8_t* src_y,
                    ptrdiff_t src_y_tile_stride,
                    const uint8_t* src_uv,
                    ptrdiff_t src_uv_tile_stride,
                    uint8_t* dst_yuy2,
                    int width) {
  int x = 0;
  while (x < width - 15) {
    int pair;
    for (pair = 0; pair < 8; ++pair) {
      const int s = pair * 2;  // Offset of this pixel pair inside the tile.
      dst_yuy2[0] = src_y[s];
      dst_yuy2[1] = src_uv[s];
      dst_yuy2[2] = src_y[s + 1];
      dst_yuy2[3] = src_uv[s + 1];
      dst_yuy2 += 4;
    }
    src_y += src_y_tile_stride;
    src_uv += src_uv_tile_stride;
    x += 16;
  }
}
2764
2765
// Unpack MT2T into tiled P010 64 pixels at a time. MT2T's bitstream is encoded
2766
// in 80 byte blocks representing 64 pixels each. The first 16 bytes of the
2767
// block contain all of the lower 2 bits of each pixel packed together, and the
2768
// next 64 bytes represent all the upper 8 bits of the pixel. The lower bits are
2769
// packed into 1x4 blocks, whereas the upper bits are packed in normal raster
2770
// order.
2771
0
// Expands `size` bytes of MT2T data, one 80-byte block (64 pixels) at a
// time. Bytes 0..15 of a block hold the packed 2-bit low parts and bytes
// 16..79 hold the 8-bit high parts. Output pixel n of a block combines:
//   - the high byte in bits 15..8,
//   - the 2 low bits (bit pair n / 16 of low byte n % 16) in bits 7..6,
//   - the top 6 bits of the high byte replicated into bits 5..0.
void UnpackMT2T_C(const uint8_t* src, uint16_t* dst, size_t size) {
  size_t offset;
  for (offset = 0; offset < size; offset += 80) {
    const uint8_t* block = src + offset;
    int n;
    for (n = 0; n < 64; ++n) {
      const int group = n >> 4;  // Which bit pair of the low byte.
      const int lane = n & 15;   // Which low byte.
      const uint16_t high = block[16 + n];
      const int low2 = (block[lane] >> (group * 2)) & 0x3;
      dst[n] = (uint16_t)((low2 << 6) | (high << 8) | (high >> 2));
    }
    dst += 64;
  }
}
2788
2789
// Splits `width` packed 3-byte pixels into three planes: byte 0 goes to
// dst_r, byte 1 to dst_g and byte 2 to dst_b.
void SplitRGBRow_C(const uint8_t* src_rgb,
                   uint8_t* dst_r,
                   uint8_t* dst_g,
                   uint8_t* dst_b,
                   int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    *dst_r++ = src_rgb[0];
    *dst_g++ = src_rgb[1];
    *dst_b++ = src_rgb[2];
    src_rgb += 3;
  }
}
2802
2803
// Packs three planes into `width` 3-byte pixels: src_r supplies byte 0,
// src_g byte 1 and src_b byte 2.
void MergeRGBRow_C(const uint8_t* src_r,
                   const uint8_t* src_g,
                   const uint8_t* src_b,
                   uint8_t* dst_rgb,
                   int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    dst_rgb[0] = *src_r++;
    dst_rgb[1] = *src_g++;
    dst_rgb[2] = *src_b++;
    dst_rgb += 3;
  }
}
2816
2817
// Splits `width` packed 4-byte pixels into four planes: byte 0 goes to
// dst_b, byte 1 to dst_g, byte 2 to dst_r and byte 3 to dst_a.
void SplitARGBRow_C(const uint8_t* src_argb,
                    uint8_t* dst_r,
                    uint8_t* dst_g,
                    uint8_t* dst_b,
                    uint8_t* dst_a,
                    int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    *dst_b++ = src_argb[0];
    *dst_g++ = src_argb[1];
    *dst_r++ = src_argb[2];
    *dst_a++ = src_argb[3];
    src_argb += 4;
  }
}
2832
2833
// Packs four planes into `width` 4-byte pixels: src_b supplies byte 0,
// src_g byte 1, src_r byte 2 and src_a byte 3.
void MergeARGBRow_C(const uint8_t* src_r,
                    const uint8_t* src_g,
                    const uint8_t* src_b,
                    const uint8_t* src_a,
                    uint8_t* dst_argb,
                    int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    dst_argb[0] = *src_b++;
    dst_argb[1] = *src_g++;
    dst_argb[2] = *src_r++;
    dst_argb[3] = *src_a++;
    dst_argb += 4;
  }
}
2848
2849
// Packs three 16-bit planes into `width` 32-bit pixels. Each sample is
// shifted right by (depth - 10), passed through clamp1023, and combined as
// b | g << 10 | r << 20 with the top two bits set (0xc0000000). The 32-bit
// value is stored in native byte order.
//
// depth must be in [10, 16] (asserted).
//
// Each pixel is stored with memcpy rather than through a `uint32_t*` cast,
// because dst_ar30 is a byte pointer with no alignment guarantee.
void MergeXR30Row_C(const uint16_t* src_r,
                    const uint16_t* src_g,
                    const uint16_t* src_b,
                    uint8_t* dst_ar30,
                    int depth,
                    int width) {
  assert(depth >= 10);
  assert(depth <= 16);
  int x;
  int shift = depth - 10;
  for (x = 0; x < width; ++x) {
    uint32_t r = clamp1023(src_r[x] >> shift);
    uint32_t g = clamp1023(src_g[x] >> shift);
    uint32_t b = clamp1023(src_b[x] >> shift);
    uint32_t ar30 = b | (g << 10) | (r << 20) | 0xc0000000;
    // x is non-negative inside the loop.
    memcpy(dst_ar30 + (size_t)x * 4, &ar30, sizeof(ar30));
  }
}
2867
2868
void MergeAR64Row_C(const uint16_t* src_r,
2869
                    const uint16_t* src_g,
2870
                    const uint16_t* src_b,
2871
                    const uint16_t* src_a,
2872
                    uint16_t* dst_ar64,
2873
                    int depth,
2874
0
                    int width) {
2875
0
  assert(depth >= 1);
2876
0
  assert(depth <= 16);
2877
0
  int x;
2878
0
  int shift = 16 - depth;
2879
0
  int max = (1 << depth) - 1;
2880
0
  for (x = 0; x < width; ++x) {
2881
0
    dst_ar64[0] = STATIC_CAST(uint16_t, ClampMax(src_b[x], max) << shift);
2882
0
    dst_ar64[1] = STATIC_CAST(uint16_t, ClampMax(src_g[x], max) << shift);
2883
0
    dst_ar64[2] = STATIC_CAST(uint16_t, ClampMax(src_r[x], max) << shift);
2884
0
    dst_ar64[3] = STATIC_CAST(uint16_t, ClampMax(src_a[x], max) << shift);
2885
0
    dst_ar64 += 4;
2886
0
  }
2887
0
}
2888
2889
void MergeARGB16To8Row_C(const uint16_t* src_r,
2890
                         const uint16_t* src_g,
2891
                         const uint16_t* src_b,
2892
                         const uint16_t* src_a,
2893
                         uint8_t* dst_argb,
2894
                         int depth,
2895
0
                         int width) {
2896
0
  assert(depth >= 8);
2897
0
  assert(depth <= 16);
2898
0
  int x;
2899
0
  int shift = depth - 8;
2900
0
  for (x = 0; x < width; ++x) {
2901
0
    dst_argb[0] = STATIC_CAST(uint8_t, clamp255(src_b[x] >> shift));
2902
0
    dst_argb[1] = STATIC_CAST(uint8_t, clamp255(src_g[x] >> shift));
2903
0
    dst_argb[2] = STATIC_CAST(uint8_t, clamp255(src_r[x] >> shift));
2904
0
    dst_argb[3] = STATIC_CAST(uint8_t, clamp255(src_a[x] >> shift));
2905
0
    dst_argb += 4;
2906
0
  }
2907
0
}
2908
2909
void MergeXR64Row_C(const uint16_t* src_r,
2910
                    const uint16_t* src_g,
2911
                    const uint16_t* src_b,
2912
                    uint16_t* dst_ar64,
2913
                    int depth,
2914
0
                    int width) {
2915
0
  assert(depth >= 1);
2916
0
  assert(depth <= 16);
2917
0
  int x;
2918
0
  int shift = 16 - depth;
2919
0
  int max = (1 << depth) - 1;
2920
0
  for (x = 0; x < width; ++x) {
2921
0
    dst_ar64[0] = STATIC_CAST(uint16_t, ClampMax(src_b[x], max) << shift);
2922
0
    dst_ar64[1] = STATIC_CAST(uint16_t, ClampMax(src_g[x], max) << shift);
2923
0
    dst_ar64[2] = STATIC_CAST(uint16_t, ClampMax(src_r[x], max) << shift);
2924
0
    dst_ar64[3] = 0xffff;
2925
0
    dst_ar64 += 4;
2926
0
  }
2927
0
}
2928
2929
void MergeXRGB16To8Row_C(const uint16_t* src_r,
2930
                         const uint16_t* src_g,
2931
                         const uint16_t* src_b,
2932
                         uint8_t* dst_argb,
2933
                         int depth,
2934
0
                         int width) {
2935
0
  assert(depth >= 8);
2936
0
  assert(depth <= 16);
2937
0
  int x;
2938
0
  int shift = depth - 8;
2939
0
  for (x = 0; x < width; ++x) {
2940
0
    dst_argb[0] = STATIC_CAST(uint8_t, clamp255(src_b[x] >> shift));
2941
0
    dst_argb[1] = STATIC_CAST(uint8_t, clamp255(src_g[x] >> shift));
2942
0
    dst_argb[2] = STATIC_CAST(uint8_t, clamp255(src_r[x] >> shift));
2943
0
    dst_argb[3] = 0xff;
2944
0
    dst_argb += 4;
2945
0
  }
2946
0
}
2947
2948
// Splits `width` packed 4-byte pixels into three planes: byte 0 goes to
// dst_b, byte 1 to dst_g and byte 2 to dst_r. Byte 3 is ignored.
void SplitXRGBRow_C(const uint8_t* src_argb,
                    uint8_t* dst_r,
                    uint8_t* dst_g,
                    uint8_t* dst_b,
                    int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    *dst_b++ = src_argb[0];
    *dst_g++ = src_argb[1];
    *dst_r++ = src_argb[2];
    src_argb += 4;
  }
}
2961
2962
// Packs three planes into `width` 4-byte pixels: src_b supplies byte 0,
// src_g byte 1, src_r byte 2, and byte 3 is set to 255.
void MergeXRGBRow_C(const uint8_t* src_r,
                    const uint8_t* src_g,
                    const uint8_t* src_b,
                    uint8_t* dst_argb,
                    int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    dst_argb[0] = *src_b++;
    dst_argb[1] = *src_g++;
    dst_argb[2] = *src_r++;
    dst_argb[3] = 255;
    dst_argb += 4;
  }
}
2976
2977
// Convert lsb formats to msb, depending on sample depth.
2978
void MergeUVRow_16_C(const uint16_t* src_u,
2979
                     const uint16_t* src_v,
2980
                     uint16_t* dst_uv,
2981
                     int depth,
2982
0
                     int width) {
2983
0
  int shift = 16 - depth;
2984
0
  assert(depth >= 8);
2985
0
  assert(depth <= 16);
2986
0
  int x;
2987
0
  for (x = 0; x < width; ++x) {
2988
0
    dst_uv[0] = STATIC_CAST(uint16_t, src_u[x] << shift);
2989
0
    dst_uv[1] = STATIC_CAST(uint16_t, src_v[x] << shift);
2990
0
    dst_uv += 2;
2991
0
  }
2992
0
}
2993
2994
// Convert msb formats to lsb, depending on sample depth.
2995
// De-interleaves `width` pairs of 16-bit samples from src_uv into dst_u
// (first of each pair) and dst_v (second), shifting each sample right by
// (16 - depth). depth must be in [8, 16] (asserted).
void SplitUVRow_16_C(const uint16_t* src_uv,
                     uint16_t* dst_u,
                     uint16_t* dst_v,
                     int depth,
                     int width) {
  const int down = 16 - depth;
  int remaining;
  assert(depth >= 8);
  assert(depth <= 16);
  for (remaining = width; remaining > 0; --remaining) {
    *dst_u++ = src_uv[0] >> down;
    *dst_v++ = src_uv[1] >> down;
    src_uv += 2;
  }
}
3010
3011
void MultiplyRow_16_C(const uint16_t* src_y,
3012
                      uint16_t* dst_y,
3013
                      int scale,
3014
0
                      int width) {
3015
0
  int x;
3016
0
  for (x = 0; x < width; ++x) {
3017
0
    dst_y[x] = STATIC_CAST(uint16_t, src_y[x] * scale);
3018
0
  }
3019
0
}
3020
3021
// Multiplies each of `width` 16-bit samples by `scale`, shifts the product
// right by 16, and stores the result narrowed to 16 bits.
void DivideRow_16_C(const uint16_t* src_y,
                    uint16_t* dst_y,
                    int scale,
                    int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    *dst_y = (*src_y * scale) >> 16;
    ++src_y;
    ++dst_y;
  }
}
3030
3031
// Use scale to convert lsb formats to msb, depending how many bits there are:
3032
// 32768 = 9 bits
3033
// 16384 = 10 bits
3034
// 4096 = 12 bits
3035
// 256 = 16 bits
3036
// TODO(fbarchard): change scale to bits
3037
#define C16TO8(v, scale) clamp255(((v) * (scale)) >> 16)
3038
3039
void Convert16To8Row_C(const uint16_t* src_y,
3040
                       uint8_t* dst_y,
3041
                       int scale,
3042
0
                       int width) {
3043
0
  int x;
3044
0
  assert(scale >= 256);
3045
0
  assert(scale <= 32768);
3046
3047
0
  for (x = 0; x < width; ++x) {
3048
0
    dst_y[x] = STATIC_CAST(uint8_t, C16TO8(src_y[x], scale));
3049
0
  }
3050
0
}
3051
3052
// Use scale to convert lsb formats to msb, depending how many bits there are:
3053
// 1024 = 10 bits
3054
// Widens `width` bytes to 16-bit samples. `scale` is first multiplied by
// 0x0101, then each output is (src * scale) >> 16.
void Convert8To16Row_C(const uint8_t* src_y,
                       uint16_t* dst_y,
                       int scale,
                       int width) {
  const int replicated = scale * 0x0101;  // replicates the byte.
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    *dst_y = (*src_y * replicated) >> 16;
    ++src_y;
    ++dst_y;
  }
}
3064
3065
// Use scale to convert J420 to I420
3066
// scale parameter is 8.8 fixed point but limited to 0 to 255
3067
// Function is based on DivideRow, but adds a bias
3068
// Does not clamp
3069
// Rescales `width` bytes: each output is ((src * scale) >> 8) + bias,
// narrowed to a byte without clamping.
// scale must be in [0, 255] (asserted).
void Convert8To8Row_C(const uint8_t* src_y,
                      uint8_t* dst_y,
                      int scale,
                      int bias,
                      int width) {
  int remaining;
  assert(scale >= 0);
  assert(scale <= 255);

  for (remaining = width; remaining > 0; --remaining) {
    *dst_y = ((*src_y * scale) >> 8) + bias;
    ++src_y;
    ++dst_y;
  }
}
3082
3083
0
// Copies `count` bytes from src to dst with memcpy.
void CopyRow_C(const uint8_t* src, uint8_t* dst, int count) {
  const size_t bytes = (size_t)count;
  (void)memcpy(dst, src, bytes);
}
3086
3087
0
// Copies `count` 16-bit samples (count * 2 bytes) from src to dst with
// memcpy.
void CopyRow_16_C(const uint16_t* src, uint16_t* dst, int count) {
  const size_t bytes = (size_t)(count * 2);
  (void)memcpy(dst, src, bytes);
}
3090
3091
0
// Fills `width` bytes of dst with the value v8 using memset.
void SetRow_C(uint8_t* dst, uint8_t v8, int width) {
  const size_t bytes = (size_t)width;
  (void)memset(dst, v8, bytes);
}
3094
3095
0
// Fills `width` 4-byte pixels of dst_argb with the 32-bit value v32. Each
// pixel is written with memcpy, so the bytes land in native byte order.
void ARGBSetRow_C(uint8_t* dst_argb, uint32_t v32, int width) {
  uint8_t* out = dst_argb;
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    memcpy(out, &v32, sizeof v32);
    out += sizeof v32;
  }
}
3101
3102
// Filter 2 rows of YUY2 UV's (422) into U and V (420).
3103
// For every 4-byte YUY2 group, averages byte 1 with the byte at the same
// position one stride further on (rounding up) into dst_u, and likewise
// byte 3 into dst_v. One group is consumed per two pixels of width.
void YUY2ToUVRow_C(const uint8_t* src_yuy2,
                   int src_stride_yuy2,
                   uint8_t* dst_u,
                   uint8_t* dst_v,
                   int width) {
  int x = 0;
  while (x < width) {
    const uint8_t* top = src_yuy2;
    const uint8_t* bottom = src_yuy2 + src_stride_yuy2;
    *dst_u = (top[1] + bottom[1] + 1) >> 1;
    *dst_v = (top[3] + bottom[3] + 1) >> 1;
    ++dst_u;
    ++dst_v;
    src_yuy2 += 4;
    x += 2;
  }
}
3118
3119
// Filter 2 rows of YUY2 UV's (422) into UV (NV12).
3120
// For every 4-byte YUY2 group, averages bytes 1 and 3 with the bytes at the
// same positions one stride further on (rounding up) and writes the two
// results as an interleaved pair to dst_uv. One group is consumed per two
// pixels of width.
void YUY2ToNVUVRow_C(const uint8_t* src_yuy2,
                     int src_stride_yuy2,
                     uint8_t* dst_uv,
                     int width) {
  int x = 0;
  while (x < width) {
    const uint8_t* top = src_yuy2;
    const uint8_t* bottom = src_yuy2 + src_stride_yuy2;
    dst_uv[0] = (top[1] + bottom[1] + 1) >> 1;
    dst_uv[1] = (top[3] + bottom[3] + 1) >> 1;
    dst_uv += 2;
    src_yuy2 += 4;
    x += 2;
  }
}
3133
3134
// Copy row of YUY2 UV's (422) into U and V (422).
3135
// For every 4-byte YUY2 group, copies byte 1 to dst_u and byte 3 to dst_v.
// One group is consumed per two pixels of width.
void YUY2ToUV422Row_C(const uint8_t* src_yuy2,
                      uint8_t* dst_u,
                      uint8_t* dst_v,
                      int width) {
  int x = 0;
  while (x < width) {
    *dst_u = src_yuy2[1];
    *dst_v = src_yuy2[3];
    ++dst_u;
    ++dst_v;
    src_yuy2 += 4;
    x += 2;
  }
}
3149
3150
// Copy row of YUY2 Y's (422) into Y (420/422).
3151
0
// Extracts `width` Y samples from packed YUY2: every even source byte
// (offsets 0 and 2 of each 4-byte group) is copied to dst_y.
void YUY2ToYRow_C(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
  int x = 0;
  // Two Y samples per 4-byte group.
  while (x < width - 1) {
    const uint8_t* group = src_yuy2 + 2 * x;
    dst_y[x] = group[0];
    dst_y[x + 1] = group[2];
    x += 2;
  }
  // Odd width: take the first Y of the next group.
  if (width & 1) {
    dst_y[width - 1] = src_yuy2[2 * x];
  }
}
3163
3164
// Filter 2 rows of UYVY UV's (422) into U and V (420).
3165
// For every 4-byte UYVY group, averages byte 0 with the byte at the same
// position one stride further on (rounding up) into dst_u, and likewise
// byte 2 into dst_v. One group is consumed per two pixels of width.
void UYVYToUVRow_C(const uint8_t* src_uyvy,
                   int src_stride_uyvy,
                   uint8_t* dst_u,
                   uint8_t* dst_v,
                   int width) {
  int x = 0;
  while (x < width) {
    const uint8_t* top = src_uyvy;
    const uint8_t* bottom = src_uyvy + src_stride_uyvy;
    *dst_u = (top[0] + bottom[0] + 1) >> 1;
    *dst_v = (top[2] + bottom[2] + 1) >> 1;
    ++dst_u;
    ++dst_v;
    src_uyvy += 4;
    x += 2;
  }
}
3180
3181
// Copy row of UYVY UV's (422) into U and V (422).
3182
// For every 4-byte UYVY group, copies byte 0 to dst_u and byte 2 to dst_v.
// One group is consumed per two pixels of width.
void UYVYToUV422Row_C(const uint8_t* src_uyvy,
                      uint8_t* dst_u,
                      uint8_t* dst_v,
                      int width) {
  int x = 0;
  while (x < width) {
    *dst_u = src_uyvy[0];
    *dst_v = src_uyvy[2];
    ++dst_u;
    ++dst_v;
    src_uyvy += 4;
    x += 2;
  }
}
3196
3197
// Copy row of UYVY Y's (422) into Y (420/422).
3198
0
// Extracts `width` Y samples from packed UYVY: every odd source byte
// (offsets 1 and 3 of each 4-byte group) is copied to dst_y.
void UYVYToYRow_C(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
  int x = 0;
  // Two Y samples per 4-byte group.
  while (x < width - 1) {
    const uint8_t* group = src_uyvy + 2 * x;
    dst_y[x] = group[1];
    dst_y[x + 1] = group[3];
    x += 2;
  }
  // Odd width: take the first Y of the next group.
  if (width & 1) {
    dst_y[width - 1] = src_uyvy[2 * x + 1];
  }
}
3210
3211
#define BLEND(f, b, a) clamp255((((256 - a) * b) >> 8) + f)
3212
3213
// Blend src_argb over src_argb1 and store to dst_argb.
3214
// dst_argb may be src_argb or src_argb1.
3215
// This code mimics the SSSE3 version for better testability.
3216
void ARGBBlendRow_C(const uint8_t* src_argb,
3217
                    const uint8_t* src_argb1,
3218
                    uint8_t* dst_argb,
3219
0
                    int width) {
3220
0
  int x;
3221
0
  for (x = 0; x < width - 1; x += 2) {
3222
0
    uint32_t fb = src_argb[0];
3223
0
    uint32_t fg = src_argb[1];
3224
0
    uint32_t fr = src_argb[2];
3225
0
    uint32_t a = src_argb[3];
3226
0
    uint32_t bb = src_argb1[0];
3227
0
    uint32_t bg = src_argb1[1];
3228
0
    uint32_t br = src_argb1[2];
3229
0
    dst_argb[0] = STATIC_CAST(uint8_t, BLEND(fb, bb, a));
3230
0
    dst_argb[1] = STATIC_CAST(uint8_t, BLEND(fg, bg, a));
3231
0
    dst_argb[2] = STATIC_CAST(uint8_t, BLEND(fr, br, a));
3232
0
    dst_argb[3] = 255u;
3233
3234
0
    fb = src_argb[4 + 0];
3235
0
    fg = src_argb[4 + 1];
3236
0
    fr = src_argb[4 + 2];
3237
0
    a = src_argb[4 + 3];
3238
0
    bb = src_argb1[4 + 0];
3239
0
    bg = src_argb1[4 + 1];
3240
0
    br = src_argb1[4 + 2];
3241
0
    dst_argb[4 + 0] = STATIC_CAST(uint8_t, BLEND(fb, bb, a));
3242
0
    dst_argb[4 + 1] = STATIC_CAST(uint8_t, BLEND(fg, bg, a));
3243
0
    dst_argb[4 + 2] = STATIC_CAST(uint8_t, BLEND(fr, br, a));
3244
0
    dst_argb[4 + 3] = 255u;
3245
0
    src_argb += 8;
3246
0
    src_argb1 += 8;
3247
0
    dst_argb += 8;
3248
0
  }
3249
3250
0
  if (width & 1) {
3251
0
    uint32_t fb = src_argb[0];
3252
0
    uint32_t fg = src_argb[1];
3253
0
    uint32_t fr = src_argb[2];
3254
0
    uint32_t a = src_argb[3];
3255
0
    uint32_t bb = src_argb1[0];
3256
0
    uint32_t bg = src_argb1[1];
3257
0
    uint32_t br = src_argb1[2];
3258
0
    dst_argb[0] = STATIC_CAST(uint8_t, BLEND(fb, bb, a));
3259
0
    dst_argb[1] = STATIC_CAST(uint8_t, BLEND(fg, bg, a));
3260
0
    dst_argb[2] = STATIC_CAST(uint8_t, BLEND(fr, br, a));
3261
0
    dst_argb[3] = 255u;
3262
0
  }
3263
0
}
3264
#undef BLEND
3265
3266
0
// Weighted mix of foreground f and background b by alpha a (all 0..255):
// (a * f + (255 - a) * b + 255) >> 8. Arguments and the whole expansion are
// parenthesized so the macro is safe inside any surrounding expression.
#define UBLEND(f, b, a) ((((a) * (f)) + ((255 - (a)) * (b)) + 255) >> 8)

// Blends `width` bytes of src0 over src1 using the per-byte weights in
// `alpha` and writes the result to dst. With alpha 255 the output equals
// src0; with alpha 0 it equals src1.
void BlendPlaneRow_C(const uint8_t* src0,
                     const uint8_t* src1,
                     const uint8_t* alpha,
                     uint8_t* dst,
                     int width) {
  int x;
  // Two bytes per iteration.
  for (x = 0; x < width - 1; x += 2) {
    dst[0] = UBLEND(src0[0], src1[0], alpha[0]);
    dst[1] = UBLEND(src0[1], src1[1], alpha[1]);
    src0 += 2;
    src1 += 2;
    alpha += 2;
    dst += 2;
  }
  // Odd width: one byte is left.
  if (width & 1) {
    dst[0] = UBLEND(src0[0], src1[0], alpha[0]);
  }
}
#undef UBLEND
3286
3287
0
#define ATTENUATE(f, a) (f * a + 255) >> 8
3288
3289
// Multiply source RGB by alpha and store to destination.
3290
0
void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
3291
0
  int i;
3292
0
  for (i = 0; i < width - 1; i += 2) {
3293
0
    uint32_t b = src_argb[0];
3294
0
    uint32_t g = src_argb[1];
3295
0
    uint32_t r = src_argb[2];
3296
0
    uint32_t a = src_argb[3];
3297
0
    dst_argb[0] = ATTENUATE(b, a);
3298
0
    dst_argb[1] = ATTENUATE(g, a);
3299
0
    dst_argb[2] = ATTENUATE(r, a);
3300
0
    dst_argb[3] = STATIC_CAST(uint8_t, a);
3301
0
    b = src_argb[4];
3302
0
    g = src_argb[5];
3303
0
    r = src_argb[6];
3304
0
    a = src_argb[7];
3305
0
    dst_argb[4] = ATTENUATE(b, a);
3306
0
    dst_argb[5] = ATTENUATE(g, a);
3307
0
    dst_argb[6] = ATTENUATE(r, a);
3308
0
    dst_argb[7] = STATIC_CAST(uint8_t, a);
3309
0
    src_argb += 8;
3310
0
    dst_argb += 8;
3311
0
  }
3312
3313
0
  if (width & 1) {
3314
0
    const uint32_t b = src_argb[0];
3315
0
    const uint32_t g = src_argb[1];
3316
0
    const uint32_t r = src_argb[2];
3317
0
    const uint32_t a = src_argb[3];
3318
0
    dst_argb[0] = ATTENUATE(b, a);
3319
0
    dst_argb[1] = ATTENUATE(g, a);
3320
0
    dst_argb[2] = ATTENUATE(r, a);
3321
0
    dst_argb[3] = STATIC_CAST(uint8_t, a);
3322
0
  }
3323
0
}
3324
#undef ATTENUATE
3325
3326
// Divide source RGB by alpha and store to destination.
3327
// b = (b * 255 + (a / 2)) / a;
3328
// g = (g * 255 + (a / 2)) / a;
3329
// r = (r * 255 + (a / 2)) / a;
3330
// Reciprocal method is off by 1 on some values, e.g. 125.
3331
// 8.8 fixed point inverse table with 1.0 in upper short and 1 / a in lower.
3332
#define T(a) 0x01000000 + (0x10000 / a)
3333
const uint32_t fixed_invtbl8[256] = {
3334
    0x01000000, 0x0100ffff, T(0x02), T(0x03),   T(0x04), T(0x05), T(0x06),
3335
    T(0x07),    T(0x08),    T(0x09), T(0x0a),   T(0x0b), T(0x0c), T(0x0d),
3336
    T(0x0e),    T(0x0f),    T(0x10), T(0x11),   T(0x12), T(0x13), T(0x14),
3337
    T(0x15),    T(0x16),    T(0x17), T(0x18),   T(0x19), T(0x1a), T(0x1b),
3338
    T(0x1c),    T(0x1d),    T(0x1e), T(0x1f),   T(0x20), T(0x21), T(0x22),
3339
    T(0x23),    T(0x24),    T(0x25), T(0x26),   T(0x27), T(0x28), T(0x29),
3340
    T(0x2a),    T(0x2b),    T(0x2c), T(0x2d),   T(0x2e), T(0x2f), T(0x30),
3341
    T(0x31),    T(0x32),    T(0x33), T(0x34),   T(0x35), T(0x36), T(0x37),
3342
    T(0x38),    T(0x39),    T(0x3a), T(0x3b),   T(0x3c), T(0x3d), T(0x3e),
3343
    T(0x3f),    T(0x40),    T(0x41), T(0x42),   T(0x43), T(0x44), T(0x45),
3344
    T(0x46),    T(0x47),    T(0x48), T(0x49),   T(0x4a), T(0x4b), T(0x4c),
3345
    T(0x4d),    T(0x4e),    T(0x4f), T(0x50),   T(0x51), T(0x52), T(0x53),
3346
    T(0x54),    T(0x55),    T(0x56), T(0x57),   T(0x58), T(0x59), T(0x5a),
3347
    T(0x5b),    T(0x5c),    T(0x5d), T(0x5e),   T(0x5f), T(0x60), T(0x61),
3348
    T(0x62),    T(0x63),    T(0x64), T(0x65),   T(0x66), T(0x67), T(0x68),
3349
    T(0x69),    T(0x6a),    T(0x6b), T(0x6c),   T(0x6d), T(0x6e), T(0x6f),
3350
    T(0x70),    T(0x71),    T(0x72), T(0x73),   T(0x74), T(0x75), T(0x76),
3351
    T(0x77),    T(0x78),    T(0x79), T(0x7a),   T(0x7b), T(0x7c), T(0x7d),
3352
    T(0x7e),    T(0x7f),    T(0x80), T(0x81),   T(0x82), T(0x83), T(0x84),
3353
    T(0x85),    T(0x86),    T(0x87), T(0x88),   T(0x89), T(0x8a), T(0x8b),
3354
    T(0x8c),    T(0x8d),    T(0x8e), T(0x8f),   T(0x90), T(0x91), T(0x92),
3355
    T(0x93),    T(0x94),    T(0x95), T(0x96),   T(0x97), T(0x98), T(0x99),
3356
    T(0x9a),    T(0x9b),    T(0x9c), T(0x9d),   T(0x9e), T(0x9f), T(0xa0),
3357
    T(0xa1),    T(0xa2),    T(0xa3), T(0xa4),   T(0xa5), T(0xa6), T(0xa7),
3358
    T(0xa8),    T(0xa9),    T(0xaa), T(0xab),   T(0xac), T(0xad), T(0xae),
3359
    T(0xaf),    T(0xb0),    T(0xb1), T(0xb2),   T(0xb3), T(0xb4), T(0xb5),
3360
    T(0xb6),    T(0xb7),    T(0xb8), T(0xb9),   T(0xba), T(0xbb), T(0xbc),
3361
    T(0xbd),    T(0xbe),    T(0xbf), T(0xc0),   T(0xc1), T(0xc2), T(0xc3),
3362
    T(0xc4),    T(0xc5),    T(0xc6), T(0xc7),   T(0xc8), T(0xc9), T(0xca),
3363
    T(0xcb),    T(0xcc),    T(0xcd), T(0xce),   T(0xcf), T(0xd0), T(0xd1),
3364
    T(0xd2),    T(0xd3),    T(0xd4), T(0xd5),   T(0xd6), T(0xd7), T(0xd8),
3365
    T(0xd9),    T(0xda),    T(0xdb), T(0xdc),   T(0xdd), T(0xde), T(0xdf),
3366
    T(0xe0),    T(0xe1),    T(0xe2), T(0xe3),   T(0xe4), T(0xe5), T(0xe6),
3367
    T(0xe7),    T(0xe8),    T(0xe9), T(0xea),   T(0xeb), T(0xec), T(0xed),
3368
    T(0xee),    T(0xef),    T(0xf0), T(0xf1),   T(0xf2), T(0xf3), T(0xf4),
3369
    T(0xf5),    T(0xf6),    T(0xf7), T(0xf8),   T(0xf9), T(0xfa), T(0xfb),
3370
    T(0xfc),    T(0xfd),    T(0xfe), 0x01000100};
3371
#undef T
3372
3373
#if defined(LIBYUV_UNATTENUATE_DUP)
3374
// This code mimics the Intel SIMD version for better testability.
3375
#define UNATTENUATE(f, ia) clamp255(((f | (f << 8)) * ia) >> 16)
3376
#else
3377
#define UNATTENUATE(f, ia) clamp255((f * ia) >> 8)
3378
#endif
3379
3380
// mimics the Intel SIMD code for exactness.
3381
void ARGBUnattenuateRow_C(const uint8_t* src_argb,
3382
                          uint8_t* dst_argb,
3383
0
                          int width) {
3384
0
  int i;
3385
0
  for (i = 0; i < width; ++i) {
3386
0
    uint32_t b = src_argb[0];
3387
0
    uint32_t g = src_argb[1];
3388
0
    uint32_t r = src_argb[2];
3389
0
    const uint32_t a = src_argb[3];
3390
0
    const uint32_t ia = fixed_invtbl8[a] & 0xffff;  // 8.8 fixed point
3391
3392
    // Clamping should not be necessary but is free in assembly.
3393
0
    dst_argb[0] = STATIC_CAST(uint8_t, UNATTENUATE(b, ia));
3394
0
    dst_argb[1] = STATIC_CAST(uint8_t, UNATTENUATE(g, ia));
3395
0
    dst_argb[2] = STATIC_CAST(uint8_t, UNATTENUATE(r, ia));
3396
0
    dst_argb[3] = STATIC_CAST(uint8_t, a);
3397
0
    src_argb += 4;
3398
0
    dst_argb += 4;
3399
0
  }
3400
0
}
3401
3402
// Builds one row of a 4 channel cumulative sum.
// For each pixel x and channel c, cumsum[x * 4 + c] is the sum of
// row[0..x] for that channel plus previous_cumsum[x * 4 + c].
void ComputeCumulativeSumRow_C(const uint8_t* row,
                               int32_t* cumsum,
                               const int32_t* previous_cumsum,
                               int width) {
  int32_t running[4] = {0, 0, 0, 0};
  int pixel;
  for (pixel = 0; pixel < width; ++pixel) {
    const int base = pixel * 4;
    int c;
    // Update all four running totals first, then emit the four outputs.
    for (c = 0; c < 4; ++c) {
      running[c] += row[base + c];
    }
    for (c = 0; c < 4; ++c) {
      cumsum[base + c] = running[c] + previous_cumsum[base + c];
    }
  }
}
3419
3420
// Converts cumulative sums into per channel averages.
// For each of `count` pixels and each of 4 channels the box total
//   bl[w + c] + tl[c] - bl[c] - tl[w + c]
// is multiplied by 1 / area and truncated to a byte.
//   tl, bl: cumulative sum rows (advanced by 4 entries per output pixel).
//   w:      offset, in int32 entries, between the two columns that are read.
//   area:   divisor; must be non-zero.
void CumulativeSumToAverageRow_C(const int32_t* tl,
                                 const int32_t* bl,
                                 int w,
                                 int area,
                                 uint8_t* dst,
                                 int count) {
  float inv_area;
  int px;
  assert(area != 0);

  inv_area = 1.0f / (float)area;
  for (px = 0; px < count; ++px, dst += 4, tl += 4, bl += 4) {
    int c;
    for (c = 0; c < 4; ++c) {
      dst[c] = (uint8_t)((float)(bl[w + c] + tl[c] - bl[c] - tl[w + c]) *
                         inv_area);
    }
  }
}
3449
3450
// Copy pixels from rotated source to destination row with a slope.
3451
LIBYUV_API
3452
// Samples `width` 4 byte pixels from src_argb along a straight line and writes
// them consecutively to dst_argb.
//   src_argb:        top-left of the source image.
//   src_argb_stride: byte distance between source rows.
//   uv_dudv:         {u, v, du, dv}: start coordinate and per pixel step.
// Coordinates are truncated to integers; no bounds checking is performed.
void ARGBAffineRow_C(const uint8_t* src_argb,
                     int src_argb_stride,
                     uint8_t* dst_argb,
                     const float* uv_dudv,
                     int width) {
  int i;
  // Current source coordinate for the pixel being rendered.
  float uv[2];
  uv[0] = uv_dudv[0];
  uv[1] = uv_dudv[1];
  for (i = 0; i < width; ++i) {
    int x = (int)(uv[0]);
    int y = (int)(uv[1]);
    // Move the pixel with memcpy through a temporary instead of casting the
    // byte pointers to uint32_t*: the addresses need not be 4 byte aligned,
    // and the temporary keeps load-then-store semantics.
    uint32_t pixel;
    memcpy(&pixel, src_argb + y * src_argb_stride + x * 4, sizeof(pixel));
    memcpy(dst_argb, &pixel, sizeof(pixel));
    dst_argb += 4;
    uv[0] += uv_dudv[2];
    uv[1] += uv_dudv[3];
  }
}
3472
3473
// Blend 2 rows into 1.
3474
// Rounded average of two rows: dst = (row0 + row1 + 1) >> 1, where row1
// starts src_uv_stride bytes after row0.
static void HalfRow_C(const uint8_t* src_uv,
                      ptrdiff_t src_uv_stride,
                      uint8_t* dst_uv,
                      int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    *dst_uv = (src_uv[0] + src_uv[src_uv_stride] + 1) >> 1;
    ++src_uv;
    ++dst_uv;
  }
}
3483
3484
// 16 bit variant: rounded average of two rows, dst = (row0 + row1 + 1) >> 1.
// src_uv_stride is the distance between the rows in uint16_t elements.
static void HalfRow_16_C(const uint16_t* src_uv,
                         ptrdiff_t src_uv_stride,
                         uint16_t* dst_uv,
                         int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    *dst_uv = (src_uv[0] + src_uv[src_uv_stride] + 1) >> 1;
    ++src_uv;
    ++dst_uv;
  }
}
3493
3494
static void HalfRow_16To8_C(const uint16_t* src_uv,
3495
                            ptrdiff_t src_uv_stride,
3496
                            uint8_t* dst_uv,
3497
                            int scale,
3498
0
                            int width) {
3499
0
  int x;
3500
0
  for (x = 0; x < width; ++x) {
3501
0
    dst_uv[x] = STATIC_CAST(
3502
0
        uint8_t,
3503
0
        C16TO8((src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1, scale));
3504
0
  }
3505
0
}
3506
3507
// C version 2x2 -> 2x1.
3508
void InterpolateRow_C(uint8_t* dst_ptr,
3509
                      const uint8_t* src_ptr,
3510
                      ptrdiff_t src_stride,
3511
                      int width,
3512
0
                      int source_y_fraction) {
3513
0
  int y1_fraction = source_y_fraction;
3514
0
  int y0_fraction = 256 - y1_fraction;
3515
0
  const uint8_t* src_ptr1 = src_ptr + src_stride;
3516
0
  int x;
3517
0
  assert(source_y_fraction >= 0);
3518
0
  assert(source_y_fraction < 256);
3519
3520
0
  if (y1_fraction == 0) {
3521
0
    memcpy(dst_ptr, src_ptr, width);
3522
0
    return;
3523
0
  }
3524
0
  if (y1_fraction == 128) {
3525
0
    HalfRow_C(src_ptr, src_stride, dst_ptr, width);
3526
0
    return;
3527
0
  }
3528
0
  for (x = 0; x < width; ++x) {
3529
0
    dst_ptr[0] = STATIC_CAST(
3530
0
        uint8_t,
3531
0
        (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8);
3532
0
    ++src_ptr;
3533
0
    ++src_ptr1;
3534
0
    ++dst_ptr;
3535
0
  }
3536
0
}
3537
3538
// C version 2x2 -> 2x1.
3539
void InterpolateRow_16_C(uint16_t* dst_ptr,
3540
                         const uint16_t* src_ptr,
3541
                         ptrdiff_t src_stride,
3542
                         int width,
3543
6.09M
                         int source_y_fraction) {
3544
6.09M
  int y1_fraction = source_y_fraction;
3545
6.09M
  int y0_fraction = 256 - y1_fraction;
3546
6.09M
  const uint16_t* src_ptr1 = src_ptr + src_stride;
3547
6.09M
  int x;
3548
6.09M
  assert(source_y_fraction >= 0);
3549
6.09M
  assert(source_y_fraction < 256);
3550
3551
6.09M
  if (y1_fraction == 0) {
3552
880k
    memcpy(dst_ptr, src_ptr, width * 2);
3553
880k
    return;
3554
880k
  }
3555
5.21M
  if (y1_fraction == 128) {
3556
22.0k
    HalfRow_16_C(src_ptr, src_stride, dst_ptr, width);
3557
22.0k
    return;
3558
22.0k
  }
3559
6.82G
  for (x = 0; x < width; ++x) {
3560
6.81G
    dst_ptr[0] = STATIC_CAST(
3561
6.81G
        uint16_t,
3562
6.81G
        (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8);
3563
6.81G
    ++src_ptr;
3564
6.81G
    ++src_ptr1;
3565
6.81G
    ++dst_ptr;
3566
6.81G
  }
3567
5.19M
}
3568
3569
// C version 2x2 16 bit-> 2x1 8 bit.
3570
// Use scale to convert lsb formats to msb, depending how many bits there are:
3571
// 32768 = 9 bits
3572
// 16384 = 10 bits
3573
// 4096 = 12 bits
3574
// 256 = 16 bits
3575
// TODO(fbarchard): change scale to bits
3576
3577
void InterpolateRow_16To8_C(uint8_t* dst_ptr,
3578
                            const uint16_t* src_ptr,
3579
                            ptrdiff_t src_stride,
3580
                            int scale,
3581
                            int width,
3582
0
                            int source_y_fraction) {
3583
0
  int y1_fraction = source_y_fraction;
3584
0
  int y0_fraction = 256 - y1_fraction;
3585
0
  const uint16_t* src_ptr1 = src_ptr + src_stride;
3586
0
  int x;
3587
0
  assert(source_y_fraction >= 0);
3588
0
  assert(source_y_fraction < 256);
3589
3590
0
  if (source_y_fraction == 0) {
3591
0
    Convert16To8Row_C(src_ptr, dst_ptr, scale, width);
3592
0
    return;
3593
0
  }
3594
0
  if (source_y_fraction == 128) {
3595
0
    HalfRow_16To8_C(src_ptr, src_stride, dst_ptr, scale, width);
3596
0
    return;
3597
0
  }
3598
0
  for (x = 0; x < width; ++x) {
3599
0
    dst_ptr[0] = STATIC_CAST(
3600
0
        uint8_t,
3601
0
        C16TO8(
3602
0
            (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8,
3603
0
            scale));
3604
0
    src_ptr += 1;
3605
0
    src_ptr1 += 1;
3606
0
    dst_ptr += 1;
3607
0
  }
3608
0
}
3609
3610
// Use first 4 shuffler values to reorder ARGB channels.
// Output byte k of every pixel is input byte shuffler[k] of the same pixel.
void ARGBShuffleRow_C(const uint8_t* src_argb,
                      uint8_t* dst_argb,
                      const uint8_t* shuffler,
                      int width) {
  const int pick0 = shuffler[0];
  const int pick1 = shuffler[1];
  const int pick2 = shuffler[2];
  const int pick3 = shuffler[3];
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    // Gather all four bytes before writing any, to support in-place
    // conversion.
    const uint8_t out0 = src_argb[pick0];
    const uint8_t out1 = src_argb[pick1];
    const uint8_t out2 = src_argb[pick2];
    const uint8_t out3 = src_argb[pick3];
    dst_argb[0] = out0;
    dst_argb[1] = out1;
    dst_argb[2] = out2;
    dst_argb[3] = out3;
    src_argb += 4;
    dst_argb += 4;
  }
}
3635
3636
// Interleaves planar Y, U and V into 4 byte groups ordered Y0, U, Y1, V.
// Each group consumes two Y samples and one U and V sample. For an odd
// width the final group is written with Y1 set to 0.
void I422ToYUY2Row_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* dst_frame,
                     int width) {
  int remaining;
  for (remaining = width; remaining > 1; remaining -= 2) {
    *dst_frame++ = *src_y++;
    *dst_frame++ = *src_u++;
    *dst_frame++ = *src_y++;
    *dst_frame++ = *src_v++;
  }
  if (width & 1) {
    dst_frame[0] = src_y[0];
    dst_frame[1] = src_u[0];
    dst_frame[2] = 0;
    dst_frame[3] = src_v[0];
  }
}
3659
3660
// Interleaves planar Y, U and V into 4 byte groups ordered U, Y0, V, Y1.
// Each group consumes two Y samples and one U and V sample. For an odd
// width the final group is written with Y1 set to 0.
void I422ToUYVYRow_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* dst_frame,
                     int width) {
  int remaining;
  for (remaining = width; remaining > 1; remaining -= 2) {
    *dst_frame++ = *src_u++;
    *dst_frame++ = *src_y++;
    *dst_frame++ = *src_v++;
    *dst_frame++ = *src_y++;
  }
  if (width & 1) {
    dst_frame[0] = src_u[0];
    dst_frame[1] = src_y[0];
    dst_frame[2] = src_v[0];
    dst_frame[3] = 0;
  }
}
3683
3684
void ARGBPolynomialRow_C(const uint8_t* src_argb,
3685
                         uint8_t* dst_argb,
3686
                         const float* poly,
3687
0
                         int width) {
3688
0
  int i;
3689
0
  for (i = 0; i < width; ++i) {
3690
0
    float b = (float)(src_argb[0]);
3691
0
    float g = (float)(src_argb[1]);
3692
0
    float r = (float)(src_argb[2]);
3693
0
    float a = (float)(src_argb[3]);
3694
0
    float b2 = b * b;
3695
0
    float g2 = g * g;
3696
0
    float r2 = r * r;
3697
0
    float a2 = a * a;
3698
0
    float db = poly[0] + poly[4] * b;
3699
0
    float dg = poly[1] + poly[5] * g;
3700
0
    float dr = poly[2] + poly[6] * r;
3701
0
    float da = poly[3] + poly[7] * a;
3702
0
    float b3 = b2 * b;
3703
0
    float g3 = g2 * g;
3704
0
    float r3 = r2 * r;
3705
0
    float a3 = a2 * a;
3706
0
    db += poly[8] * b2;
3707
0
    dg += poly[9] * g2;
3708
0
    dr += poly[10] * r2;
3709
0
    da += poly[11] * a2;
3710
0
    db += poly[12] * b3;
3711
0
    dg += poly[13] * g3;
3712
0
    dr += poly[14] * r3;
3713
0
    da += poly[15] * a3;
3714
3715
0
    dst_argb[0] = STATIC_CAST(uint8_t, Clamp((int32_t)(db)));
3716
0
    dst_argb[1] = STATIC_CAST(uint8_t, Clamp((int32_t)(dg)));
3717
0
    dst_argb[2] = STATIC_CAST(uint8_t, Clamp((int32_t)(dr)));
3718
0
    dst_argb[3] = STATIC_CAST(uint8_t, Clamp((int32_t)(da)));
3719
0
    src_argb += 4;
3720
0
    dst_argb += 4;
3721
0
  }
3722
0
}
3723
3724
// Samples assumed to be unsigned in low 9, 10 or 12 bits.  Scale factor
3725
// adjusts the source integer range to the desired half float range.
3726
3727
// This magic constant is 2^-112. Multiplying by this
3728
// is the same as subtracting 112 from the exponent, which
3729
// is the difference in exponent bias between 32-bit and
3730
// 16-bit floats. Once we've done this subtraction, we can
3731
// simply extract the low bits of the exponent and the high
3732
// bits of the mantissa from our float and we're done.
3733
3734
// Work around GCC 7 punning warning -Wstrict-aliasing
3735
#if defined(__GNUC__)
3736
typedef uint32_t __attribute__((__may_alias__)) uint32_alias_t;
3737
#else
3738
typedef uint32_t uint32_alias_t;
3739
#endif
3740
3741
// Converts 16 bit integer samples to 16 bit half floats.
// Each sample is multiplied by scale * 2^-112; bits 13 and up of the
// resulting 32 bit float are then stored as the 16 bit output.
//   src:   input samples.
//   dst:   output half float bit patterns.
//   scale: factor applied to each sample before conversion.
//   width: number of samples.
void HalfFloatRow_C(const uint16_t* src,
                    uint16_t* dst,
                    float scale,
                    int width) {
  int i;
  // 1.9259299444e-34f is 2^-112.
  float mult = 1.9259299444e-34f * scale;
  for (i = 0; i < width; ++i) {
    float value = src[i] * mult;
    // Read the float's bit pattern with memcpy rather than through a casted
    // pointer, so the access is valid on every compiler.
    uint32_t bits;
    memcpy(&bits, &value, sizeof(bits));
    dst[i] = (uint16_t)(bits >> 13);
  }
}
3752
3753
0
// Converts each byte of src to float and multiplies it by scale.
void ByteToFloatRow_C(const uint8_t* src, float* dst, float scale, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const float scaled = *src * scale;
    *dst = scaled;
    ++src;
    ++dst;
  }
}
3760
3761
// Remaps bytes 0..2 of every 4 byte pixel through a table selected by the
// pixel's weighted sum; byte 3 is copied unchanged.
//   lumacoeff: weights for bytes 0, 1 and 2 in bits 0-7, 8-15 and 16-23.
//   luma:      lookup tables; the table offset is the weighted sum masked
//              with 0x7F00, and the channel value indexes within it.
void ARGBLumaColorTableRow_C(const uint8_t* src_argb,
                             uint8_t* dst_argb,
                             int width,
                             const uint8_t* luma,
                             uint32_t lumacoeff) {
  const uint32_t weight_b = lumacoeff & 0xff;
  const uint32_t weight_g = (lumacoeff >> 8) & 0xff;
  const uint32_t weight_r = (lumacoeff >> 16) & 0xff;

  int remaining;
  for (remaining = width; remaining > 1; remaining -= 2) {
    // Luminance in rows, color values in columns.
    const uint8_t* table = luma + ((src_argb[0] * weight_b +
                                    src_argb[1] * weight_g +
                                    src_argb[2] * weight_r) &
                                   0x7F00u);
    dst_argb[0] = table[src_argb[0]];
    dst_argb[1] = table[src_argb[1]];
    dst_argb[2] = table[src_argb[2]];
    dst_argb[3] = src_argb[3];
    // Second pixel of the pair gets its own table.
    table = luma + ((src_argb[4] * weight_b + src_argb[5] * weight_g +
                     src_argb[6] * weight_r) &
                    0x7F00u);
    dst_argb[4] = table[src_argb[4]];
    dst_argb[5] = table[src_argb[5]];
    dst_argb[6] = table[src_argb[6]];
    dst_argb[7] = src_argb[7];
    src_argb += 8;
    dst_argb += 8;
  }
  if (width & 1) {
    // Luminance in rows, color values in columns.
    const uint8_t* table = luma + ((src_argb[0] * weight_b +
                                    src_argb[1] * weight_g +
                                    src_argb[2] * weight_r) &
                                   0x7F00u);
    dst_argb[0] = table[src_argb[0]];
    dst_argb[1] = table[src_argb[1]];
    dst_argb[2] = table[src_argb[2]];
    dst_argb[3] = src_argb[3];
  }
}
3802
3803
0
// Copies byte 3 (alpha) of every 4 byte source pixel into byte 3 of the
// matching destination pixel; the other destination bytes are left untouched.
void ARGBCopyAlphaRow_C(const uint8_t* src, uint8_t* dst, int width) {
  int remaining;
  // Two pixels per iteration, then one trailing pixel for odd widths.
  for (remaining = width; remaining > 1; remaining -= 2) {
    dst[3] = src[3];
    dst[7] = src[7];
    src += 8;
    dst += 8;
  }
  if (width & 1) {
    dst[3] = src[3];
  }
}
3815
3816
0
// Writes byte 3 (alpha) of every 4 byte source pixel to consecutive bytes of
// dst_a.
void ARGBExtractAlphaRow_C(const uint8_t* src_argb, uint8_t* dst_a, int width) {
  int remaining;
  // Two pixels per iteration, then one trailing pixel for odd widths.
  for (remaining = width; remaining > 1; remaining -= 2) {
    dst_a[0] = src_argb[3];
    dst_a[1] = src_argb[7];
    src_argb += 8;
    dst_a += 2;
  }
  if (width & 1) {
    dst_a[0] = src_argb[3];
  }
}
3828
3829
0
// Stores consecutive source bytes into byte 3 (alpha) of consecutive 4 byte
// destination pixels; the other destination bytes are left untouched.
void ARGBCopyYToAlphaRow_C(const uint8_t* src, uint8_t* dst, int width) {
  int remaining;
  // Two pixels per iteration, then one trailing pixel for odd widths.
  for (remaining = width; remaining > 1; remaining -= 2) {
    dst[3] = src[0];
    dst[7] = src[1];
    src += 2;
    dst += 8;
  }
  if (width & 1) {
    dst[3] = src[0];
  }
}
3841
3842
// Maximum temporary width for wrappers to process at a time, in pixels.
3843
223k
#define MAXTWIDTH 2048
3844
3845
#if !(defined(_MSC_VER) && !defined(__clang__) && defined(_M_IX86)) && \
3846
    defined(HAS_I422TORGB565ROW_SSSE3) && !defined(LIBYUV_ENABLE_ROWWIN)
3847
// row_win.cc has asm version, but GCC uses 2 step wrapper.
3848
void I422ToRGB565Row_SSSE3(const uint8_t* src_y,
3849
                           const uint8_t* src_u,
3850
                           const uint8_t* src_v,
3851
                           uint8_t* dst_rgb565,
3852
                           const struct YuvConstants* yuvconstants,
3853
0
                           int width) {
3854
0
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
3855
0
  while (width > 0) {
3856
0
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
3857
0
    I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth);
3858
0
    ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
3859
0
    src_y += twidth;
3860
0
    src_u += twidth / 2;
3861
0
    src_v += twidth / 2;
3862
0
    dst_rgb565 += twidth * 2;
3863
0
    width -= twidth;
3864
0
  }
3865
0
}
3866
#endif
3867
3868
#if defined(HAS_I422TOARGB1555ROW_SSSE3)
3869
void I422ToARGB1555Row_SSSE3(const uint8_t* src_y,
3870
                             const uint8_t* src_u,
3871
                             const uint8_t* src_v,
3872
                             uint8_t* dst_argb1555,
3873
                             const struct YuvConstants* yuvconstants,
3874
0
                             int width) {
3875
  // Row buffer for intermediate ARGB pixels.
3876
0
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
3877
0
  while (width > 0) {
3878
0
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
3879
0
    I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth);
3880
0
    ARGBToARGB1555Row_SSE2(row, dst_argb1555, twidth);
3881
0
    src_y += twidth;
3882
0
    src_u += twidth / 2;
3883
0
    src_v += twidth / 2;
3884
0
    dst_argb1555 += twidth * 2;
3885
0
    width -= twidth;
3886
0
  }
3887
0
}
3888
#endif
3889
3890
#if defined(HAS_I422TOARGB4444ROW_SSSE3)
3891
void I422ToARGB4444Row_SSSE3(const uint8_t* src_y,
3892
                             const uint8_t* src_u,
3893
                             const uint8_t* src_v,
3894
                             uint8_t* dst_argb4444,
3895
                             const struct YuvConstants* yuvconstants,
3896
0
                             int width) {
3897
  // Row buffer for intermediate ARGB pixels.
3898
0
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
3899
0
  while (width > 0) {
3900
0
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
3901
0
    I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth);
3902
0
    ARGBToARGB4444Row_SSE2(row, dst_argb4444, twidth);
3903
0
    src_y += twidth;
3904
0
    src_u += twidth / 2;
3905
0
    src_v += twidth / 2;
3906
0
    dst_argb4444 += twidth * 2;
3907
0
    width -= twidth;
3908
0
  }
3909
0
}
3910
#endif
3911
3912
#if defined(HAS_NV12TORGB565ROW_SSSE3)
3913
void NV12ToRGB565Row_SSSE3(const uint8_t* src_y,
3914
                           const uint8_t* src_uv,
3915
                           uint8_t* dst_rgb565,
3916
                           const struct YuvConstants* yuvconstants,
3917
0
                           int width) {
3918
  // Row buffer for intermediate ARGB pixels.
3919
0
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
3920
0
  while (width > 0) {
3921
0
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
3922
0
    NV12ToARGBRow_SSSE3(src_y, src_uv, row, yuvconstants, twidth);
3923
0
    ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
3924
0
    src_y += twidth;
3925
0
    src_uv += twidth;
3926
0
    dst_rgb565 += twidth * 2;
3927
0
    width -= twidth;
3928
0
  }
3929
0
}
3930
#endif
3931
3932
#if defined(HAS_NV12TORGB24ROW_SSSE3)
3933
void NV12ToRGB24Row_SSSE3(const uint8_t* src_y,
3934
                          const uint8_t* src_uv,
3935
                          uint8_t* dst_rgb24,
3936
                          const struct YuvConstants* yuvconstants,
3937
0
                          int width) {
3938
  // Row buffer for intermediate ARGB pixels.
3939
0
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
3940
0
  while (width > 0) {
3941
0
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
3942
0
    NV12ToARGBRow_SSSE3(src_y, src_uv, row, yuvconstants, twidth);
3943
0
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
3944
0
    src_y += twidth;
3945
0
    src_uv += twidth;
3946
0
    dst_rgb24 += twidth * 3;
3947
0
    width -= twidth;
3948
0
  }
3949
0
}
3950
#endif
3951
3952
#if defined(HAS_NV21TORGB24ROW_SSSE3)
3953
void NV21ToRGB24Row_SSSE3(const uint8_t* src_y,
3954
                          const uint8_t* src_vu,
3955
                          uint8_t* dst_rgb24,
3956
                          const struct YuvConstants* yuvconstants,
3957
0
                          int width) {
3958
  // Row buffer for intermediate ARGB pixels.
3959
0
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
3960
0
  while (width > 0) {
3961
0
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
3962
0
    NV21ToARGBRow_SSSE3(src_y, src_vu, row, yuvconstants, twidth);
3963
0
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
3964
0
    src_y += twidth;
3965
0
    src_vu += twidth;
3966
0
    dst_rgb24 += twidth * 3;
3967
0
    width -= twidth;
3968
0
  }
3969
0
}
3970
#endif
3971
3972
#if defined(HAS_NV12TORGB24ROW_AVX2)
3973
void NV12ToRGB24Row_AVX2(const uint8_t* src_y,
3974
                         const uint8_t* src_uv,
3975
                         uint8_t* dst_rgb24,
3976
                         const struct YuvConstants* yuvconstants,
3977
0
                         int width) {
3978
  // Row buffer for intermediate ARGB pixels.
3979
0
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
3980
0
  while (width > 0) {
3981
0
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
3982
0
    NV12ToARGBRow_AVX2(src_y, src_uv, row, yuvconstants, twidth);
3983
0
#if defined(HAS_ARGBTORGB24ROW_AVX2)
3984
0
    ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth);
3985
#else
3986
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
3987
#endif
3988
0
    src_y += twidth;
3989
0
    src_uv += twidth;
3990
0
    dst_rgb24 += twidth * 3;
3991
0
    width -= twidth;
3992
0
  }
3993
0
}
3994
#endif
3995
3996
#if defined(HAS_NV21TORGB24ROW_AVX2)
3997
void NV21ToRGB24Row_AVX2(const uint8_t* src_y,
3998
                         const uint8_t* src_vu,
3999
                         uint8_t* dst_rgb24,
4000
                         const struct YuvConstants* yuvconstants,
4001
0
                         int width) {
4002
  // Row buffer for intermediate ARGB pixels.
4003
0
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
4004
0
  while (width > 0) {
4005
0
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
4006
0
    NV21ToARGBRow_AVX2(src_y, src_vu, row, yuvconstants, twidth);
4007
0
#if defined(HAS_ARGBTORGB24ROW_AVX2)
4008
0
    ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth);
4009
#else
4010
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
4011
#endif
4012
0
    src_y += twidth;
4013
0
    src_vu += twidth;
4014
0
    dst_rgb24 += twidth * 3;
4015
0
    width -= twidth;
4016
0
  }
4017
0
}
4018
#endif
4019
4020
#if defined(HAS_I422TORGB565ROW_AVX2)
4021
void I422ToRGB565Row_AVX2(const uint8_t* src_y,
4022
                          const uint8_t* src_u,
4023
                          const uint8_t* src_v,
4024
                          uint8_t* dst_rgb565,
4025
                          const struct YuvConstants* yuvconstants,
4026
2.42k
                          int width) {
4027
2.42k
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
4028
4.85k
  while (width > 0) {
4029
2.42k
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
4030
2.42k
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
4031
#if defined(HAS_ARGBTORGB565ROW_AVX2)
4032
    ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth);
4033
#else
4034
2.42k
    ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
4035
2.42k
#endif
4036
2.42k
    src_y += twidth;
4037
2.42k
    src_u += twidth / 2;
4038
2.42k
    src_v += twidth / 2;
4039
2.42k
    dst_rgb565 += twidth * 2;
4040
2.42k
    width -= twidth;
4041
2.42k
  }
4042
2.42k
}
4043
#endif
4044
4045
#if defined(HAS_I422TOARGB1555ROW_AVX2)
4046
void I422ToARGB1555Row_AVX2(const uint8_t* src_y,
4047
                            const uint8_t* src_u,
4048
                            const uint8_t* src_v,
4049
                            uint8_t* dst_argb1555,
4050
                            const struct YuvConstants* yuvconstants,
4051
0
                            int width) {
4052
  // Row buffer for intermediate ARGB pixels.
4053
0
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
4054
0
  while (width > 0) {
4055
0
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
4056
0
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
4057
#if defined(HAS_ARGBTOARGB1555ROW_AVX2)
4058
    ARGBToARGB1555Row_AVX2(row, dst_argb1555, twidth);
4059
#else
4060
0
    ARGBToARGB1555Row_SSE2(row, dst_argb1555, twidth);
4061
0
#endif
4062
0
    src_y += twidth;
4063
0
    src_u += twidth / 2;
4064
0
    src_v += twidth / 2;
4065
0
    dst_argb1555 += twidth * 2;
4066
0
    width -= twidth;
4067
0
  }
4068
0
}
4069
#endif
4070
4071
#if defined(HAS_I422TOARGB4444ROW_AVX2)
4072
void I422ToARGB4444Row_AVX2(const uint8_t* src_y,
4073
                            const uint8_t* src_u,
4074
                            const uint8_t* src_v,
4075
                            uint8_t* dst_argb4444,
4076
                            const struct YuvConstants* yuvconstants,
4077
0
                            int width) {
4078
  // Row buffer for intermediate ARGB pixels.
4079
0
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
4080
0
  while (width > 0) {
4081
0
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
4082
0
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
4083
#if defined(HAS_ARGBTOARGB4444ROW_AVX2)
4084
    ARGBToARGB4444Row_AVX2(row, dst_argb4444, twidth);
4085
#else
4086
0
    ARGBToARGB4444Row_SSE2(row, dst_argb4444, twidth);
4087
0
#endif
4088
0
    src_y += twidth;
4089
0
    src_u += twidth / 2;
4090
0
    src_v += twidth / 2;
4091
0
    dst_argb4444 += twidth * 2;
4092
0
    width -= twidth;
4093
0
  }
4094
0
}
4095
#endif
4096
4097
#if defined(HAS_I422TORGB24ROW_AVX2)
4098
void I422ToRGB24Row_AVX2(const uint8_t* src_y,
4099
                         const uint8_t* src_u,
4100
                         const uint8_t* src_v,
4101
                         uint8_t* dst_rgb24,
4102
                         const struct YuvConstants* yuvconstants,
4103
6.09k
                         int width) {
4104
  // Row buffer for intermediate ARGB pixels.
4105
6.09k
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
4106
12.1k
  while (width > 0) {
4107
6.09k
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
4108
6.09k
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
4109
6.09k
#if defined(HAS_ARGBTORGB24ROW_AVX2)
4110
6.09k
    ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth);
4111
#else
4112
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
4113
#endif
4114
6.09k
    src_y += twidth;
4115
6.09k
    src_u += twidth / 2;
4116
6.09k
    src_v += twidth / 2;
4117
6.09k
    dst_rgb24 += twidth * 3;
4118
6.09k
    width -= twidth;
4119
6.09k
  }
4120
6.09k
}
4121
#endif
4122
4123
#if defined(HAS_I444TORGB24ROW_AVX2)
4124
void I444ToRGB24Row_AVX2(const uint8_t* src_y,
4125
                         const uint8_t* src_u,
4126
                         const uint8_t* src_v,
4127
                         uint8_t* dst_rgb24,
4128
                         const struct YuvConstants* yuvconstants,
4129
6.57k
                         int width) {
4130
  // Row buffer for intermediate ARGB pixels.
4131
6.57k
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
4132
13.3k
  while (width > 0) {
4133
6.82k
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
4134
6.82k
    I444ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
4135
6.82k
#if defined(HAS_ARGBTORGB24ROW_AVX2)
4136
6.82k
    ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth);
4137
#else
4138
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
4139
#endif
4140
6.82k
    src_y += twidth;
4141
6.82k
    src_u += twidth;
4142
6.82k
    src_v += twidth;
4143
6.82k
    dst_rgb24 += twidth * 3;
4144
6.82k
    width -= twidth;
4145
6.82k
  }
4146
6.57k
}
4147
#endif
4148
4149
#if defined(HAS_NV12TORGB565ROW_AVX2)
4150
void NV12ToRGB565Row_AVX2(const uint8_t* src_y,
4151
                          const uint8_t* src_uv,
4152
                          uint8_t* dst_rgb565,
4153
                          const struct YuvConstants* yuvconstants,
4154
0
                          int width) {
4155
  // Row buffer for intermediate ARGB pixels.
4156
0
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
4157
0
  while (width > 0) {
4158
0
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
4159
0
    NV12ToARGBRow_AVX2(src_y, src_uv, row, yuvconstants, twidth);
4160
#if defined(HAS_ARGBTORGB565ROW_AVX2)
4161
    ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth);
4162
#else
4163
0
    ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
4164
0
#endif
4165
0
    src_y += twidth;
4166
0
    src_uv += twidth;
4167
0
    dst_rgb565 += twidth * 2;
4168
0
    width -= twidth;
4169
0
  }
4170
0
}
4171
#endif
4172
4173
#ifdef HAS_RGB24TOYJROW_AVX2
4174
// Convert 16 RGB24 pixels (64 bytes) to 16 YJ values.
4175
0
void RGB24ToYJRow_AVX2(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) {
4176
  // Row buffer for intermediate ARGB pixels.
4177
0
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
4178
0
  while (width > 0) {
4179
0
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
4180
0
    RGB24ToARGBRow_SSSE3(src_rgb24, row, twidth);
4181
0
    ARGBToYJRow_AVX2(row, dst_yj, twidth);
4182
0
    src_rgb24 += twidth * 3;
4183
0
    dst_yj += twidth;
4184
0
    width -= twidth;
4185
0
  }
4186
0
}
4187
#endif  // HAS_RGB24TOYJROW_AVX2
4188
4189
#ifdef HAS_RAWTOYJROW_AVX2
4190
// Convert 32 RAW pixels (128 bytes) to 32 YJ values.
4191
40
void RAWToYJRow_AVX2(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
4192
  // Row buffer for intermediate ARGB pixels.
4193
40
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
4194
103k
  while (width > 0) {
4195
103k
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
4196
103k
#ifdef HAS_RAWTOARGBROW_AVX2
4197
103k
    RAWToARGBRow_AVX2(src_raw, row, twidth);
4198
#else
4199
    RAWToARGBRow_SSSE3(src_raw, row, twidth);
4200
#endif
4201
103k
    ARGBToYJRow_AVX2(row, dst_yj, twidth);
4202
103k
    src_raw += twidth * 3;
4203
103k
    dst_yj += twidth;
4204
103k
    width -= twidth;
4205
103k
  }
4206
40
}
4207
#endif  // HAS_RAWTOYJROW_AVX2
4208
4209
#ifdef HAS_RGB24TOYJROW_SSSE3
4210
// Convert 16 RGB24 pixels (64 bytes) to 16 YJ values.
4211
0
void RGB24ToYJRow_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) {
4212
  // Row buffer for intermediate ARGB pixels.
4213
0
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
4214
0
  while (width > 0) {
4215
0
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
4216
0
    RGB24ToARGBRow_SSSE3(src_rgb24, row, twidth);
4217
0
    ARGBToYJRow_SSSE3(row, dst_yj, twidth);
4218
0
    src_rgb24 += twidth * 3;
4219
0
    dst_yj += twidth;
4220
0
    width -= twidth;
4221
0
  }
4222
0
}
4223
#endif  // HAS_RGB24TOYJROW_SSSE3
4224
4225
#ifdef HAS_RAWTOYJROW_SSSE3
4226
// Convert 16 RAW pixels (64 bytes) to 16 YJ values.
4227
0
void RAWToYJRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
4228
  // Row buffer for intermediate ARGB pixels.
4229
0
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
4230
0
  while (width > 0) {
4231
0
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
4232
0
    RAWToARGBRow_SSSE3(src_raw, row, twidth);
4233
0
    ARGBToYJRow_SSSE3(row, dst_yj, twidth);
4234
0
    src_raw += twidth * 3;
4235
0
    dst_yj += twidth;
4236
0
    width -= twidth;
4237
0
  }
4238
0
}
4239
#endif  // HAS_RAWTOYJROW_SSSE3
4240
4241
#ifdef HAS_INTERPOLATEROW_16TO8_AVX2
4242
void InterpolateRow_16To8_AVX2(uint8_t* dst_ptr,
4243
                               const uint16_t* src_ptr,
4244
                               ptrdiff_t src_stride,
4245
                               int scale,
4246
                               int width,
4247
0
                               int source_y_fraction) {
4248
  // Row buffer for intermediate 16 bit pixels.
4249
0
  SIMD_ALIGNED(uint16_t row[MAXTWIDTH]);
4250
0
  while (width > 0) {
4251
0
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
4252
0
    InterpolateRow_16_C(row, src_ptr, src_stride, twidth, source_y_fraction);
4253
0
    Convert16To8Row_AVX2(row, dst_ptr, scale, twidth);
4254
0
    src_ptr += twidth;
4255
0
    dst_ptr += twidth;
4256
0
    width -= twidth;
4257
0
  }
4258
0
}
4259
#endif  // HAS_INTERPOLATEROW_16TO8_AVX2
4260
4261
0
// Scale `width` float samples from src into dst and return the sum of the
// squares of the unscaled source samples. Returns 0 when width <= 0.
float ScaleSumSamples_C(const float* src, float* dst, float scale, int width) {
  float fsum = 0.f;
  int i;
  for (i = 0; i < width; ++i) {
    float v = *src++;
    fsum += v * v;  // Accumulate from the source value, not the scaled one.
    *dst++ = v * scale;
  }
  return fsum;
}
4271
4272
0
// Scale `width` float samples from src into dst and return the largest
// unscaled source sample. The running maximum starts at 0, so the result is
// never negative; returns 0 when width <= 0.
float ScaleMaxSamples_C(const float* src, float* dst, float scale, int width) {
  // Named to avoid shadowing the <math.h> function fmax.
  float max_sample = 0.f;
  int i;
  for (i = 0; i < width; ++i) {
    float v = *src++;
    float vs = v * scale;
    max_sample = (v > max_sample) ? v : max_sample;
    *dst++ = vs;
  }
  return max_sample;
}
4283
4284
0
// Multiply `width` float samples from src by `scale` and store them in dst.
void ScaleSamples_C(const float* src, float* dst, float scale, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    *dst++ = *src++ * scale;
  }
}
4290
4291
0
void GaussRow_C(const uint32_t* src, uint16_t* dst, int width) {
4292
0
  int i;
4293
0
  for (i = 0; i < width; ++i) {
4294
0
    *dst++ = STATIC_CAST(
4295
0
        uint16_t,
4296
0
        (src[0] + src[1] * 4 + src[2] * 6 + src[3] * 4 + src[4] + 128) >> 8);
4297
0
    ++src;
4298
0
  }
4299
0
}
4300
4301
// Filter 5 rows with 1, 4, 6, 4, 1 coefficients to produce 1 row.
// Each 32-bit output is the unnormalized weighted sum of the 16-bit samples
// at the same column in src0 .. src4; no rounding or shift is applied here.
void GaussCol_C(const uint16_t* src0,
                const uint16_t* src1,
                const uint16_t* src2,
                const uint16_t* src3,
                const uint16_t* src4,
                uint32_t* dst,
                int width) {
  int i;
  for (i = 0; i < width; ++i) {
    *dst++ = *src0++ + *src1++ * 4 + *src2++ * 6 + *src3++ * 4 + *src4++;
  }
}
4314
4315
0
// Filter a row of floats horizontally with 1, 4, 6, 4, 1 coefficients.
// Each output is the weighted sum of 5 consecutive inputs multiplied by
// 1/256. Reads src[0] .. src[width + 3].
void GaussRow_F32_C(const float* src, float* dst, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    *dst++ = (src[0] + src[1] * 4 + src[2] * 6 + src[3] * 4 + src[4]) *
             (1.0f / 256.0f);
    ++src;
  }
}
4323
4324
// Filter 5 rows with 1, 4, 6, 4, 1 coefficients to produce 1 row.
// Each float output is the unnormalized weighted sum of the samples at the
// same column in src0 .. src4; no scaling is applied here.
void GaussCol_F32_C(const float* src0,
                    const float* src1,
                    const float* src2,
                    const float* src3,
                    const float* src4,
                    float* dst,
                    int width) {
  int i;
  for (i = 0; i < width; ++i) {
    *dst++ = *src0++ + *src1++ * 4 + *src2++ * 6 + *src3++ * 4 + *src4++;
  }
}
4337
4338
// Convert biplanar NV21 to packed YUV24.
// Each output pixel is 3 bytes in V, U, Y order. One VU pair from src_vu is
// shared by two horizontally adjacent Y samples; an odd trailing pixel uses
// the next VU pair on its own.
void NV21ToYUV24Row_C(const uint8_t* src_y,
                      const uint8_t* src_vu,
                      uint8_t* dst_yuv24,
                      int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    dst_yuv24[0] = src_vu[0];  // V
    dst_yuv24[1] = src_vu[1];  // U
    dst_yuv24[2] = src_y[0];   // Y0
    dst_yuv24[3] = src_vu[0];  // V
    dst_yuv24[4] = src_vu[1];  // U
    dst_yuv24[5] = src_y[1];   // Y1
    src_y += 2;
    src_vu += 2;
    dst_yuv24 += 6;  // Advance 2 pixels.
  }
  if (width & 1) {
    dst_yuv24[0] = src_vu[0];  // V
    dst_yuv24[1] = src_vu[1];  // U
    dst_yuv24[2] = src_y[0];   // Y0
  }
}
4361
4362
// Filter 2 rows of AYUV UV's (444) into UV (420).
// AYUV is VUYA in memory.  UV for NV12 is UV order in memory.
// Each output pair is the rounded average of a 2x2 block of source pixels;
// the second row is read at src_ayuv + src_stride_ayuv. For an odd width the
// last output pair averages only the two vertically adjacent pixels.
void AYUVToUVRow_C(const uint8_t* src_ayuv,
                   int src_stride_ayuv,
                   uint8_t* dst_uv,
                   int width) {
  // Output a row of UV values, filtering 2x2 rows of AYUV.
  int x;
  for (x = 0; x < width - 1; x += 2) {
    // U is byte 1 of each 4-byte pixel.
    dst_uv[0] = (src_ayuv[1] + src_ayuv[5] + src_ayuv[src_stride_ayuv + 1] +
                 src_ayuv[src_stride_ayuv + 5] + 2) >>
                2;
    // V is byte 0 of each 4-byte pixel.
    dst_uv[1] = (src_ayuv[0] + src_ayuv[4] + src_ayuv[src_stride_ayuv + 0] +
                 src_ayuv[src_stride_ayuv + 4] + 2) >>
                2;
    src_ayuv += 8;
    dst_uv += 2;
  }
  if (width & 1) {
    dst_uv[0] = (src_ayuv[1] + src_ayuv[src_stride_ayuv + 1] + 1) >> 1;
    dst_uv[1] = (src_ayuv[0] + src_ayuv[src_stride_ayuv + 0] + 1) >> 1;
  }
}
4385
4386
// Filter 2 rows of AYUV UV's (444) into VU (420).
// Each output pair is the rounded average of a 2x2 block of source pixels,
// written V first and U second; the second row is read at
// src_ayuv + src_stride_ayuv. For an odd width the last output pair averages
// only the two vertically adjacent pixels.
void AYUVToVURow_C(const uint8_t* src_ayuv,
                   int src_stride_ayuv,
                   uint8_t* dst_vu,
                   int width) {
  // Output a row of VU values, filtering 2x2 rows of AYUV.
  int x;
  for (x = 0; x < width - 1; x += 2) {
    // V is byte 0 of each 4-byte pixel.
    dst_vu[0] = (src_ayuv[0] + src_ayuv[4] + src_ayuv[src_stride_ayuv + 0] +
                 src_ayuv[src_stride_ayuv + 4] + 2) >>
                2;
    // U is byte 1 of each 4-byte pixel.
    dst_vu[1] = (src_ayuv[1] + src_ayuv[5] + src_ayuv[src_stride_ayuv + 1] +
                 src_ayuv[src_stride_ayuv + 5] + 2) >>
                2;
    src_ayuv += 8;
    dst_vu += 2;
  }
  if (width & 1) {
    dst_vu[0] = (src_ayuv[0] + src_ayuv[src_stride_ayuv + 0] + 1) >> 1;
    dst_vu[1] = (src_ayuv[1] + src_ayuv[src_stride_ayuv + 1] + 1) >> 1;
  }
}
4408
4409
// Copy row of AYUV Y's into Y.
// Reads byte 2 of each 4-byte source pixel and writes `width` bytes to dst_y.
void AYUVToYRow_C(const uint8_t* src_ayuv, uint8_t* dst_y, int width) {
  // Output a row of Y values.
  int x;
  for (x = 0; x < width; ++x) {
    dst_y[x] = src_ayuv[2];  // v,u,y,a
    src_ayuv += 4;
  }
}
4418
4419
// Convert UV plane of NV12 to VU of NV21.
// Swaps the two bytes of each of the `width` source pairs. Both bytes are
// loaded before either is stored.
void SwapUVRow_C(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t u = src_uv[0];
    uint8_t v = src_uv[1];
    dst_vu[0] = v;
    dst_vu[1] = u;
    src_uv += 2;
    dst_vu += 2;
  }
}
4431
4432
// Downsample separate U and V planes by 2x2 and interleave them as UV pairs.
// Each output byte is the rounded average of a 2x2 block of one plane; the
// second row of each plane is read at its own stride offset. For an odd width
// the last output pair averages only the two vertically adjacent samples.
void HalfMergeUVRow_C(const uint8_t* src_u,
                      int src_stride_u,
                      const uint8_t* src_v,
                      int src_stride_v,
                      uint8_t* dst_uv,
                      int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    dst_uv[0] = (src_u[0] + src_u[1] + src_u[src_stride_u] +
                 src_u[src_stride_u + 1] + 2) >>
                2;
    dst_uv[1] = (src_v[0] + src_v[1] + src_v[src_stride_v] +
                 src_v[src_stride_v + 1] + 2) >>
                2;
    src_u += 2;
    src_v += 2;
    dst_uv += 2;
  }
  if (width & 1) {
    dst_uv[0] = (src_u[0] + src_u[src_stride_u] + 1) >> 1;
    dst_uv[1] = (src_v[0] + src_v[src_stride_v] + 1) >> 1;
  }
}
4455
4456
#undef STATIC_CAST
4457
4458
#ifdef __cplusplus
4459
}  // extern "C"
4460
}  // namespace libyuv
4461
#endif