Coverage Report

Created: 2023-09-25 06:23

/src/libwebp/sharpyuv/sharpyuv.c
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2022 Google Inc. All Rights Reserved.
2
//
3
// Use of this source code is governed by a BSD-style license
4
// that can be found in the COPYING file in the root of the source
5
// tree. An additional intellectual property rights grant can be found
6
// in the file PATENTS. All contributing project authors may
7
// be found in the AUTHORS file in the root of the source tree.
8
// -----------------------------------------------------------------------------
9
//
10
// Sharp RGB to YUV conversion.
11
//
12
// Author: Skal (pascal.massimino@gmail.com)
13
14
#include "sharpyuv/sharpyuv.h"
15
16
#include <assert.h>
17
#include <limits.h>
18
#include <stddef.h>
19
#include <stdlib.h>
20
#include <string.h>
21
22
#include "src/webp/types.h"
23
#include "sharpyuv/sharpyuv_cpu.h"
24
#include "sharpyuv/sharpyuv_dsp.h"
25
#include "sharpyuv/sharpyuv_gamma.h"
26
27
//------------------------------------------------------------------------------
28
29
0
int SharpYuvGetVersion(void) {
30
0
  return SHARPYUV_VERSION;
31
0
}
32
33
//------------------------------------------------------------------------------
34
// Sharp RGB->YUV conversion
35
36
// Maximum number of refinement passes run by DoSharpArgbToYuv().
static const int kNumIterations = 4;
37
38
6.87M
#define YUV_FIX 16  // fixed-point precision for RGB->YUV
39
// One half in YUV_FIX fixed-point; added as a rounding term before the
// right shift by YUV_FIX in RGBToGray().
static const int kYuvHalf = 1 << (YUV_FIX - 1);
40
41
// Max bit depth so that intermediate calculations fit in 16 bits.
42
static const int kMaxBitDepth = 14;
43
44
// Returns the precision shift to use based on the input rgb_bit_depth.
45
2.93M
static int GetPrecisionShift(int rgb_bit_depth) {
46
  // Try to add 2 bits of precision if it fits in kMaxBitDepth. Otherwise remove
47
  // bits if needed.
48
2.93M
  return ((rgb_bit_depth + 2) <= kMaxBitDepth) ? 2
49
2.93M
                                               : (kMaxBitDepth - rgb_bit_depth);
50
2.93M
}
51
52
typedef int16_t fixed_t;      // signed type with extra precision for UV
53
typedef uint16_t fixed_y_t;   // unsigned type with extra precision for W
54
55
//------------------------------------------------------------------------------
56
57
1.31M
// Clamps 'v' to the [0, 255] range and returns it as an 8-bit value.
static uint8_t clip_8b(fixed_t v) {
  if (v < 0) {
    return 0u;
  }
  if (v > 255) {
    return 255u;
  }
  return (uint8_t)v;
}
60
61
0
// Clamps 'v' to the [0, max] range and returns it as a 16-bit value.
static uint16_t clip(fixed_t v, int max) {
  if (v < 0) {
    return 0;
  }
  if (v > max) {
    return (uint16_t)max;
  }
  return (uint16_t)v;
}
64
65
128k
static fixed_y_t clip_bit_depth(int y, int bit_depth) {
66
128k
  const int max = (1 << bit_depth) - 1;
67
128k
  return (!(y & ~max)) ? (fixed_y_t)y : (y < 0) ? 0 : max;
68
128k
}
69
70
//------------------------------------------------------------------------------
71
72
4.23M
static int RGBToGray(int64_t r, int64_t g, int64_t b) {
73
4.23M
  const int64_t luma = 13933 * r + 46871 * g + 4732 * b + kYuvHalf;
74
4.23M
  return (int)(luma >> YUV_FIX);
75
4.23M
}
76
77
static uint32_t ScaleDown(uint16_t a, uint16_t b, uint16_t c, uint16_t d,
78
                          int rgb_bit_depth,
79
2.00M
                          SharpYuvTransferFunctionType transfer_type) {
80
2.00M
  const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth);
81
2.00M
  const uint32_t A = SharpYuvGammaToLinear(a, bit_depth, transfer_type);
82
2.00M
  const uint32_t B = SharpYuvGammaToLinear(b, bit_depth, transfer_type);
83
2.00M
  const uint32_t C = SharpYuvGammaToLinear(c, bit_depth, transfer_type);
84
2.00M
  const uint32_t D = SharpYuvGammaToLinear(d, bit_depth, transfer_type);
85
2.00M
  return SharpYuvLinearToGamma((A + B + C + D + 2) >> 2, bit_depth,
86
2.00M
                               transfer_type);
87
2.00M
}
88
89
static WEBP_INLINE void UpdateW(const fixed_y_t* src, fixed_y_t* dst, int w,
90
                                int rgb_bit_depth,
91
31.4k
                                SharpYuvTransferFunctionType transfer_type) {
92
31.4k
  const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth);
93
31.4k
  int i;
94
2.70M
  for (i = 0; i < w; ++i) {
95
2.67M
    const uint32_t R =
96
2.67M
        SharpYuvGammaToLinear(src[0 * w + i], bit_depth, transfer_type);
97
2.67M
    const uint32_t G =
98
2.67M
        SharpYuvGammaToLinear(src[1 * w + i], bit_depth, transfer_type);
99
2.67M
    const uint32_t B =
100
2.67M
        SharpYuvGammaToLinear(src[2 * w + i], bit_depth, transfer_type);
101
2.67M
    const uint32_t Y = RGBToGray(R, G, B);
102
2.67M
    dst[i] = (fixed_y_t)SharpYuvLinearToGamma(Y, bit_depth, transfer_type);
103
2.67M
  }
104
31.4k
}
105
106
static void UpdateChroma(const fixed_y_t* src1, const fixed_y_t* src2,
107
                         fixed_t* dst, int uv_w, int rgb_bit_depth,
108
15.7k
                         SharpYuvTransferFunctionType transfer_type) {
109
15.7k
  int i;
110
685k
  for (i = 0; i < uv_w; ++i) {
111
669k
    const int r =
112
669k
        ScaleDown(src1[0 * uv_w + 0], src1[0 * uv_w + 1], src2[0 * uv_w + 0],
113
669k
                  src2[0 * uv_w + 1], rgb_bit_depth, transfer_type);
114
669k
    const int g =
115
669k
        ScaleDown(src1[2 * uv_w + 0], src1[2 * uv_w + 1], src2[2 * uv_w + 0],
116
669k
                  src2[2 * uv_w + 1], rgb_bit_depth, transfer_type);
117
669k
    const int b =
118
669k
        ScaleDown(src1[4 * uv_w + 0], src1[4 * uv_w + 1], src2[4 * uv_w + 0],
119
669k
                  src2[4 * uv_w + 1], rgb_bit_depth, transfer_type);
120
669k
    const int W = RGBToGray(r, g, b);
121
669k
    dst[0 * uv_w] = (fixed_t)(r - W);
122
669k
    dst[1 * uv_w] = (fixed_t)(g - W);
123
669k
    dst[2 * uv_w] = (fixed_t)(b - W);
124
669k
    dst  += 1;
125
669k
    src1 += 2;
126
669k
    src2 += 2;
127
669k
  }
128
15.7k
}
129
130
10.0k
// Writes into y[0..w-1] the RGBToGray() value of each sample triplet taken
// from the three consecutive planes of 'w' entries stored in 'rgb'.
// 'w' must be strictly positive.
static void StoreGray(const fixed_y_t* rgb, fixed_y_t* y, int w) {
  int x = 0;
  assert(w > 0);
  while (x < w) {
    const int gray = RGBToGray(rgb[x], rgb[w + x], rgb[2 * w + x]);
    y[x] = (fixed_y_t)gray;
    ++x;
  }
}
137
138
//------------------------------------------------------------------------------
139
140
128k
// Blends A and B with weights 3/4 and 1/4 (rounded to nearest), adds W0 and
// clamps the sum to 'bit_depth' bits.
static WEBP_INLINE fixed_y_t Filter2(int A, int B, int W0, int bit_depth) {
  const int blended = (3 * A + B + 2) >> 2;
  const int sum = blended + W0;
  return clip_bit_depth(sum, bit_depth);
}
144
145
//------------------------------------------------------------------------------
146
147
2.62M
// Shifts 'v' left by 'shift' bits when 'shift' is non-negative, and right by
// '-shift' bits otherwise.
static WEBP_INLINE int Shift(int v, int shift) {
  if (shift < 0) {
    return v >> -shift;
  }
  return v << shift;
}
150
151
static void ImportOneRow(const uint8_t* const r_ptr,
152
                         const uint8_t* const g_ptr,
153
                         const uint8_t* const b_ptr,
154
                         int rgb_step,
155
                         int rgb_bit_depth,
156
                         int pic_width,
157
9.95k
                         fixed_y_t* const dst) {
158
  // Convert the rgb_step from a number of bytes to a number of uint8_t or
159
  // uint16_t values depending the bit depth.
160
9.95k
  const int step = (rgb_bit_depth > 8) ? rgb_step / 2 : rgb_step;
161
9.95k
  int i;
162
9.95k
  const int w = (pic_width + 1) & ~1;
163
885k
  for (i = 0; i < pic_width; ++i) {
164
875k
    const int off = i * step;
165
875k
    const int shift = GetPrecisionShift(rgb_bit_depth);
166
875k
    if (rgb_bit_depth == 8) {
167
875k
      dst[i + 0 * w] = Shift(r_ptr[off], shift);
168
875k
      dst[i + 1 * w] = Shift(g_ptr[off], shift);
169
875k
      dst[i + 2 * w] = Shift(b_ptr[off], shift);
170
875k
    } else {
171
0
      dst[i + 0 * w] = Shift(((uint16_t*)r_ptr)[off], shift);
172
0
      dst[i + 1 * w] = Shift(((uint16_t*)g_ptr)[off], shift);
173
0
      dst[i + 2 * w] = Shift(((uint16_t*)b_ptr)[off], shift);
174
0
    }
175
875k
  }
176
9.95k
  if (pic_width & 1) {  // replicate rightmost pixel
177
5.35k
    dst[pic_width + 0 * w] = dst[pic_width + 0 * w - 1];
178
5.35k
    dst[pic_width + 1 * w] = dst[pic_width + 1 * w - 1];
179
5.35k
    dst[pic_width + 2 * w] = dst[pic_width + 2 * w - 1];
180
5.35k
  }
181
9.95k
}
182
183
static void InterpolateTwoRows(const fixed_y_t* const best_y,
184
                               const fixed_t* prev_uv,
185
                               const fixed_t* cur_uv,
186
                               const fixed_t* next_uv,
187
                               int w,
188
                               fixed_y_t* out1,
189
                               fixed_y_t* out2,
190
10.7k
                               int rgb_bit_depth) {
191
10.7k
  const int uv_w = w >> 1;
192
10.7k
  const int len = (w - 1) >> 1;   // length to filter
193
10.7k
  int k = 3;
194
10.7k
  const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth);
195
42.8k
  while (k-- > 0) {   // process each R/G/B segments in turn
196
    // special boundary case for i==0
197
32.1k
    out1[0] = Filter2(cur_uv[0], prev_uv[0], best_y[0], bit_depth);
198
32.1k
    out2[0] = Filter2(cur_uv[0], next_uv[0], best_y[w], bit_depth);
199
200
32.1k
    SharpYuvFilterRow(cur_uv, prev_uv, len, best_y + 0 + 1, out1 + 1,
201
32.1k
                      bit_depth);
202
32.1k
    SharpYuvFilterRow(cur_uv, next_uv, len, best_y + w + 1, out2 + 1,
203
32.1k
                      bit_depth);
204
205
    // special boundary case for i == w - 1 when w is even
206
32.1k
    if (!(w & 1)) {
207
32.1k
      out1[w - 1] = Filter2(cur_uv[uv_w - 1], prev_uv[uv_w - 1],
208
32.1k
                            best_y[w - 1 + 0], bit_depth);
209
32.1k
      out2[w - 1] = Filter2(cur_uv[uv_w - 1], next_uv[uv_w - 1],
210
32.1k
                            best_y[w - 1 + w], bit_depth);
211
32.1k
    }
212
32.1k
    out1 += w;
213
32.1k
    out2 += w;
214
32.1k
    prev_uv += uv_w;
215
32.1k
    cur_uv  += uv_w;
216
32.1k
    next_uv += uv_w;
217
32.1k
  }
218
10.7k
}
219
220
static WEBP_INLINE int RGBToYUVComponent(int r, int g, int b,
221
1.31M
                                         const int coeffs[4], int sfix) {
222
1.31M
  const int srounder = 1 << (YUV_FIX + sfix - 1);
223
1.31M
  const int luma = coeffs[0] * r + coeffs[1] * g + coeffs[2] * b +
224
1.31M
                   coeffs[3] + srounder;
225
1.31M
  return (luma >> (YUV_FIX + sfix));
226
1.31M
}
227
228
static int ConvertWRGBToYUV(const fixed_y_t* best_y, const fixed_t* best_uv,
229
                            uint8_t* y_ptr, int y_stride, uint8_t* u_ptr,
230
                            int u_stride, uint8_t* v_ptr, int v_stride,
231
                            int rgb_bit_depth,
232
                            int yuv_bit_depth, int width, int height,
233
204
                            const SharpYuvConversionMatrix* yuv_matrix) {
234
204
  int i, j;
235
204
  const fixed_t* const best_uv_base = best_uv;
236
204
  const int w = (width + 1) & ~1;
237
204
  const int h = (height + 1) & ~1;
238
204
  const int uv_w = w >> 1;
239
204
  const int uv_h = h >> 1;
240
204
  const int sfix = GetPrecisionShift(rgb_bit_depth);
241
204
  const int yuv_max = (1 << yuv_bit_depth) - 1;
242
243
10.1k
  for (best_uv = best_uv_base, j = 0; j < height; ++j) {
244
885k
    for (i = 0; i < width; ++i) {
245
875k
      const int off = (i >> 1);
246
875k
      const int W = best_y[i];
247
875k
      const int r = best_uv[off + 0 * uv_w] + W;
248
875k
      const int g = best_uv[off + 1 * uv_w] + W;
249
875k
      const int b = best_uv[off + 2 * uv_w] + W;
250
875k
      const int y = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_y, sfix);
251
875k
      if (yuv_bit_depth <= 8) {
252
875k
        y_ptr[i] = clip_8b(y);
253
875k
      } else {
254
0
        ((uint16_t*)y_ptr)[i] = clip(y, yuv_max);
255
0
      }
256
875k
    }
257
9.95k
    best_y += w;
258
9.95k
    best_uv += (j & 1) * 3 * uv_w;
259
9.95k
    y_ptr += y_stride;
260
9.95k
  }
261
5.24k
  for (best_uv = best_uv_base, j = 0; j < uv_h; ++j) {
262
226k
    for (i = 0; i < uv_w; ++i) {
263
221k
      const int off = i;
264
      // Note r, g and b values here are off by W, but a constant offset on all
265
      // 3 components doesn't change the value of u and v with a YCbCr matrix.
266
221k
      const int r = best_uv[off + 0 * uv_w];
267
221k
      const int g = best_uv[off + 1 * uv_w];
268
221k
      const int b = best_uv[off + 2 * uv_w];
269
221k
      const int u = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_u, sfix);
270
221k
      const int v = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_v, sfix);
271
221k
      if (yuv_bit_depth <= 8) {
272
221k
        u_ptr[i] = clip_8b(u);
273
221k
        v_ptr[i] = clip_8b(v);
274
221k
      } else {
275
0
        ((uint16_t*)u_ptr)[i] = clip(u, yuv_max);
276
0
        ((uint16_t*)v_ptr)[i] = clip(v, yuv_max);
277
0
      }
278
221k
    }
279
5.04k
    best_uv += 3 * uv_w;
280
5.04k
    u_ptr += u_stride;
281
5.04k
    v_ptr += v_stride;
282
5.04k
  }
283
204
  return 1;
284
204
}
285
286
//------------------------------------------------------------------------------
287
// Main function
288
289
1.42k
// Allocates room for 'nmemb' elements of 'size' bytes each.
// Returns NULL if the total byte count overflows 64 bits, if it does not fit
// in a size_t, or if malloc() fails. The returned memory is uninitialized and
// must be released with free().
static void* SafeMalloc(uint64_t nmemb, size_t size) {
  uint64_t total_size;
  // Reject products that would wrap around modulo 2^64: the size_t check
  // below cannot detect them once the multiplication has wrapped.
  if (size != 0 && nmemb > (~(uint64_t)0) / (uint64_t)size) return NULL;
  total_size = nmemb * (uint64_t)size;
  if (total_size != (size_t)total_size) return NULL;
  return malloc((size_t)total_size);
}
294
295
1.42k
// Allocates a W x H array of T through SafeMalloc(). The element count is
// computed in 64 bits so that the product of two 'int' dimensions cannot
// overflow before SafeMalloc() gets to validate the size.
#define SAFE_ALLOC(W, H, T) ((T*)SafeMalloc((uint64_t)(W) * (H), sizeof(T)))
296
297
static int DoSharpArgbToYuv(const uint8_t* r_ptr, const uint8_t* g_ptr,
298
                            const uint8_t* b_ptr, int rgb_step, int rgb_stride,
299
                            int rgb_bit_depth, uint8_t* y_ptr, int y_stride,
300
                            uint8_t* u_ptr, int u_stride, uint8_t* v_ptr,
301
                            int v_stride, int yuv_bit_depth, int width,
302
                            int height,
303
                            const SharpYuvConversionMatrix* yuv_matrix,
304
204
                            SharpYuvTransferFunctionType transfer_type) {
305
  // we expand the right/bottom border if needed
306
204
  const int w = (width + 1) & ~1;
307
204
  const int h = (height + 1) & ~1;
308
204
  const int uv_w = w >> 1;
309
204
  const int uv_h = h >> 1;
310
204
  uint64_t prev_diff_y_sum = ~0;
311
204
  int j, iter;
312
313
  // TODO(skal): allocate one big memory chunk. But for now, it's easier
314
  // for valgrind debugging to have several chunks.
315
204
  fixed_y_t* const tmp_buffer = SAFE_ALLOC(w * 3, 2, fixed_y_t);   // scratch
316
204
  fixed_y_t* const best_y_base = SAFE_ALLOC(w, h, fixed_y_t);
317
204
  fixed_y_t* const target_y_base = SAFE_ALLOC(w, h, fixed_y_t);
318
204
  fixed_y_t* const best_rgb_y = SAFE_ALLOC(w, 2, fixed_y_t);
319
204
  fixed_t* const best_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t);
320
204
  fixed_t* const target_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t);
321
204
  fixed_t* const best_rgb_uv = SAFE_ALLOC(uv_w * 3, 1, fixed_t);
322
204
  fixed_y_t* best_y = best_y_base;
323
204
  fixed_y_t* target_y = target_y_base;
324
204
  fixed_t* best_uv = best_uv_base;
325
204
  fixed_t* target_uv = target_uv_base;
326
204
  const uint64_t diff_y_threshold = (uint64_t)(3.0 * w * h);
327
204
  int ok;
328
204
  assert(w > 0);
329
204
  assert(h > 0);
330
331
204
  if (best_y_base == NULL || best_uv_base == NULL ||
332
204
      target_y_base == NULL || target_uv_base == NULL ||
333
204
      best_rgb_y == NULL || best_rgb_uv == NULL ||
334
204
      tmp_buffer == NULL) {
335
0
    ok = 0;
336
0
    goto End;
337
0
  }
338
339
  // Import RGB samples to W/RGB representation.
340
5.24k
  for (j = 0; j < height; j += 2) {
341
5.04k
    const int is_last_row = (j == height - 1);
342
5.04k
    fixed_y_t* const src1 = tmp_buffer + 0 * w;
343
5.04k
    fixed_y_t* const src2 = tmp_buffer + 3 * w;
344
345
    // prepare two rows of input
346
5.04k
    ImportOneRow(r_ptr, g_ptr, b_ptr, rgb_step, rgb_bit_depth, width,
347
5.04k
                 src1);
348
5.04k
    if (!is_last_row) {
349
4.90k
      ImportOneRow(r_ptr + rgb_stride, g_ptr + rgb_stride, b_ptr + rgb_stride,
350
4.90k
                   rgb_step, rgb_bit_depth, width, src2);
351
4.90k
    } else {
352
135
      memcpy(src2, src1, 3 * w * sizeof(*src2));
353
135
    }
354
5.04k
    StoreGray(src1, best_y + 0, w);
355
5.04k
    StoreGray(src2, best_y + w, w);
356
357
5.04k
    UpdateW(src1, target_y, w, rgb_bit_depth, transfer_type);
358
5.04k
    UpdateW(src2, target_y + w, w, rgb_bit_depth, transfer_type);
359
5.04k
    UpdateChroma(src1, src2, target_uv, uv_w, rgb_bit_depth, transfer_type);
360
5.04k
    memcpy(best_uv, target_uv, 3 * uv_w * sizeof(*best_uv));
361
5.04k
    best_y += 2 * w;
362
5.04k
    best_uv += 3 * uv_w;
363
5.04k
    target_y += 2 * w;
364
5.04k
    target_uv += 3 * uv_w;
365
5.04k
    r_ptr += 2 * rgb_stride;
366
5.04k
    g_ptr += 2 * rgb_stride;
367
5.04k
    b_ptr += 2 * rgb_stride;
368
5.04k
  }
369
370
  // Iterate and resolve clipping conflicts.
371
515
  for (iter = 0; iter < kNumIterations; ++iter) {
372
490
    const fixed_t* cur_uv = best_uv_base;
373
490
    const fixed_t* prev_uv = best_uv_base;
374
490
    uint64_t diff_y_sum = 0;
375
376
490
    best_y = best_y_base;
377
490
    best_uv = best_uv_base;
378
490
    target_y = target_y_base;
379
490
    target_uv = target_uv_base;
380
11.1k
    for (j = 0; j < h; j += 2) {
381
10.7k
      fixed_y_t* const src1 = tmp_buffer + 0 * w;
382
10.7k
      fixed_y_t* const src2 = tmp_buffer + 3 * w;
383
10.7k
      {
384
10.7k
        const fixed_t* const next_uv = cur_uv + ((j < h - 2) ? 3 * uv_w : 0);
385
10.7k
        InterpolateTwoRows(best_y, prev_uv, cur_uv, next_uv, w,
386
10.7k
                           src1, src2, rgb_bit_depth);
387
10.7k
        prev_uv = cur_uv;
388
10.7k
        cur_uv = next_uv;
389
10.7k
      }
390
391
10.7k
      UpdateW(src1, best_rgb_y + 0 * w, w, rgb_bit_depth, transfer_type);
392
10.7k
      UpdateW(src2, best_rgb_y + 1 * w, w, rgb_bit_depth, transfer_type);
393
10.7k
      UpdateChroma(src1, src2, best_rgb_uv, uv_w, rgb_bit_depth, transfer_type);
394
395
      // update two rows of Y and one row of RGB
396
10.7k
      diff_y_sum +=
397
10.7k
          SharpYuvUpdateY(target_y, best_rgb_y, best_y, 2 * w,
398
10.7k
                          rgb_bit_depth + GetPrecisionShift(rgb_bit_depth));
399
10.7k
      SharpYuvUpdateRGB(target_uv, best_rgb_uv, best_uv, 3 * uv_w);
400
401
10.7k
      best_y += 2 * w;
402
10.7k
      best_uv += 3 * uv_w;
403
10.7k
      target_y += 2 * w;
404
10.7k
      target_uv += 3 * uv_w;
405
10.7k
    }
406
    // test exit condition
407
490
    if (iter > 0) {
408
286
      if (diff_y_sum < diff_y_threshold) break;
409
135
      if (diff_y_sum > prev_diff_y_sum) break;
410
135
    }
411
311
    prev_diff_y_sum = diff_y_sum;
412
311
  }
413
414
  // final reconstruction
415
204
  ok = ConvertWRGBToYUV(best_y_base, best_uv_base, y_ptr, y_stride, u_ptr,
416
204
                        u_stride, v_ptr, v_stride, rgb_bit_depth, yuv_bit_depth,
417
204
                        width, height, yuv_matrix);
418
419
204
 End:
420
204
  free(best_y_base);
421
204
  free(best_uv_base);
422
204
  free(target_y_base);
423
204
  free(target_uv_base);
424
204
  free(best_rgb_y);
425
204
  free(best_rgb_uv);
426
204
  free(tmp_buffer);
427
204
  return ok;
428
204
}
429
#undef SAFE_ALLOC
430
431
#if defined(WEBP_USE_THREAD) && !defined(_WIN32)
#include <pthread.h>  // NOLINT

// Declares a function-local static mutex and acquires it. If locking fails,
// the enclosing function returns immediately, so this is only usable inside
// functions returning void.
#define LOCK_ACCESS \
    static pthread_mutex_t sharpyuv_lock = PTHREAD_MUTEX_INITIALIZER; \
    if (pthread_mutex_lock(&sharpyuv_lock)) return
// Releases the mutex declared by LOCK_ACCESS, then returns from the enclosing
// function.
#define UNLOCK_ACCESS_AND_RETURN                  \
    do {                                          \
      (void)pthread_mutex_unlock(&sharpyuv_lock); \
      return;                                     \
    } while (0)
#else  // !(defined(WEBP_USE_THREAD) && !defined(_WIN32))
// No locking is performed in this configuration.
#define LOCK_ACCESS do {} while (0)
#define UNLOCK_ACCESS_AND_RETURN return
#endif  // defined(WEBP_USE_THREAD) && !defined(_WIN32)
446
447
// Hidden exported init function.
448
// By default SharpYuvConvert calls it with SharpYuvGetCPUInfo. If needed,
449
// users can declare it as extern and call it with an alternate VP8CPUInfo
450
// function.
451
extern VP8CPUInfo SharpYuvGetCPUInfo;
452
SHARPYUV_EXTERN void SharpYuvInit(VP8CPUInfo cpu_info_func);
453
408
// Initializes the DSP functions and gamma tables.
// When 'cpu_info_func' is anything other than the address of
// SharpYuvGetCPUInfo, it is first stored into SharpYuvGetCPUInfo. The
// initialization itself is skipped when SharpYuvGetCPUInfo is unchanged since
// the previous initialization. The body runs between LOCK_ACCESS and
// UNLOCK_ACCESS_AND_RETURN.
void SharpYuvInit(VP8CPUInfo cpu_info_func) {
  // Starts out pointing at itself, a value that matches no CPU-info function,
  // so that the first call always initializes.
  static volatile VP8CPUInfo sharpyuv_last_cpuinfo_used =
      (VP8CPUInfo)&sharpyuv_last_cpuinfo_used;
  // SharpYuvConvertWithOptions() passes the address of the function pointer
  // as a marker meaning "keep the current SharpYuvGetCPUInfo".
  const int keep_current_cpuinfo =
      (cpu_info_func == (VP8CPUInfo)&SharpYuvGetCPUInfo);
  LOCK_ACCESS;
  // Only update SharpYuvGetCPUInfo when called from external code to avoid a
  // race on reading the value in SharpYuvConvert().
  if (!keep_current_cpuinfo) {
    SharpYuvGetCPUInfo = cpu_info_func;
  }
  if (sharpyuv_last_cpuinfo_used != SharpYuvGetCPUInfo) {
    SharpYuvInitDsp();
    SharpYuvInitGammaTables();

    sharpyuv_last_cpuinfo_used = SharpYuvGetCPUInfo;
  }
  UNLOCK_ACCESS_AND_RETURN;
}
472
473
int SharpYuvConvert(const void* r_ptr, const void* g_ptr, const void* b_ptr,
474
                    int rgb_step, int rgb_stride, int rgb_bit_depth,
475
                    void* y_ptr, int y_stride, void* u_ptr, int u_stride,
476
                    void* v_ptr, int v_stride, int yuv_bit_depth, int width,
477
204
                    int height, const SharpYuvConversionMatrix* yuv_matrix) {
478
204
  SharpYuvOptions options;
479
204
  options.yuv_matrix = yuv_matrix;
480
204
  options.transfer_type = kSharpYuvTransferFunctionSrgb;
481
204
  return SharpYuvConvertWithOptions(
482
204
      r_ptr, g_ptr, b_ptr, rgb_step, rgb_stride, rgb_bit_depth, y_ptr, y_stride,
483
204
      u_ptr, u_stride, v_ptr, v_stride, yuv_bit_depth, width, height, &options);
484
204
}
485
486
int SharpYuvOptionsInitInternal(const SharpYuvConversionMatrix* yuv_matrix,
487
0
                                SharpYuvOptions* options, int version) {
488
0
  const int major = (version >> 24);
489
0
  const int minor = (version >> 16) & 0xff;
490
0
  if (options == NULL || yuv_matrix == NULL ||
491
0
      (major == SHARPYUV_VERSION_MAJOR && major == 0 &&
492
0
       minor != SHARPYUV_VERSION_MINOR) ||
493
0
      (major != SHARPYUV_VERSION_MAJOR)) {
494
0
    return 0;
495
0
  }
496
0
  options->yuv_matrix = yuv_matrix;
497
0
  options->transfer_type = kSharpYuvTransferFunctionSrgb;
498
0
  return 1;
499
0
}
500
501
int SharpYuvConvertWithOptions(const void* r_ptr, const void* g_ptr,
502
                               const void* b_ptr, int rgb_step, int rgb_stride,
503
                               int rgb_bit_depth, void* y_ptr, int y_stride,
504
                               void* u_ptr, int u_stride, void* v_ptr,
505
                               int v_stride, int yuv_bit_depth, int width,
506
204
                               int height, const SharpYuvOptions* options) {
507
204
  const SharpYuvConversionMatrix* yuv_matrix = options->yuv_matrix;
508
204
  SharpYuvTransferFunctionType transfer_type = options->transfer_type;
509
204
  SharpYuvConversionMatrix scaled_matrix;
510
204
  const int rgb_max = (1 << rgb_bit_depth) - 1;
511
204
  const int rgb_round = 1 << (rgb_bit_depth - 1);
512
204
  const int yuv_max = (1 << yuv_bit_depth) - 1;
513
204
  const int sfix = GetPrecisionShift(rgb_bit_depth);
514
515
204
  if (width < 1 || height < 1 || width == INT_MAX || height == INT_MAX ||
516
204
      r_ptr == NULL || g_ptr == NULL || b_ptr == NULL || y_ptr == NULL ||
517
204
      u_ptr == NULL || v_ptr == NULL) {
518
0
    return 0;
519
0
  }
520
204
  if (rgb_bit_depth != 8 && rgb_bit_depth != 10 && rgb_bit_depth != 12 &&
521
204
      rgb_bit_depth != 16) {
522
0
    return 0;
523
0
  }
524
204
  if (yuv_bit_depth != 8 && yuv_bit_depth != 10 && yuv_bit_depth != 12) {
525
0
    return 0;
526
0
  }
527
204
  if (rgb_bit_depth > 8 && (rgb_step % 2 != 0 || rgb_stride %2 != 0)) {
528
    // Step/stride should be even for uint16_t buffers.
529
0
    return 0;
530
0
  }
531
204
  if (yuv_bit_depth > 8 &&
532
204
      (y_stride % 2 != 0 || u_stride % 2 != 0 || v_stride % 2 != 0)) {
533
    // Stride should be even for uint16_t buffers.
534
0
    return 0;
535
0
  }
536
  // The address of the function pointer is used to avoid a read race.
537
204
  SharpYuvInit((VP8CPUInfo)&SharpYuvGetCPUInfo);
538
539
  // Add scaling factor to go from rgb_bit_depth to yuv_bit_depth, to the
540
  // rgb->yuv conversion matrix.
541
204
  if (rgb_bit_depth == yuv_bit_depth) {
542
204
    memcpy(&scaled_matrix, yuv_matrix, sizeof(scaled_matrix));
543
204
  } else {
544
0
    int i;
545
0
    for (i = 0; i < 3; ++i) {
546
0
      scaled_matrix.rgb_to_y[i] =
547
0
          (yuv_matrix->rgb_to_y[i] * yuv_max + rgb_round) / rgb_max;
548
0
      scaled_matrix.rgb_to_u[i] =
549
0
          (yuv_matrix->rgb_to_u[i] * yuv_max + rgb_round) / rgb_max;
550
0
      scaled_matrix.rgb_to_v[i] =
551
0
          (yuv_matrix->rgb_to_v[i] * yuv_max + rgb_round) / rgb_max;
552
0
    }
553
0
  }
554
  // Also incorporate precision change scaling.
555
204
  scaled_matrix.rgb_to_y[3] = Shift(yuv_matrix->rgb_to_y[3], sfix);
556
204
  scaled_matrix.rgb_to_u[3] = Shift(yuv_matrix->rgb_to_u[3], sfix);
557
204
  scaled_matrix.rgb_to_v[3] = Shift(yuv_matrix->rgb_to_v[3], sfix);
558
559
204
  return DoSharpArgbToYuv(r_ptr, g_ptr, b_ptr, rgb_step, rgb_stride,
560
204
                          rgb_bit_depth, y_ptr, y_stride, u_ptr, u_stride,
561
204
                          v_ptr, v_stride, yuv_bit_depth, width, height,
562
204
                          &scaled_matrix, transfer_type);
563
204
}
564
565
//------------------------------------------------------------------------------