/src/libwebp/sharpyuv/sharpyuv.c
Line | Count | Source |
1 | | // Copyright 2022 Google Inc. All Rights Reserved. |
2 | | // |
3 | | // Use of this source code is governed by a BSD-style license |
4 | | // that can be found in the COPYING file in the root of the source |
5 | | // tree. An additional intellectual property rights grant can be found |
6 | | // in the file PATENTS. All contributing project authors may |
7 | | // be found in the AUTHORS file in the root of the source tree. |
8 | | // ----------------------------------------------------------------------------- |
9 | | // |
10 | | // Sharp RGB to YUV conversion. |
11 | | // |
12 | | // Author: Skal (pascal.massimino@gmail.com) |
13 | | |
14 | | #include "./sharpyuv.h" |
15 | | |
16 | | #include <assert.h> |
17 | | #include <limits.h> |
18 | | #include <stddef.h> |
19 | | #include <stdlib.h> |
20 | | #include <string.h> |
21 | | |
22 | | #include "./sharpyuv_cpu.h" |
23 | | #include "./sharpyuv_dsp.h" |
24 | | #include "./sharpyuv_gamma.h" |
25 | | #include "webp/types.h" |
26 | | |
27 | | //------------------------------------------------------------------------------ |
28 | | |
29 | 0 | int SharpYuvGetVersion(void) { return SHARPYUV_VERSION; } |
30 | | |
31 | | //------------------------------------------------------------------------------ |
32 | | // Sharp RGB->YUV conversion |
33 | | |
// Upper bound on the number of refinement passes of the conversion loop.
static const int kNumIterations = 4;

#define YUV_FIX 16  // fixed-point precision for RGB->YUV
// One half expressed with YUV_FIX fractional bits; used as a rounding term.
static const int kYuvHalf = 1 << (YUV_FIX - 1);

// Max bit depth so that intermediate calculations fit in 16 bits.
static const int kMaxBitDepth = 14;

// Returns the precision shift to use for samples of 'rgb_bit_depth' bits.
// Two extra bits are added when the result still fits in kMaxBitDepth;
// otherwise the shift is whatever brings the depth to exactly kMaxBitDepth
// (which is negative for inputs deeper than kMaxBitDepth).
static int GetPrecisionShift(int rgb_bit_depth) {
  if (rgb_bit_depth + 2 <= kMaxBitDepth) {
    return 2;
  }
  return kMaxBitDepth - rgb_bit_depth;
}
49 | | |
typedef int16_t fixed_t;     // signed type with extra precision for UV
typedef uint16_t fixed_y_t;  // unsigned type with extra precision for W

//------------------------------------------------------------------------------

// Saturates 'v' to the [0, 255] range.
// The argument is a plain 'int' (not fixed_t): callers pass the 'int' result
// of the RGB->YUV computation, and narrowing it to 16 bits before clamping
// would let out-of-range values wrap around instead of saturating.
static uint8_t clip_8b(int v) {
  return (!(v & ~0xff)) ? (uint8_t)v : (v < 0) ? 0u : 255u;
}

// Saturates 'v' to the [0, max] range. 'max' is expected to fit in 16 bits.
// Takes an 'int' for the same reason as clip_8b().
static uint16_t clip(int v, int max) {
  return (v < 0) ? 0 : (v > max) ? (uint16_t)max : (uint16_t)v;
}

// Saturates 'y' to the [0, (1 << bit_depth) - 1] range.
static fixed_y_t clip_bit_depth(int y, int bit_depth) {
  const int max = (1 << bit_depth) - 1;
  // Fast path: no bit set outside of 'max' means 'y' is already in range.
  return (!(y & ~max)) ? (fixed_y_t)y : (y < 0) ? 0 : max;
}
67 | | |
68 | | //------------------------------------------------------------------------------ |
69 | | |
70 | 0 | static int RGBToGray(int64_t r, int64_t g, int64_t b) { |
71 | 0 | const int64_t luma = 13933 * r + 46871 * g + 4732 * b + kYuvHalf; |
72 | 0 | return (int)(luma >> YUV_FIX); |
73 | 0 | } |
74 | | |
75 | | static uint32_t ScaleDown(uint16_t a, uint16_t b, uint16_t c, uint16_t d, |
76 | | int bit_depth, |
77 | 0 | SharpYuvTransferFunctionType transfer_type) { |
78 | 0 | const uint32_t A = SharpYuvGammaToLinear(a, bit_depth, transfer_type); |
79 | 0 | const uint32_t B = SharpYuvGammaToLinear(b, bit_depth, transfer_type); |
80 | 0 | const uint32_t C = SharpYuvGammaToLinear(c, bit_depth, transfer_type); |
81 | 0 | const uint32_t D = SharpYuvGammaToLinear(d, bit_depth, transfer_type); |
82 | 0 | return SharpYuvLinearToGamma((A + B + C + D + 2) >> 2, bit_depth, |
83 | 0 | transfer_type); |
84 | 0 | } |
85 | | |
86 | | static WEBP_INLINE void UpdateW(const fixed_y_t* src, fixed_y_t* dst, int w, |
87 | | int bit_depth, |
88 | 0 | SharpYuvTransferFunctionType transfer_type) { |
89 | 0 | int i = 0; |
90 | 0 | do { |
91 | 0 | const uint32_t R = |
92 | 0 | SharpYuvGammaToLinear(src[0 * w + i], bit_depth, transfer_type); |
93 | 0 | const uint32_t G = |
94 | 0 | SharpYuvGammaToLinear(src[1 * w + i], bit_depth, transfer_type); |
95 | 0 | const uint32_t B = |
96 | 0 | SharpYuvGammaToLinear(src[2 * w + i], bit_depth, transfer_type); |
97 | 0 | const uint32_t Y = RGBToGray(R, G, B); |
98 | 0 | dst[i] = (fixed_y_t)SharpYuvLinearToGamma(Y, bit_depth, transfer_type); |
99 | 0 | } while (++i < w); |
100 | 0 | } |
101 | | |
102 | | static void UpdateChroma(const fixed_y_t* src1, const fixed_y_t* src2, |
103 | | fixed_t* dst, int uv_w, int bit_depth, |
104 | 0 | SharpYuvTransferFunctionType transfer_type) { |
105 | 0 | int i = 0; |
106 | 0 | do { |
107 | 0 | const int r = |
108 | 0 | ScaleDown(src1[0 * uv_w + 0], src1[0 * uv_w + 1], src2[0 * uv_w + 0], |
109 | 0 | src2[0 * uv_w + 1], bit_depth, transfer_type); |
110 | 0 | const int g = |
111 | 0 | ScaleDown(src1[2 * uv_w + 0], src1[2 * uv_w + 1], src2[2 * uv_w + 0], |
112 | 0 | src2[2 * uv_w + 1], bit_depth, transfer_type); |
113 | 0 | const int b = |
114 | 0 | ScaleDown(src1[4 * uv_w + 0], src1[4 * uv_w + 1], src2[4 * uv_w + 0], |
115 | 0 | src2[4 * uv_w + 1], bit_depth, transfer_type); |
116 | 0 | const int W = RGBToGray(r, g, b); |
117 | 0 | dst[0 * uv_w] = (fixed_t)(r - W); |
118 | 0 | dst[1 * uv_w] = (fixed_t)(g - W); |
119 | 0 | dst[2 * uv_w] = (fixed_t)(b - W); |
120 | 0 | dst += 1; |
121 | 0 | src1 += 2; |
122 | 0 | src2 += 2; |
123 | 0 | } while (++i < uv_w); |
124 | 0 | } |
125 | | |
126 | 0 | static void StoreGray(const fixed_y_t* rgb, fixed_y_t* y, int w) { |
127 | 0 | int i = 0; |
128 | 0 | assert(w > 0); |
129 | 0 | do { |
130 | 0 | y[i] = RGBToGray(rgb[0 * w + i], rgb[1 * w + i], rgb[2 * w + i]); |
131 | 0 | } while (++i < w); |
132 | 0 | } |
133 | | |
134 | | //------------------------------------------------------------------------------ |
135 | | |
136 | 0 | static WEBP_INLINE fixed_y_t Filter2(int A, int B, int W0, int bit_depth) { |
137 | 0 | const int v0 = (A * 3 + B + 2) >> 2; |
138 | 0 | return clip_bit_depth(v0 + W0, bit_depth); |
139 | 0 | } |
140 | | |
141 | | //------------------------------------------------------------------------------ |
142 | | |
143 | 0 | static WEBP_INLINE int Shift(int v, int shift) { |
144 | 0 | return (shift >= 0) ? (v << shift) : (v >> -shift); |
145 | 0 | } |
146 | | |
147 | | static void ImportOneRow(const uint8_t* const r_ptr, const uint8_t* const g_ptr, |
148 | | const uint8_t* const b_ptr, int rgb_step, |
149 | | int rgb_bit_depth, int pic_width, |
150 | 0 | fixed_y_t* const dst) { |
151 | | // Convert the rgb_step from a number of bytes to a number of uint8_t or |
152 | | // uint16_t values depending the bit depth. |
153 | 0 | const int step = (rgb_bit_depth > 8) ? rgb_step / 2 : rgb_step; |
154 | 0 | const int w = (pic_width + 1) & ~1; |
155 | 0 | const int shift = GetPrecisionShift(rgb_bit_depth); |
156 | 0 | const int max_val = (1 << rgb_bit_depth) - 1; |
157 | 0 | int i = 0; |
158 | |
|
159 | 0 | if (rgb_bit_depth == 8) { |
160 | 0 | do { |
161 | 0 | const int off = i * step; |
162 | 0 | dst[i + 0 * w] = Shift(r_ptr[off], shift); |
163 | 0 | dst[i + 1 * w] = Shift(g_ptr[off], shift); |
164 | 0 | dst[i + 2 * w] = Shift(b_ptr[off], shift); |
165 | 0 | } while (++i < pic_width); |
166 | 0 | } else if (rgb_bit_depth < 16) { |
167 | 0 | do { |
168 | 0 | const int off = i * step; |
169 | 0 | int r = ((const uint16_t*)r_ptr)[off]; |
170 | 0 | int g = ((const uint16_t*)g_ptr)[off]; |
171 | 0 | int b = ((const uint16_t*)b_ptr)[off]; |
172 | 0 | dst[i + 0 * w] = Shift(r > max_val ? max_val : r, shift); |
173 | 0 | dst[i + 1 * w] = Shift(g > max_val ? max_val : g, shift); |
174 | 0 | dst[i + 2 * w] = Shift(b > max_val ? max_val : b, shift); |
175 | 0 | } while (++i < pic_width); |
176 | 0 | } else { // rgb_bit_depth == 16 |
177 | 0 | do { |
178 | 0 | const int off = i * step; |
179 | 0 | int r = ((const uint16_t*)r_ptr)[off]; |
180 | 0 | int g = ((const uint16_t*)g_ptr)[off]; |
181 | 0 | int b = ((const uint16_t*)b_ptr)[off]; |
182 | 0 | dst[i + 0 * w] = Shift(r, shift); |
183 | 0 | dst[i + 1 * w] = Shift(g, shift); |
184 | 0 | dst[i + 2 * w] = Shift(b, shift); |
185 | 0 | } while (++i < pic_width); |
186 | 0 | } |
187 | |
|
188 | 0 | if (pic_width & 1) { // replicate rightmost pixel |
189 | 0 | dst[pic_width + 0 * w] = dst[pic_width + 0 * w - 1]; |
190 | 0 | dst[pic_width + 1 * w] = dst[pic_width + 1 * w - 1]; |
191 | 0 | dst[pic_width + 2 * w] = dst[pic_width + 2 * w - 1]; |
192 | 0 | } |
193 | 0 | } |
194 | | |
195 | | static void InterpolateTwoRows(const fixed_y_t* const best_y, |
196 | | const fixed_t* prev_uv, const fixed_t* cur_uv, |
197 | | const fixed_t* next_uv, int w, fixed_y_t* out1, |
198 | 0 | fixed_y_t* out2, int bit_depth) { |
199 | 0 | const int uv_w = w >> 1; |
200 | 0 | const int len = (w - 1) >> 1; // length to filter |
201 | 0 | int k = 3; |
202 | 0 | while (k-- > 0) { // process each R/G/B segments in turn |
203 | | // special boundary case for i==0 |
204 | 0 | out1[0] = Filter2(cur_uv[0], prev_uv[0], best_y[0], bit_depth); |
205 | 0 | out2[0] = Filter2(cur_uv[0], next_uv[0], best_y[w], bit_depth); |
206 | |
|
207 | 0 | SharpYuvFilterRow(cur_uv, prev_uv, len, best_y + 0 + 1, out1 + 1, |
208 | 0 | bit_depth); |
209 | 0 | SharpYuvFilterRow(cur_uv, next_uv, len, best_y + w + 1, out2 + 1, |
210 | 0 | bit_depth); |
211 | | |
212 | | // special boundary case for i == w - 1 when w is even |
213 | 0 | if (!(w & 1)) { |
214 | 0 | out1[w - 1] = Filter2(cur_uv[uv_w - 1], prev_uv[uv_w - 1], |
215 | 0 | best_y[w - 1 + 0], bit_depth); |
216 | 0 | out2[w - 1] = Filter2(cur_uv[uv_w - 1], next_uv[uv_w - 1], |
217 | 0 | best_y[w - 1 + w], bit_depth); |
218 | 0 | } |
219 | 0 | out1 += w; |
220 | 0 | out2 += w; |
221 | 0 | prev_uv += uv_w; |
222 | 0 | cur_uv += uv_w; |
223 | 0 | next_uv += uv_w; |
224 | 0 | } |
225 | 0 | } |
226 | | |
227 | | static WEBP_INLINE int RGBToYUVComponent(int r, int g, int b, |
228 | 0 | const int coeffs[4], int sfix) { |
229 | 0 | const int srounder = 1 << (YUV_FIX + sfix - 1); |
230 | 0 | const int luma = |
231 | 0 | coeffs[0] * r + coeffs[1] * g + coeffs[2] * b + coeffs[3] + srounder; |
232 | 0 | return (luma >> (YUV_FIX + sfix)); |
233 | 0 | } |
234 | | |
235 | | static int ConvertWRGBToYUV(const fixed_y_t* best_y, const fixed_t* best_uv, |
236 | | uint8_t* y_ptr, int y_stride, uint8_t* u_ptr, |
237 | | int u_stride, uint8_t* v_ptr, int v_stride, |
238 | | int rgb_bit_depth, int yuv_bit_depth, int width, |
239 | | int height, |
240 | 0 | const SharpYuvConversionMatrix* yuv_matrix) { |
241 | 0 | int i, j; |
242 | 0 | const fixed_t* const best_uv_base = best_uv; |
243 | 0 | const int w = (width + 1) & ~1; |
244 | 0 | const int h = (height + 1) & ~1; |
245 | 0 | const int uv_w = w >> 1; |
246 | 0 | const int uv_h = h >> 1; |
247 | 0 | const int sfix = GetPrecisionShift(rgb_bit_depth); |
248 | 0 | const int yuv_max = (1 << yuv_bit_depth) - 1; |
249 | |
|
250 | 0 | best_uv = best_uv_base; |
251 | 0 | j = 0; |
252 | 0 | do { |
253 | 0 | i = 0; |
254 | 0 | do { |
255 | 0 | const int off = (i >> 1); |
256 | 0 | const int W = best_y[i]; |
257 | 0 | const int r = best_uv[off + 0 * uv_w] + W; |
258 | 0 | const int g = best_uv[off + 1 * uv_w] + W; |
259 | 0 | const int b = best_uv[off + 2 * uv_w] + W; |
260 | 0 | const int y = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_y, sfix); |
261 | 0 | if (yuv_bit_depth <= 8) { |
262 | 0 | y_ptr[i] = clip_8b(y); |
263 | 0 | } else { |
264 | 0 | ((uint16_t*)y_ptr)[i] = clip(y, yuv_max); |
265 | 0 | } |
266 | 0 | } while (++i < width); |
267 | 0 | best_y += w; |
268 | 0 | best_uv += (j & 1) * 3 * uv_w; |
269 | 0 | y_ptr += y_stride; |
270 | 0 | } while (++j < height); |
271 | |
|
272 | 0 | best_uv = best_uv_base; |
273 | 0 | j = 0; |
274 | 0 | do { |
275 | 0 | i = 0; |
276 | 0 | do { |
277 | | // Note r, g and b values here are off by W, but a constant offset on all |
278 | | // 3 components doesn't change the value of u and v with a YCbCr matrix. |
279 | 0 | const int r = best_uv[i + 0 * uv_w]; |
280 | 0 | const int g = best_uv[i + 1 * uv_w]; |
281 | 0 | const int b = best_uv[i + 2 * uv_w]; |
282 | 0 | const int u = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_u, sfix); |
283 | 0 | const int v = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_v, sfix); |
284 | 0 | if (yuv_bit_depth <= 8) { |
285 | 0 | u_ptr[i] = clip_8b(u); |
286 | 0 | v_ptr[i] = clip_8b(v); |
287 | 0 | } else { |
288 | 0 | ((uint16_t*)u_ptr)[i] = clip(u, yuv_max); |
289 | 0 | ((uint16_t*)v_ptr)[i] = clip(v, yuv_max); |
290 | 0 | } |
291 | 0 | } while (++i < uv_w); |
292 | 0 | best_uv += 3 * uv_w; |
293 | 0 | u_ptr += u_stride; |
294 | 0 | v_ptr += v_stride; |
295 | 0 | } while (++j < uv_h); |
296 | 0 | return 1; |
297 | 0 | } |
298 | | |
299 | | //------------------------------------------------------------------------------ |
300 | | // Main function |
301 | | |
// Allocates nmemb * size bytes with malloc().
// Returns NULL if the product overflows 64 bits, if it does not fit in a
// size_t, or if malloc() itself fails. The caller owns the returned memory and
// must release it with free().
static void* SafeMalloc(uint64_t nmemb, size_t size) {
  const uint64_t max_u64 = ~(uint64_t)0;
  uint64_t total_size;
  // Reject products that would wrap around in uint64_t: a wrapped value could
  // otherwise pass the size_t check below and yield a too-small buffer.
  if (size != 0 && nmemb > max_u64 / size) return NULL;
  total_size = nmemb * (uint64_t)size;
  // Reject sizes that are not representable as size_t (e.g. 32-bit targets).
  if (total_size != (size_t)total_size) return NULL;
  return malloc((size_t)total_size);
}
307 | | |
308 | | static int DoSharpArgbToYuv(const uint8_t* r_ptr, const uint8_t* g_ptr, |
309 | | const uint8_t* b_ptr, int rgb_step, int rgb_stride, |
310 | | int rgb_bit_depth, uint8_t* y_ptr, int y_stride, |
311 | | uint8_t* u_ptr, int u_stride, uint8_t* v_ptr, |
312 | | int v_stride, int yuv_bit_depth, int width, |
313 | | int height, |
314 | | const SharpYuvConversionMatrix* yuv_matrix, |
315 | 0 | SharpYuvTransferFunctionType transfer_type) { |
316 | | // we expand the right/bottom border if needed |
317 | 0 | const int w = (width + 1) & ~1; |
318 | 0 | const int h = (height + 1) & ~1; |
319 | 0 | const int uv_w = w >> 1; |
320 | 0 | const int uv_h = h >> 1; |
321 | 0 | const int y_bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth); |
322 | 0 | uint64_t prev_diff_y_sum = ~0; |
323 | 0 | int j, iter; |
324 | |
|
325 | 0 | const uint64_t tmp_buffer_size = (uint64_t)w * 3 * 2; |
326 | 0 | const uint64_t best_y_base_size = (uint64_t)w * h; |
327 | 0 | const uint64_t target_y_base_size = (uint64_t)w * h; |
328 | 0 | const uint64_t best_rgb_y_size = (uint64_t)w * 2; |
329 | 0 | const uint64_t best_uv_base_size = (uint64_t)uv_w * 3 * uv_h; |
330 | 0 | const uint64_t target_uv_base_size = (uint64_t)uv_w * 3 * uv_h; |
331 | 0 | const uint64_t best_rgb_uv_size = (uint64_t)uv_w * 3; |
332 | 0 | fixed_y_t* const tmp_buffer = (fixed_y_t*)SafeMalloc( |
333 | 0 | (tmp_buffer_size + best_y_base_size + target_y_base_size + |
334 | 0 | best_rgb_y_size) + |
335 | 0 | (best_uv_base_size + target_uv_base_size + best_rgb_uv_size), |
336 | 0 | sizeof(*tmp_buffer)); |
337 | 0 | fixed_y_t *best_y_base, *target_y_base, *best_rgb_y; |
338 | 0 | fixed_t *best_uv_base, *target_uv_base, *best_rgb_uv; |
339 | 0 | fixed_y_t *best_y, *target_y; |
340 | 0 | fixed_t *best_uv, *target_uv; |
341 | 0 | const uint64_t diff_y_threshold = (uint64_t)(3.0 * w * h); |
342 | 0 | int ok; |
343 | 0 | assert(w > 0); |
344 | 0 | assert(h > 0); |
345 | 0 | assert(sizeof(fixed_y_t) == sizeof(fixed_t)); |
346 | |
|
347 | 0 | if (tmp_buffer == NULL) { |
348 | 0 | ok = 0; |
349 | 0 | goto End; |
350 | 0 | } |
351 | 0 | best_y_base = tmp_buffer + tmp_buffer_size; |
352 | 0 | target_y_base = best_y_base + best_y_base_size; |
353 | 0 | best_rgb_y = target_y_base + target_y_base_size; |
354 | 0 | best_uv_base = (fixed_t*)(best_rgb_y + best_rgb_y_size); |
355 | 0 | target_uv_base = best_uv_base + best_uv_base_size; |
356 | 0 | best_rgb_uv = target_uv_base + target_uv_base_size; |
357 | 0 | best_y = best_y_base; |
358 | 0 | target_y = target_y_base; |
359 | 0 | best_uv = best_uv_base; |
360 | 0 | target_uv = target_uv_base; |
361 | | |
362 | | // Import RGB samples to W/RGB representation. |
363 | 0 | for (j = 0; j < height; j += 2) { |
364 | 0 | const int is_last_row = (j == height - 1); |
365 | 0 | fixed_y_t* const src1 = tmp_buffer + 0 * w; |
366 | 0 | fixed_y_t* const src2 = tmp_buffer + 3 * w; |
367 | | |
368 | | // prepare two rows of input |
369 | 0 | ImportOneRow(r_ptr, g_ptr, b_ptr, rgb_step, rgb_bit_depth, width, src1); |
370 | 0 | if (!is_last_row) { |
371 | 0 | ImportOneRow(r_ptr + rgb_stride, g_ptr + rgb_stride, b_ptr + rgb_stride, |
372 | 0 | rgb_step, rgb_bit_depth, width, src2); |
373 | 0 | } else { |
374 | 0 | memcpy(src2, src1, 3 * w * sizeof(*src2)); |
375 | 0 | } |
376 | 0 | StoreGray(src1, best_y + 0, w); |
377 | 0 | StoreGray(src2, best_y + w, w); |
378 | |
|
379 | 0 | UpdateW(src1, target_y, w, y_bit_depth, transfer_type); |
380 | 0 | UpdateW(src2, target_y + w, w, y_bit_depth, transfer_type); |
381 | 0 | UpdateChroma(src1, src2, target_uv, uv_w, y_bit_depth, transfer_type); |
382 | 0 | memcpy(best_uv, target_uv, 3 * uv_w * sizeof(*best_uv)); |
383 | 0 | best_y += 2 * w; |
384 | 0 | best_uv += 3 * uv_w; |
385 | 0 | target_y += 2 * w; |
386 | 0 | target_uv += 3 * uv_w; |
387 | 0 | r_ptr += 2 * rgb_stride; |
388 | 0 | g_ptr += 2 * rgb_stride; |
389 | 0 | b_ptr += 2 * rgb_stride; |
390 | 0 | } |
391 | | |
392 | | // Iterate and resolve clipping conflicts. |
393 | 0 | for (iter = 0; iter < kNumIterations; ++iter) { |
394 | 0 | const fixed_t* cur_uv = best_uv_base; |
395 | 0 | const fixed_t* prev_uv = best_uv_base; |
396 | 0 | uint64_t diff_y_sum = 0; |
397 | |
|
398 | 0 | best_y = best_y_base; |
399 | 0 | best_uv = best_uv_base; |
400 | 0 | target_y = target_y_base; |
401 | 0 | target_uv = target_uv_base; |
402 | 0 | j = 0; |
403 | 0 | do { |
404 | 0 | fixed_y_t* const src1 = tmp_buffer + 0 * w; |
405 | 0 | fixed_y_t* const src2 = tmp_buffer + 3 * w; |
406 | 0 | { |
407 | 0 | const fixed_t* const next_uv = cur_uv + ((j < h - 2) ? 3 * uv_w : 0); |
408 | 0 | InterpolateTwoRows(best_y, prev_uv, cur_uv, next_uv, w, src1, src2, |
409 | 0 | y_bit_depth); |
410 | 0 | prev_uv = cur_uv; |
411 | 0 | cur_uv = next_uv; |
412 | 0 | } |
413 | |
|
414 | 0 | UpdateW(src1, best_rgb_y + 0 * w, w, y_bit_depth, transfer_type); |
415 | 0 | UpdateW(src2, best_rgb_y + 1 * w, w, y_bit_depth, transfer_type); |
416 | 0 | UpdateChroma(src1, src2, best_rgb_uv, uv_w, y_bit_depth, transfer_type); |
417 | | |
418 | | // update two rows of Y and one row of RGB |
419 | 0 | diff_y_sum += |
420 | 0 | SharpYuvUpdateY(target_y, best_rgb_y, best_y, 2 * w, y_bit_depth); |
421 | 0 | SharpYuvUpdateRGB(target_uv, best_rgb_uv, best_uv, 3 * uv_w); |
422 | |
|
423 | 0 | best_y += 2 * w; |
424 | 0 | best_uv += 3 * uv_w; |
425 | 0 | target_y += 2 * w; |
426 | 0 | target_uv += 3 * uv_w; |
427 | 0 | j += 2; |
428 | 0 | } while (j < h); |
429 | | // test exit condition |
430 | 0 | if (iter > 0) { |
431 | 0 | if (diff_y_sum < diff_y_threshold) break; |
432 | 0 | if (diff_y_sum > prev_diff_y_sum) break; |
433 | 0 | } |
434 | 0 | prev_diff_y_sum = diff_y_sum; |
435 | 0 | } |
436 | | |
437 | | // final reconstruction |
438 | 0 | ok = ConvertWRGBToYUV(best_y_base, best_uv_base, y_ptr, y_stride, u_ptr, |
439 | 0 | u_stride, v_ptr, v_stride, rgb_bit_depth, yuv_bit_depth, |
440 | 0 | width, height, yuv_matrix); |
441 | |
|
442 | 0 | End: |
443 | 0 | free(tmp_buffer); |
444 | 0 | return ok; |
445 | 0 | } |
446 | | |
// LOCK_ACCESS / UNLOCK_ACCESS_AND_RETURN bracket the body of a function
// returning void.
#if !defined(WEBP_USE_THREAD) || defined(_WIN32)
// No pthread locking in this configuration: LOCK_ACCESS is a no-op and
// UNLOCK_ACCESS_AND_RETURN is a plain return.
#define LOCK_ACCESS \
  do {              \
  } while (0)
#define UNLOCK_ACCESS_AND_RETURN return
#else  // defined(WEBP_USE_THREAD) && !defined(_WIN32)
#include <pthread.h>  // NOLINT

// Declares a function-local static mutex and locks it; returns from the
// enclosing function right away if the lock cannot be taken.
#define LOCK_ACCESS                                                 \
  static pthread_mutex_t sharpyuv_lock = PTHREAD_MUTEX_INITIALIZER; \
  if (pthread_mutex_lock(&sharpyuv_lock)) return
// Releases the mutex declared by LOCK_ACCESS, then returns.
#define UNLOCK_ACCESS_AND_RETURN                \
  do {                                          \
    (void)pthread_mutex_unlock(&sharpyuv_lock); \
    return;                                     \
  } while (0)
#endif  // !defined(WEBP_USE_THREAD) || defined(_WIN32)
464 | | |
465 | | // Hidden exported init function. |
466 | | // By default SharpYuvConvert calls it with SharpYuvGetCPUInfo. If needed, |
467 | | // users can declare it as extern and call it with an alternate VP8CPUInfo |
468 | | // function. |
469 | | extern VP8CPUInfo SharpYuvGetCPUInfo; |
470 | | SHARPYUV_EXTERN void SharpYuvInit(VP8CPUInfo cpu_info_func); |
471 | 0 | void SharpYuvInit(VP8CPUInfo cpu_info_func) { |
472 | 0 | static volatile VP8CPUInfo sharpyuv_last_cpuinfo_used = |
473 | 0 | (VP8CPUInfo)&sharpyuv_last_cpuinfo_used; |
474 | 0 | LOCK_ACCESS; |
475 | | // Only update SharpYuvGetCPUInfo when called from external code to avoid a |
476 | | // race on reading the value in SharpYuvConvert(). |
477 | 0 | if (cpu_info_func != (VP8CPUInfo)&SharpYuvGetCPUInfo) { |
478 | 0 | SharpYuvGetCPUInfo = cpu_info_func; |
479 | 0 | } |
480 | 0 | if (sharpyuv_last_cpuinfo_used == SharpYuvGetCPUInfo) { |
481 | 0 | UNLOCK_ACCESS_AND_RETURN; |
482 | 0 | } |
483 | | |
484 | 0 | SharpYuvInitDsp(); |
485 | 0 | SharpYuvInitGammaTables(); |
486 | |
|
487 | 0 | sharpyuv_last_cpuinfo_used = SharpYuvGetCPUInfo; |
488 | 0 | UNLOCK_ACCESS_AND_RETURN; |
489 | 0 | } |
490 | | |
491 | | int SharpYuvConvert(const void* r_ptr, const void* g_ptr, const void* b_ptr, |
492 | | int rgb_step, int rgb_stride, int rgb_bit_depth, |
493 | | void* y_ptr, int y_stride, void* u_ptr, int u_stride, |
494 | | void* v_ptr, int v_stride, int yuv_bit_depth, int width, |
495 | 0 | int height, const SharpYuvConversionMatrix* yuv_matrix) { |
496 | 0 | SharpYuvOptions options; |
497 | 0 | options.yuv_matrix = yuv_matrix; |
498 | 0 | options.transfer_type = kSharpYuvTransferFunctionSrgb; |
499 | 0 | return SharpYuvConvertWithOptions( |
500 | 0 | r_ptr, g_ptr, b_ptr, rgb_step, rgb_stride, rgb_bit_depth, y_ptr, y_stride, |
501 | 0 | u_ptr, u_stride, v_ptr, v_stride, yuv_bit_depth, width, height, &options); |
502 | 0 | } |
503 | | |
504 | | int SharpYuvOptionsInitInternal(const SharpYuvConversionMatrix* yuv_matrix, |
505 | 0 | SharpYuvOptions* options, int version) { |
506 | 0 | const int major = (version >> 24); |
507 | 0 | const int minor = (version >> 16) & 0xff; |
508 | 0 | if (options == NULL || yuv_matrix == NULL || |
509 | 0 | (major == SHARPYUV_VERSION_MAJOR && major == 0 && |
510 | 0 | minor != SHARPYUV_VERSION_MINOR) || |
511 | 0 | (major != SHARPYUV_VERSION_MAJOR)) { |
512 | 0 | return 0; |
513 | 0 | } |
514 | 0 | options->yuv_matrix = yuv_matrix; |
515 | 0 | options->transfer_type = kSharpYuvTransferFunctionSrgb; |
516 | 0 | return 1; |
517 | 0 | } |
518 | | |
519 | | int SharpYuvConvertWithOptions(const void* r_ptr, const void* g_ptr, |
520 | | const void* b_ptr, int rgb_step, int rgb_stride, |
521 | | int rgb_bit_depth, void* y_ptr, int y_stride, |
522 | | void* u_ptr, int u_stride, void* v_ptr, |
523 | | int v_stride, int yuv_bit_depth, int width, |
524 | 0 | int height, const SharpYuvOptions* options) { |
525 | 0 | const SharpYuvConversionMatrix* yuv_matrix = options->yuv_matrix; |
526 | 0 | SharpYuvTransferFunctionType transfer_type = options->transfer_type; |
527 | 0 | SharpYuvConversionMatrix scaled_matrix; |
528 | 0 | const int rgb_max = (1 << rgb_bit_depth) - 1; |
529 | 0 | const int rgb_round = 1 << (rgb_bit_depth - 1); |
530 | 0 | const int yuv_max = (1 << yuv_bit_depth) - 1; |
531 | 0 | const int sfix = GetPrecisionShift(rgb_bit_depth); |
532 | |
|
533 | 0 | if (width < 1 || height < 1 || width == INT_MAX || height == INT_MAX || |
534 | 0 | r_ptr == NULL || g_ptr == NULL || b_ptr == NULL || y_ptr == NULL || |
535 | 0 | u_ptr == NULL || v_ptr == NULL) { |
536 | 0 | return 0; |
537 | 0 | } |
538 | 0 | if (rgb_bit_depth != 8 && rgb_bit_depth != 10 && rgb_bit_depth != 12 && |
539 | 0 | rgb_bit_depth != 16) { |
540 | 0 | return 0; |
541 | 0 | } |
542 | 0 | if (yuv_bit_depth != 8 && yuv_bit_depth != 10 && yuv_bit_depth != 12) { |
543 | 0 | return 0; |
544 | 0 | } |
545 | 0 | if (rgb_bit_depth > 8 && (rgb_step % 2 != 0 || rgb_stride % 2 != 0)) { |
546 | | // Step/stride should be even for uint16_t buffers. |
547 | 0 | return 0; |
548 | 0 | } |
549 | 0 | { |
550 | 0 | const uint64_t yuv_bytes = (yuv_bit_depth > 8) ? 2 : 1; |
551 | 0 | const uint64_t uv_width = (width + 1) / 2; |
552 | 0 | const uint64_t abs_step = |
553 | 0 | (uint64_t)((rgb_step < 0) ? -(int64_t)rgb_step : (int64_t)rgb_step); |
554 | 0 | const uint64_t abs_stride = |
555 | 0 | (uint64_t)((rgb_stride < 0) ? -(int64_t)rgb_stride |
556 | 0 | : (int64_t)rgb_stride); |
557 | 0 | const uint64_t total_rgb_size = (uint64_t)height * abs_stride; |
558 | 0 | const uint64_t uv_height = (height + 1) / 2; |
559 | 0 | const uint64_t total_y_size = (uint64_t)height * y_stride; |
560 | 0 | const uint64_t total_u_size = uv_height * u_stride; |
561 | 0 | const uint64_t total_v_size = uv_height * v_stride; |
562 | |
|
563 | 0 | if (y_stride < 0 || (uint64_t)y_stride < (uint64_t)width * yuv_bytes || |
564 | 0 | u_stride < 0 || (uint64_t)u_stride < uv_width * yuv_bytes || |
565 | 0 | v_stride < 0 || (uint64_t)v_stride < uv_width * yuv_bytes) { |
566 | 0 | return 0; |
567 | 0 | } |
568 | 0 | if (abs_step == 0 || abs_stride < (uint64_t)width * abs_step) { |
569 | 0 | return 0; |
570 | 0 | } |
571 | 0 | if (total_rgb_size != (size_t)total_rgb_size || |
572 | 0 | total_y_size != (size_t)total_y_size || |
573 | 0 | total_u_size != (size_t)total_u_size || |
574 | 0 | total_v_size != (size_t)total_v_size) { |
575 | 0 | return 0; |
576 | 0 | } |
577 | 0 | } |
578 | 0 | if (yuv_bit_depth > 8 && |
579 | 0 | (y_stride % 2 != 0 || u_stride % 2 != 0 || v_stride % 2 != 0)) { |
580 | | // Stride should be even for uint16_t buffers. |
581 | 0 | return 0; |
582 | 0 | } |
583 | | // The address of the function pointer is used to avoid a read race. |
584 | 0 | SharpYuvInit((VP8CPUInfo)&SharpYuvGetCPUInfo); |
585 | | |
586 | | // Add scaling factor to go from rgb_bit_depth to yuv_bit_depth, to the |
587 | | // rgb->yuv conversion matrix. |
588 | 0 | if (rgb_bit_depth == yuv_bit_depth) { |
589 | 0 | memcpy(&scaled_matrix, yuv_matrix, sizeof(scaled_matrix)); |
590 | 0 | } else { |
591 | 0 | int i; |
592 | 0 | for (i = 0; i < 3; ++i) { |
593 | 0 | scaled_matrix.rgb_to_y[i] = |
594 | 0 | (yuv_matrix->rgb_to_y[i] * yuv_max + rgb_round) / rgb_max; |
595 | 0 | scaled_matrix.rgb_to_u[i] = |
596 | 0 | (yuv_matrix->rgb_to_u[i] * yuv_max + rgb_round) / rgb_max; |
597 | 0 | scaled_matrix.rgb_to_v[i] = |
598 | 0 | (yuv_matrix->rgb_to_v[i] * yuv_max + rgb_round) / rgb_max; |
599 | 0 | } |
600 | 0 | } |
601 | | // Also incorporate precision change scaling. |
602 | 0 | scaled_matrix.rgb_to_y[3] = Shift(yuv_matrix->rgb_to_y[3], sfix); |
603 | 0 | scaled_matrix.rgb_to_u[3] = Shift(yuv_matrix->rgb_to_u[3], sfix); |
604 | 0 | scaled_matrix.rgb_to_v[3] = Shift(yuv_matrix->rgb_to_v[3], sfix); |
605 | |
|
606 | 0 | return DoSharpArgbToYuv( |
607 | 0 | (const uint8_t*)r_ptr, (const uint8_t*)g_ptr, (const uint8_t*)b_ptr, |
608 | 0 | rgb_step, rgb_stride, rgb_bit_depth, (uint8_t*)y_ptr, y_stride, |
609 | 0 | (uint8_t*)u_ptr, u_stride, (uint8_t*)v_ptr, v_stride, yuv_bit_depth, |
610 | 0 | width, height, &scaled_matrix, transfer_type); |
611 | 0 | } |
612 | | |
613 | | //------------------------------------------------------------------------------ |