/src/libwebp/sharpyuv/sharpyuv.c
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | // Copyright 2022 Google Inc. All Rights Reserved.  | 
2  |  | //  | 
3  |  | // Use of this source code is governed by a BSD-style license  | 
4  |  | // that can be found in the COPYING file in the root of the source  | 
5  |  | // tree. An additional intellectual property rights grant can be found  | 
6  |  | // in the file PATENTS. All contributing project authors may  | 
7  |  | // be found in the AUTHORS file in the root of the source tree.  | 
8  |  | // -----------------------------------------------------------------------------  | 
9  |  | //  | 
10  |  | // Sharp RGB to YUV conversion.  | 
11  |  | //  | 
12  |  | // Author: Skal (pascal.massimino@gmail.com)  | 
13  |  |  | 
14  |  | #include "sharpyuv/sharpyuv.h"  | 
15  |  |  | 
16  |  | #include <assert.h>  | 
17  |  | #include <limits.h>  | 
18  |  | #include <stddef.h>  | 
19  |  | #include <stdlib.h>  | 
20  |  | #include <string.h>  | 
21  |  |  | 
22  |  | #include "sharpyuv/sharpyuv_cpu.h"  | 
23  |  | #include "sharpyuv/sharpyuv_dsp.h"  | 
24  |  | #include "sharpyuv/sharpyuv_gamma.h"  | 
25  |  | #include "src/webp/types.h"  | 
26  |  |  | 
27  |  | //------------------------------------------------------------------------------  | 
28  |  |  | 
29  | 0  | int SharpYuvGetVersion(void) { | 
30  | 0  |   return SHARPYUV_VERSION;  | 
31  | 0  | }  | 
32  |  |  | 
33  |  | //------------------------------------------------------------------------------  | 
34  |  | // Sharp RGB->YUV conversion  | 
35  |  |  | 
// Number of fractional bits used by the fixed-point RGB->YUV arithmetic.
#define YUV_FIX 16
// One half expressed in YUV_FIX fixed-point; added before shifting to round.
static const int kYuvHalf = 1 << (YUV_FIX - 1);

// Upper bound on the number of refinement passes of the sharp conversion.
static const int kNumIterations = 4;
40  |  |  | 
// Largest working bit depth, chosen so that intermediate calculations fit in
// 16 bits.
static const int kMaxBitDepth = 14;

// Returns the shift to apply to samples of depth 'rgb_bit_depth' to reach the
// working precision: +2 bits when that still fits in kMaxBitDepth, otherwise
// the (possibly negative) difference needed to land exactly on kMaxBitDepth.
static int GetPrecisionShift(int rgb_bit_depth) {
  if (rgb_bit_depth + 2 <= kMaxBitDepth) {
    return 2;
  }
  return kMaxBitDepth - rgb_bit_depth;
}
51  |  |  | 
// 16-bit working sample types used throughout this file.
typedef uint16_t fixed_y_t;   // unsigned type with extra precision for W
typedef int16_t fixed_t;      // signed type with extra precision for UV
54  |  |  | 
55  |  | //------------------------------------------------------------------------------  | 
56  |  |  | 
57  | 0  | static uint8_t clip_8b(fixed_t v) { | 
58  | 0  |   return (!(v & ~0xff)) ? (uint8_t)v : (v < 0) ? 0u : 255u;  | 
59  | 0  | }  | 
60  |  |  | 
61  | 0  | static uint16_t clip(fixed_t v, int max) { | 
62  | 0  |   return (v < 0) ? 0 : (v > max) ? max : (uint16_t)v;  | 
63  | 0  | }  | 
64  |  |  | 
65  | 0  | static fixed_y_t clip_bit_depth(int y, int bit_depth) { | 
66  | 0  |   const int max = (1 << bit_depth) - 1;  | 
67  | 0  |   return (!(y & ~max)) ? (fixed_y_t)y : (y < 0) ? 0 : max;  | 
68  | 0  | }  | 
69  |  |  | 
70  |  | //------------------------------------------------------------------------------  | 
71  |  |  | 
72  | 0  | static int RGBToGray(int64_t r, int64_t g, int64_t b) { | 
73  | 0  |   const int64_t luma = 13933 * r + 46871 * g + 4732 * b + kYuvHalf;  | 
74  | 0  |   return (int)(luma >> YUV_FIX);  | 
75  | 0  | }  | 
76  |  |  | 
77  |  | static uint32_t ScaleDown(uint16_t a, uint16_t b, uint16_t c, uint16_t d,  | 
78  |  |                           int rgb_bit_depth,  | 
79  | 0  |                           SharpYuvTransferFunctionType transfer_type) { | 
80  | 0  |   const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth);  | 
81  | 0  |   const uint32_t A = SharpYuvGammaToLinear(a, bit_depth, transfer_type);  | 
82  | 0  |   const uint32_t B = SharpYuvGammaToLinear(b, bit_depth, transfer_type);  | 
83  | 0  |   const uint32_t C = SharpYuvGammaToLinear(c, bit_depth, transfer_type);  | 
84  | 0  |   const uint32_t D = SharpYuvGammaToLinear(d, bit_depth, transfer_type);  | 
85  | 0  |   return SharpYuvLinearToGamma((A + B + C + D + 2) >> 2, bit_depth,  | 
86  | 0  |                                transfer_type);  | 
87  | 0  | }  | 
88  |  |  | 
89  |  | static WEBP_INLINE void UpdateW(const fixed_y_t* src, fixed_y_t* dst, int w,  | 
90  |  |                                 int rgb_bit_depth,  | 
91  | 0  |                                 SharpYuvTransferFunctionType transfer_type) { | 
92  | 0  |   const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth);  | 
93  | 0  |   int i = 0;  | 
94  | 0  |   do { | 
95  | 0  |     const uint32_t R =  | 
96  | 0  |         SharpYuvGammaToLinear(src[0 * w + i], bit_depth, transfer_type);  | 
97  | 0  |     const uint32_t G =  | 
98  | 0  |         SharpYuvGammaToLinear(src[1 * w + i], bit_depth, transfer_type);  | 
99  | 0  |     const uint32_t B =  | 
100  | 0  |         SharpYuvGammaToLinear(src[2 * w + i], bit_depth, transfer_type);  | 
101  | 0  |     const uint32_t Y = RGBToGray(R, G, B);  | 
102  | 0  |     dst[i] = (fixed_y_t)SharpYuvLinearToGamma(Y, bit_depth, transfer_type);  | 
103  | 0  |   } while (++i < w);  | 
104  | 0  | }  | 
105  |  |  | 
106  |  | static void UpdateChroma(const fixed_y_t* src1, const fixed_y_t* src2,  | 
107  |  |                          fixed_t* dst, int uv_w, int rgb_bit_depth,  | 
108  | 0  |                          SharpYuvTransferFunctionType transfer_type) { | 
109  | 0  |   int i = 0;  | 
110  | 0  |   do { | 
111  | 0  |     const int r =  | 
112  | 0  |         ScaleDown(src1[0 * uv_w + 0], src1[0 * uv_w + 1], src2[0 * uv_w + 0],  | 
113  | 0  |                   src2[0 * uv_w + 1], rgb_bit_depth, transfer_type);  | 
114  | 0  |     const int g =  | 
115  | 0  |         ScaleDown(src1[2 * uv_w + 0], src1[2 * uv_w + 1], src2[2 * uv_w + 0],  | 
116  | 0  |                   src2[2 * uv_w + 1], rgb_bit_depth, transfer_type);  | 
117  | 0  |     const int b =  | 
118  | 0  |         ScaleDown(src1[4 * uv_w + 0], src1[4 * uv_w + 1], src2[4 * uv_w + 0],  | 
119  | 0  |                   src2[4 * uv_w + 1], rgb_bit_depth, transfer_type);  | 
120  | 0  |     const int W = RGBToGray(r, g, b);  | 
121  | 0  |     dst[0 * uv_w] = (fixed_t)(r - W);  | 
122  | 0  |     dst[1 * uv_w] = (fixed_t)(g - W);  | 
123  | 0  |     dst[2 * uv_w] = (fixed_t)(b - W);  | 
124  | 0  |     dst  += 1;  | 
125  | 0  |     src1 += 2;  | 
126  | 0  |     src2 += 2;  | 
127  | 0  |   } while (++i < uv_w);  | 
128  | 0  | }  | 
129  |  |  | 
130  | 0  | static void StoreGray(const fixed_y_t* rgb, fixed_y_t* y, int w) { | 
131  | 0  |   int i = 0;  | 
132  | 0  |   assert(w > 0);  | 
133  | 0  |   do { | 
134  | 0  |     y[i] = RGBToGray(rgb[0 * w + i], rgb[1 * w + i], rgb[2 * w + i]);  | 
135  | 0  |   } while (++i < w);  | 
136  | 0  | }  | 
137  |  |  | 
138  |  | //------------------------------------------------------------------------------  | 
139  |  |  | 
140  | 0  | static WEBP_INLINE fixed_y_t Filter2(int A, int B, int W0, int bit_depth) { | 
141  | 0  |   const int v0 = (A * 3 + B + 2) >> 2;  | 
142  | 0  |   return clip_bit_depth(v0 + W0, bit_depth);  | 
143  | 0  | }  | 
144  |  |  | 
145  |  | //------------------------------------------------------------------------------  | 
146  |  |  | 
147  | 0  | static WEBP_INLINE int Shift(int v, int shift) { | 
148  | 0  |   return (shift >= 0) ? (v << shift) : (v >> -shift);  | 
149  | 0  | }  | 
150  |  |  | 
151  |  | static void ImportOneRow(const uint8_t* const r_ptr,  | 
152  |  |                          const uint8_t* const g_ptr,  | 
153  |  |                          const uint8_t* const b_ptr,  | 
154  |  |                          int rgb_step,  | 
155  |  |                          int rgb_bit_depth,  | 
156  |  |                          int pic_width,  | 
157  | 0  |                          fixed_y_t* const dst) { | 
158  |  |   // Convert the rgb_step from a number of bytes to a number of uint8_t or  | 
159  |  |   // uint16_t values depending the bit depth.  | 
160  | 0  |   const int step = (rgb_bit_depth > 8) ? rgb_step / 2 : rgb_step;  | 
161  | 0  |   int i = 0;  | 
162  | 0  |   const int w = (pic_width + 1) & ~1;  | 
163  | 0  |   do { | 
164  | 0  |     const int off = i * step;  | 
165  | 0  |     const int shift = GetPrecisionShift(rgb_bit_depth);  | 
166  | 0  |     if (rgb_bit_depth == 8) { | 
167  | 0  |       dst[i + 0 * w] = Shift(r_ptr[off], shift);  | 
168  | 0  |       dst[i + 1 * w] = Shift(g_ptr[off], shift);  | 
169  | 0  |       dst[i + 2 * w] = Shift(b_ptr[off], shift);  | 
170  | 0  |     } else { | 
171  | 0  |       dst[i + 0 * w] = Shift(((uint16_t*)r_ptr)[off], shift);  | 
172  | 0  |       dst[i + 1 * w] = Shift(((uint16_t*)g_ptr)[off], shift);  | 
173  | 0  |       dst[i + 2 * w] = Shift(((uint16_t*)b_ptr)[off], shift);  | 
174  | 0  |     }  | 
175  | 0  |   } while (++i < pic_width);  | 
176  | 0  |   if (pic_width & 1) {  // replicate rightmost pixel | 
177  | 0  |     dst[pic_width + 0 * w] = dst[pic_width + 0 * w - 1];  | 
178  | 0  |     dst[pic_width + 1 * w] = dst[pic_width + 1 * w - 1];  | 
179  | 0  |     dst[pic_width + 2 * w] = dst[pic_width + 2 * w - 1];  | 
180  | 0  |   }  | 
181  | 0  | }  | 
182  |  |  | 
183  |  | static void InterpolateTwoRows(const fixed_y_t* const best_y,  | 
184  |  |                                const fixed_t* prev_uv,  | 
185  |  |                                const fixed_t* cur_uv,  | 
186  |  |                                const fixed_t* next_uv,  | 
187  |  |                                int w,  | 
188  |  |                                fixed_y_t* out1,  | 
189  |  |                                fixed_y_t* out2,  | 
190  | 0  |                                int rgb_bit_depth) { | 
191  | 0  |   const int uv_w = w >> 1;  | 
192  | 0  |   const int len = (w - 1) >> 1;   // length to filter  | 
193  | 0  |   int k = 3;  | 
194  | 0  |   const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth);  | 
195  | 0  |   while (k-- > 0) {   // process each R/G/B segments in turn | 
196  |  |     // special boundary case for i==0  | 
197  | 0  |     out1[0] = Filter2(cur_uv[0], prev_uv[0], best_y[0], bit_depth);  | 
198  | 0  |     out2[0] = Filter2(cur_uv[0], next_uv[0], best_y[w], bit_depth);  | 
199  |  | 
  | 
200  | 0  |     SharpYuvFilterRow(cur_uv, prev_uv, len, best_y + 0 + 1, out1 + 1,  | 
201  | 0  |                       bit_depth);  | 
202  | 0  |     SharpYuvFilterRow(cur_uv, next_uv, len, best_y + w + 1, out2 + 1,  | 
203  | 0  |                       bit_depth);  | 
204  |  |  | 
205  |  |     // special boundary case for i == w - 1 when w is even  | 
206  | 0  |     if (!(w & 1)) { | 
207  | 0  |       out1[w - 1] = Filter2(cur_uv[uv_w - 1], prev_uv[uv_w - 1],  | 
208  | 0  |                             best_y[w - 1 + 0], bit_depth);  | 
209  | 0  |       out2[w - 1] = Filter2(cur_uv[uv_w - 1], next_uv[uv_w - 1],  | 
210  | 0  |                             best_y[w - 1 + w], bit_depth);  | 
211  | 0  |     }  | 
212  | 0  |     out1 += w;  | 
213  | 0  |     out2 += w;  | 
214  | 0  |     prev_uv += uv_w;  | 
215  | 0  |     cur_uv  += uv_w;  | 
216  | 0  |     next_uv += uv_w;  | 
217  | 0  |   }  | 
218  | 0  | }  | 
219  |  |  | 
220  |  | static WEBP_INLINE int RGBToYUVComponent(int r, int g, int b,  | 
221  | 0  |                                          const int coeffs[4], int sfix) { | 
222  | 0  |   const int srounder = 1 << (YUV_FIX + sfix - 1);  | 
223  | 0  |   const int luma = coeffs[0] * r + coeffs[1] * g + coeffs[2] * b +  | 
224  | 0  |                    coeffs[3] + srounder;  | 
225  | 0  |   return (luma >> (YUV_FIX + sfix));  | 
226  | 0  | }  | 
227  |  |  | 
228  |  | static int ConvertWRGBToYUV(const fixed_y_t* best_y, const fixed_t* best_uv,  | 
229  |  |                             uint8_t* y_ptr, int y_stride, uint8_t* u_ptr,  | 
230  |  |                             int u_stride, uint8_t* v_ptr, int v_stride,  | 
231  |  |                             int rgb_bit_depth,  | 
232  |  |                             int yuv_bit_depth, int width, int height,  | 
233  | 0  |                             const SharpYuvConversionMatrix* yuv_matrix) { | 
234  | 0  |   int i, j;  | 
235  | 0  |   const fixed_t* const best_uv_base = best_uv;  | 
236  | 0  |   const int w = (width + 1) & ~1;  | 
237  | 0  |   const int h = (height + 1) & ~1;  | 
238  | 0  |   const int uv_w = w >> 1;  | 
239  | 0  |   const int uv_h = h >> 1;  | 
240  | 0  |   const int sfix = GetPrecisionShift(rgb_bit_depth);  | 
241  | 0  |   const int yuv_max = (1 << yuv_bit_depth) - 1;  | 
242  |  | 
  | 
243  | 0  |   best_uv = best_uv_base;  | 
244  | 0  |   j = 0;  | 
245  | 0  |   do { | 
246  | 0  |     i = 0;  | 
247  | 0  |     do { | 
248  | 0  |       const int off = (i >> 1);  | 
249  | 0  |       const int W = best_y[i];  | 
250  | 0  |       const int r = best_uv[off + 0 * uv_w] + W;  | 
251  | 0  |       const int g = best_uv[off + 1 * uv_w] + W;  | 
252  | 0  |       const int b = best_uv[off + 2 * uv_w] + W;  | 
253  | 0  |       const int y = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_y, sfix);  | 
254  | 0  |       if (yuv_bit_depth <= 8) { | 
255  | 0  |         y_ptr[i] = clip_8b(y);  | 
256  | 0  |       } else { | 
257  | 0  |         ((uint16_t*)y_ptr)[i] = clip(y, yuv_max);  | 
258  | 0  |       }  | 
259  | 0  |     } while (++i < width);  | 
260  | 0  |     best_y += w;  | 
261  | 0  |     best_uv += (j & 1) * 3 * uv_w;  | 
262  | 0  |     y_ptr += y_stride;  | 
263  | 0  |   } while (++j < height);  | 
264  |  | 
  | 
265  | 0  |   best_uv = best_uv_base;  | 
266  | 0  |   j = 0;  | 
267  | 0  |   do { | 
268  | 0  |     i = 0;  | 
269  | 0  |     do { | 
270  |  |       // Note r, g and b values here are off by W, but a constant offset on all  | 
271  |  |       // 3 components doesn't change the value of u and v with a YCbCr matrix.  | 
272  | 0  |       const int r = best_uv[i + 0 * uv_w];  | 
273  | 0  |       const int g = best_uv[i + 1 * uv_w];  | 
274  | 0  |       const int b = best_uv[i + 2 * uv_w];  | 
275  | 0  |       const int u = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_u, sfix);  | 
276  | 0  |       const int v = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_v, sfix);  | 
277  | 0  |       if (yuv_bit_depth <= 8) { | 
278  | 0  |         u_ptr[i] = clip_8b(u);  | 
279  | 0  |         v_ptr[i] = clip_8b(v);  | 
280  | 0  |       } else { | 
281  | 0  |         ((uint16_t*)u_ptr)[i] = clip(u, yuv_max);  | 
282  | 0  |         ((uint16_t*)v_ptr)[i] = clip(v, yuv_max);  | 
283  | 0  |       }  | 
284  | 0  |     } while (++i < uv_w);  | 
285  | 0  |     best_uv += 3 * uv_w;  | 
286  | 0  |     u_ptr += u_stride;  | 
287  | 0  |     v_ptr += v_stride;  | 
288  | 0  |   } while (++j < uv_h);  | 
289  | 0  |   return 1;  | 
290  | 0  | }  | 
291  |  |  | 
292  |  | //------------------------------------------------------------------------------  | 
293  |  | // Main function  | 
294  |  |  | 
// Allocates nmemb * size bytes with malloc().
// Returns NULL if the product overflows 64 bits, if it does not fit in a
// size_t, or if malloc() itself fails. The caller owns the returned memory and
// must release it with free().
static void* SafeMalloc(uint64_t nmemb, size_t size) {
  uint64_t total_size;
  // Reject products that would wrap around modulo 2^64: a wrapped value could
  // otherwise pass the size_t check below and yield a too-small allocation.
  if (size != 0 && nmemb > ~(uint64_t)0 / size) return NULL;
  total_size = nmemb * (uint64_t)size;
  // Reject sizes that cannot be represented by size_t (e.g. 32-bit targets).
  if (total_size != (size_t)total_size) return NULL;
  return malloc((size_t)total_size);
}
300  |  |  | 
301  | 0  | #define SAFE_ALLOC(W, H, T) ((T*)SafeMalloc((uint64_t)(W) * (H), sizeof(T)))  | 
302  |  |  | 
303  |  | static int DoSharpArgbToYuv(const uint8_t* r_ptr, const uint8_t* g_ptr,  | 
304  |  |                             const uint8_t* b_ptr, int rgb_step, int rgb_stride,  | 
305  |  |                             int rgb_bit_depth, uint8_t* y_ptr, int y_stride,  | 
306  |  |                             uint8_t* u_ptr, int u_stride, uint8_t* v_ptr,  | 
307  |  |                             int v_stride, int yuv_bit_depth, int width,  | 
308  |  |                             int height,  | 
309  |  |                             const SharpYuvConversionMatrix* yuv_matrix,  | 
310  | 0  |                             SharpYuvTransferFunctionType transfer_type) { | 
311  |  |   // we expand the right/bottom border if needed  | 
312  | 0  |   const int w = (width + 1) & ~1;  | 
313  | 0  |   const int h = (height + 1) & ~1;  | 
314  | 0  |   const int uv_w = w >> 1;  | 
315  | 0  |   const int uv_h = h >> 1;  | 
316  | 0  |   const int y_bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth);  | 
317  | 0  |   uint64_t prev_diff_y_sum = ~0;  | 
318  | 0  |   int j, iter;  | 
319  |  |  | 
320  |  |   // TODO(skal): allocate one big memory chunk. But for now, it's easier  | 
321  |  |   // for valgrind debugging to have several chunks.  | 
322  | 0  |   fixed_y_t* const tmp_buffer = SAFE_ALLOC(w * 3, 2, fixed_y_t);   // scratch  | 
323  | 0  |   fixed_y_t* const best_y_base = SAFE_ALLOC(w, h, fixed_y_t);  | 
324  | 0  |   fixed_y_t* const target_y_base = SAFE_ALLOC(w, h, fixed_y_t);  | 
325  | 0  |   fixed_y_t* const best_rgb_y = SAFE_ALLOC(w, 2, fixed_y_t);  | 
326  | 0  |   fixed_t* const best_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t);  | 
327  | 0  |   fixed_t* const target_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t);  | 
328  | 0  |   fixed_t* const best_rgb_uv = SAFE_ALLOC(uv_w * 3, 1, fixed_t);  | 
329  | 0  |   fixed_y_t* best_y = best_y_base;  | 
330  | 0  |   fixed_y_t* target_y = target_y_base;  | 
331  | 0  |   fixed_t* best_uv = best_uv_base;  | 
332  | 0  |   fixed_t* target_uv = target_uv_base;  | 
333  | 0  |   const uint64_t diff_y_threshold = (uint64_t)(3.0 * w * h);  | 
334  | 0  |   int ok;  | 
335  | 0  |   assert(w > 0);  | 
336  | 0  |   assert(h > 0);  | 
337  |  |  | 
338  | 0  |   if (best_y_base == NULL || best_uv_base == NULL ||  | 
339  | 0  |       target_y_base == NULL || target_uv_base == NULL ||  | 
340  | 0  |       best_rgb_y == NULL || best_rgb_uv == NULL ||  | 
341  | 0  |       tmp_buffer == NULL) { | 
342  | 0  |     ok = 0;  | 
343  | 0  |     goto End;  | 
344  | 0  |   }  | 
345  |  |  | 
346  |  |   // Import RGB samples to W/RGB representation.  | 
347  | 0  |   for (j = 0; j < height; j += 2) { | 
348  | 0  |     const int is_last_row = (j == height - 1);  | 
349  | 0  |     fixed_y_t* const src1 = tmp_buffer + 0 * w;  | 
350  | 0  |     fixed_y_t* const src2 = tmp_buffer + 3 * w;  | 
351  |  |  | 
352  |  |     // prepare two rows of input  | 
353  | 0  |     ImportOneRow(r_ptr, g_ptr, b_ptr, rgb_step, rgb_bit_depth, width,  | 
354  | 0  |                  src1);  | 
355  | 0  |     if (!is_last_row) { | 
356  | 0  |       ImportOneRow(r_ptr + rgb_stride, g_ptr + rgb_stride, b_ptr + rgb_stride,  | 
357  | 0  |                    rgb_step, rgb_bit_depth, width, src2);  | 
358  | 0  |     } else { | 
359  | 0  |       memcpy(src2, src1, 3 * w * sizeof(*src2));  | 
360  | 0  |     }  | 
361  | 0  |     StoreGray(src1, best_y + 0, w);  | 
362  | 0  |     StoreGray(src2, best_y + w, w);  | 
363  |  | 
  | 
364  | 0  |     UpdateW(src1, target_y, w, rgb_bit_depth, transfer_type);  | 
365  | 0  |     UpdateW(src2, target_y + w, w, rgb_bit_depth, transfer_type);  | 
366  | 0  |     UpdateChroma(src1, src2, target_uv, uv_w, rgb_bit_depth, transfer_type);  | 
367  | 0  |     memcpy(best_uv, target_uv, 3 * uv_w * sizeof(*best_uv));  | 
368  | 0  |     best_y += 2 * w;  | 
369  | 0  |     best_uv += 3 * uv_w;  | 
370  | 0  |     target_y += 2 * w;  | 
371  | 0  |     target_uv += 3 * uv_w;  | 
372  | 0  |     r_ptr += 2 * rgb_stride;  | 
373  | 0  |     g_ptr += 2 * rgb_stride;  | 
374  | 0  |     b_ptr += 2 * rgb_stride;  | 
375  | 0  |   }  | 
376  |  |  | 
377  |  |   // Iterate and resolve clipping conflicts.  | 
378  | 0  |   for (iter = 0; iter < kNumIterations; ++iter) { | 
379  | 0  |     const fixed_t* cur_uv = best_uv_base;  | 
380  | 0  |     const fixed_t* prev_uv = best_uv_base;  | 
381  | 0  |     uint64_t diff_y_sum = 0;  | 
382  |  | 
  | 
383  | 0  |     best_y = best_y_base;  | 
384  | 0  |     best_uv = best_uv_base;  | 
385  | 0  |     target_y = target_y_base;  | 
386  | 0  |     target_uv = target_uv_base;  | 
387  | 0  |     j = 0;  | 
388  | 0  |     do { | 
389  | 0  |       fixed_y_t* const src1 = tmp_buffer + 0 * w;  | 
390  | 0  |       fixed_y_t* const src2 = tmp_buffer + 3 * w;  | 
391  | 0  |       { | 
392  | 0  |         const fixed_t* const next_uv = cur_uv + ((j < h - 2) ? 3 * uv_w : 0);  | 
393  | 0  |         InterpolateTwoRows(best_y, prev_uv, cur_uv, next_uv, w,  | 
394  | 0  |                            src1, src2, rgb_bit_depth);  | 
395  | 0  |         prev_uv = cur_uv;  | 
396  | 0  |         cur_uv = next_uv;  | 
397  | 0  |       }  | 
398  |  | 
  | 
399  | 0  |       UpdateW(src1, best_rgb_y + 0 * w, w, rgb_bit_depth, transfer_type);  | 
400  | 0  |       UpdateW(src2, best_rgb_y + 1 * w, w, rgb_bit_depth, transfer_type);  | 
401  | 0  |       UpdateChroma(src1, src2, best_rgb_uv, uv_w, rgb_bit_depth, transfer_type);  | 
402  |  |  | 
403  |  |       // update two rows of Y and one row of RGB  | 
404  | 0  |       diff_y_sum +=  | 
405  | 0  |           SharpYuvUpdateY(target_y, best_rgb_y, best_y, 2 * w, y_bit_depth);  | 
406  | 0  |       SharpYuvUpdateRGB(target_uv, best_rgb_uv, best_uv, 3 * uv_w);  | 
407  |  | 
  | 
408  | 0  |       best_y += 2 * w;  | 
409  | 0  |       best_uv += 3 * uv_w;  | 
410  | 0  |       target_y += 2 * w;  | 
411  | 0  |       target_uv += 3 * uv_w;  | 
412  | 0  |       j += 2;  | 
413  | 0  |     } while (j < h);  | 
414  |  |     // test exit condition  | 
415  | 0  |     if (iter > 0) { | 
416  | 0  |       if (diff_y_sum < diff_y_threshold) break;  | 
417  | 0  |       if (diff_y_sum > prev_diff_y_sum) break;  | 
418  | 0  |     }  | 
419  | 0  |     prev_diff_y_sum = diff_y_sum;  | 
420  | 0  |   }  | 
421  |  |  | 
422  |  |   // final reconstruction  | 
423  | 0  |   ok = ConvertWRGBToYUV(best_y_base, best_uv_base, y_ptr, y_stride, u_ptr,  | 
424  | 0  |                         u_stride, v_ptr, v_stride, rgb_bit_depth, yuv_bit_depth,  | 
425  | 0  |                         width, height, yuv_matrix);  | 
426  |  | 
  | 
427  | 0  |  End:  | 
428  | 0  |   free(best_y_base);  | 
429  | 0  |   free(best_uv_base);  | 
430  | 0  |   free(target_y_base);  | 
431  | 0  |   free(target_uv_base);  | 
432  | 0  |   free(best_rgb_y);  | 
433  | 0  |   free(best_rgb_uv);  | 
434  | 0  |   free(tmp_buffer);  | 
435  | 0  |   return ok;  | 
436  | 0  | }  | 
437  |  |  | 
438  |  | #undef SAFE_ALLOC  | 
439  |  |  | 
#if defined(WEBP_USE_THREAD) && !defined(_WIN32)
#include <pthread.h>  // NOLINT

// LOCK_ACCESS declares a function-local static mutex and locks it; the
// enclosing function returns immediately if locking fails.
// UNLOCK_ACCESS_AND_RETURN unlocks that mutex and returns.
// Both are meant to be used inside a function returning void.
#define LOCK_ACCESS                                                   \
    static pthread_mutex_t sharpyuv_lock = PTHREAD_MUTEX_INITIALIZER; \
    if (pthread_mutex_lock(&sharpyuv_lock)) return
#define UNLOCK_ACCESS_AND_RETURN                  \
    do {                                          \
      (void)pthread_mutex_unlock(&sharpyuv_lock); \
      return;                                     \
    } while (0)
#else  // !(defined(WEBP_USE_THREAD) && !defined(_WIN32))
// No locking in this configuration: the macros reduce to a no-op and a plain
// return.
#define LOCK_ACCESS do {} while (0)
#define UNLOCK_ACCESS_AND_RETURN return
#endif  // defined(WEBP_USE_THREAD) && !defined(_WIN32)
455  |  |  | 
456  |  | // Hidden exported init function.  | 
457  |  | // By default SharpYuvConvert calls it with SharpYuvGetCPUInfo. If needed,  | 
458  |  | // users can declare it as extern and call it with an alternate VP8CPUInfo  | 
459  |  | // function.  | 
460  |  | extern VP8CPUInfo SharpYuvGetCPUInfo;  | 
461  |  | SHARPYUV_EXTERN void SharpYuvInit(VP8CPUInfo cpu_info_func);  | 
462  | 0  | void SharpYuvInit(VP8CPUInfo cpu_info_func) { | 
463  | 0  |   static volatile VP8CPUInfo sharpyuv_last_cpuinfo_used =  | 
464  | 0  |       (VP8CPUInfo)&sharpyuv_last_cpuinfo_used;  | 
465  | 0  |   LOCK_ACCESS;  | 
466  |  |   // Only update SharpYuvGetCPUInfo when called from external code to avoid a  | 
467  |  |   // race on reading the value in SharpYuvConvert().  | 
468  | 0  |   if (cpu_info_func != (VP8CPUInfo)&SharpYuvGetCPUInfo) { | 
469  | 0  |     SharpYuvGetCPUInfo = cpu_info_func;  | 
470  | 0  |   }  | 
471  | 0  |   if (sharpyuv_last_cpuinfo_used == SharpYuvGetCPUInfo) { | 
472  | 0  |     UNLOCK_ACCESS_AND_RETURN;  | 
473  | 0  |   }  | 
474  |  |  | 
475  | 0  |   SharpYuvInitDsp();  | 
476  | 0  |   SharpYuvInitGammaTables();  | 
477  |  | 
  | 
478  | 0  |   sharpyuv_last_cpuinfo_used = SharpYuvGetCPUInfo;  | 
479  | 0  |   UNLOCK_ACCESS_AND_RETURN;  | 
480  | 0  | }  | 
481  |  |  | 
482  |  | int SharpYuvConvert(const void* r_ptr, const void* g_ptr, const void* b_ptr,  | 
483  |  |                     int rgb_step, int rgb_stride, int rgb_bit_depth,  | 
484  |  |                     void* y_ptr, int y_stride, void* u_ptr, int u_stride,  | 
485  |  |                     void* v_ptr, int v_stride, int yuv_bit_depth, int width,  | 
486  | 0  |                     int height, const SharpYuvConversionMatrix* yuv_matrix) { | 
487  | 0  |   SharpYuvOptions options;  | 
488  | 0  |   options.yuv_matrix = yuv_matrix;  | 
489  | 0  |   options.transfer_type = kSharpYuvTransferFunctionSrgb;  | 
490  | 0  |   return SharpYuvConvertWithOptions(  | 
491  | 0  |       r_ptr, g_ptr, b_ptr, rgb_step, rgb_stride, rgb_bit_depth, y_ptr, y_stride,  | 
492  | 0  |       u_ptr, u_stride, v_ptr, v_stride, yuv_bit_depth, width, height, &options);  | 
493  | 0  | }  | 
494  |  |  | 
495  |  | int SharpYuvOptionsInitInternal(const SharpYuvConversionMatrix* yuv_matrix,  | 
496  | 0  |                                 SharpYuvOptions* options, int version) { | 
497  | 0  |   const int major = (version >> 24);  | 
498  | 0  |   const int minor = (version >> 16) & 0xff;  | 
499  | 0  |   if (options == NULL || yuv_matrix == NULL ||  | 
500  | 0  |       (major == SHARPYUV_VERSION_MAJOR && major == 0 &&  | 
501  | 0  |        minor != SHARPYUV_VERSION_MINOR) ||  | 
502  | 0  |       (major != SHARPYUV_VERSION_MAJOR)) { | 
503  | 0  |     return 0;  | 
504  | 0  |   }  | 
505  | 0  |   options->yuv_matrix = yuv_matrix;  | 
506  | 0  |   options->transfer_type = kSharpYuvTransferFunctionSrgb;  | 
507  | 0  |   return 1;  | 
508  | 0  | }  | 
509  |  |  | 
510  |  | int SharpYuvConvertWithOptions(const void* r_ptr, const void* g_ptr,  | 
511  |  |                                const void* b_ptr, int rgb_step, int rgb_stride,  | 
512  |  |                                int rgb_bit_depth, void* y_ptr, int y_stride,  | 
513  |  |                                void* u_ptr, int u_stride, void* v_ptr,  | 
514  |  |                                int v_stride, int yuv_bit_depth, int width,  | 
515  | 0  |                                int height, const SharpYuvOptions* options) { | 
516  | 0  |   const SharpYuvConversionMatrix* yuv_matrix = options->yuv_matrix;  | 
517  | 0  |   SharpYuvTransferFunctionType transfer_type = options->transfer_type;  | 
518  | 0  |   SharpYuvConversionMatrix scaled_matrix;  | 
519  | 0  |   const int rgb_max = (1 << rgb_bit_depth) - 1;  | 
520  | 0  |   const int rgb_round = 1 << (rgb_bit_depth - 1);  | 
521  | 0  |   const int yuv_max = (1 << yuv_bit_depth) - 1;  | 
522  | 0  |   const int sfix = GetPrecisionShift(rgb_bit_depth);  | 
523  |  | 
  | 
524  | 0  |   if (width < 1 || height < 1 || width == INT_MAX || height == INT_MAX ||  | 
525  | 0  |       r_ptr == NULL || g_ptr == NULL || b_ptr == NULL || y_ptr == NULL ||  | 
526  | 0  |       u_ptr == NULL || v_ptr == NULL) { | 
527  | 0  |     return 0;  | 
528  | 0  |   }  | 
529  | 0  |   if (rgb_bit_depth != 8 && rgb_bit_depth != 10 && rgb_bit_depth != 12 &&  | 
530  | 0  |       rgb_bit_depth != 16) { | 
531  | 0  |     return 0;  | 
532  | 0  |   }  | 
533  | 0  |   if (yuv_bit_depth != 8 && yuv_bit_depth != 10 && yuv_bit_depth != 12) { | 
534  | 0  |     return 0;  | 
535  | 0  |   }  | 
536  | 0  |   if (rgb_bit_depth > 8 && (rgb_step % 2 != 0 || rgb_stride % 2 != 0)) { | 
537  |  |     // Step/stride should be even for uint16_t buffers.  | 
538  | 0  |     return 0;  | 
539  | 0  |   }  | 
540  | 0  |   if (yuv_bit_depth > 8 &&  | 
541  | 0  |       (y_stride % 2 != 0 || u_stride % 2 != 0 || v_stride % 2 != 0)) { | 
542  |  |     // Stride should be even for uint16_t buffers.  | 
543  | 0  |     return 0;  | 
544  | 0  |   }  | 
545  |  |   // The address of the function pointer is used to avoid a read race.  | 
546  | 0  |   SharpYuvInit((VP8CPUInfo)&SharpYuvGetCPUInfo);  | 
547  |  |  | 
548  |  |   // Add scaling factor to go from rgb_bit_depth to yuv_bit_depth, to the  | 
549  |  |   // rgb->yuv conversion matrix.  | 
550  | 0  |   if (rgb_bit_depth == yuv_bit_depth) { | 
551  | 0  |     memcpy(&scaled_matrix, yuv_matrix, sizeof(scaled_matrix));  | 
552  | 0  |   } else { | 
553  | 0  |     int i;  | 
554  | 0  |     for (i = 0; i < 3; ++i) { | 
555  | 0  |       scaled_matrix.rgb_to_y[i] =  | 
556  | 0  |           (yuv_matrix->rgb_to_y[i] * yuv_max + rgb_round) / rgb_max;  | 
557  | 0  |       scaled_matrix.rgb_to_u[i] =  | 
558  | 0  |           (yuv_matrix->rgb_to_u[i] * yuv_max + rgb_round) / rgb_max;  | 
559  | 0  |       scaled_matrix.rgb_to_v[i] =  | 
560  | 0  |           (yuv_matrix->rgb_to_v[i] * yuv_max + rgb_round) / rgb_max;  | 
561  | 0  |     }  | 
562  | 0  |   }  | 
563  |  |   // Also incorporate precision change scaling.  | 
564  | 0  |   scaled_matrix.rgb_to_y[3] = Shift(yuv_matrix->rgb_to_y[3], sfix);  | 
565  | 0  |   scaled_matrix.rgb_to_u[3] = Shift(yuv_matrix->rgb_to_u[3], sfix);  | 
566  | 0  |   scaled_matrix.rgb_to_v[3] = Shift(yuv_matrix->rgb_to_v[3], sfix);  | 
567  |  | 
  | 
568  | 0  |   return DoSharpArgbToYuv(  | 
569  | 0  |       (const uint8_t*)r_ptr, (const uint8_t*)g_ptr, (const uint8_t*)b_ptr,  | 
570  | 0  |       rgb_step, rgb_stride, rgb_bit_depth, (uint8_t*)y_ptr, y_stride,  | 
571  | 0  |       (uint8_t*)u_ptr, u_stride, (uint8_t*)v_ptr, v_stride, yuv_bit_depth,  | 
572  | 0  |       width, height, &scaled_matrix, transfer_type);  | 
573  | 0  | }  | 
574  |  |  | 
575  |  | //------------------------------------------------------------------------------  |