/src/libwebp/src/dsp/yuv.c
Line | Count | Source |
1 | | // Copyright 2010 Google Inc. All Rights Reserved. |
2 | | // |
3 | | // Use of this source code is governed by a BSD-style license |
4 | | // that can be found in the COPYING file in the root of the source |
5 | | // tree. An additional intellectual property rights grant can be found |
6 | | // in the file PATENTS. All contributing project authors may |
7 | | // be found in the AUTHORS file in the root of the source tree. |
8 | | // ----------------------------------------------------------------------------- |
9 | | // |
10 | | // YUV->RGB conversion functions |
11 | | // |
12 | | // Author: Skal (pascal.massimino@gmail.com) |
13 | | |
14 | | #include "src/dsp/yuv.h" |
15 | | |
16 | | #include <assert.h> |
17 | | #include <stdlib.h> |
18 | | #include <string.h> |
19 | | |
20 | | #include "src/dsp/cpu.h" |
21 | | #include "src/dsp/dsp.h" |
22 | | #include "src/webp/decode.h" |
23 | | #include "src/webp/types.h" |
24 | | |
25 | | // Comment out the following #define to disable gamma-compression during RGB->U/V averaging |
26 | | #define USE_GAMMA_COMPRESSION |
27 | | |
28 | | // If defined, use table to compute x / alpha. |
29 | | #define USE_INVERSE_ALPHA_TABLE |
30 | | |
31 | | #ifdef USE_GAMMA_COMPRESSION |
32 | | #include <math.h> |
33 | | #endif |
34 | | |
35 | | //----------------------------------------------------------------------------- |
36 | | // Plain-C version |
37 | | |
// Generates a static row converter named FUNC_NAME. The generated function
// walks 'len' luma samples and calls FUNC once per output pixel, writing XSTEP
// bytes-per-pixel steps into 'dst'. Pixels are handled two at a time: both
// pixels of a pair use the same u[0]/v[0] sample. When 'len' is odd, the
// trailing pixel is converted on its own with the current u/v sample.
#define ROW_FUNC(FUNC_NAME, FUNC, XSTEP)                                      \
  static void FUNC_NAME(                                                      \
      const uint8_t* WEBP_RESTRICT y, const uint8_t* WEBP_RESTRICT u,         \
      const uint8_t* WEBP_RESTRICT v, uint8_t* WEBP_RESTRICT dst, int len) {  \
    const uint8_t* const pairs_end = dst + (len & ~1) * (XSTEP);              \
    for (; dst != pairs_end; y += 2, ++u, ++v, dst += 2 * (XSTEP)) {          \
      FUNC(y[0], u[0], v[0], dst);                                            \
      FUNC(y[1], u[0], v[0], dst + (XSTEP));                                  \
    }                                                                         \
    if (len & 1) {                                                            \
      FUNC(y[0], u[0], v[0], dst);                                            \
    }                                                                         \
  }
55 | | |
56 | | // All variants implemented. |
57 | | ROW_FUNC(YuvToRgbRow, VP8YuvToRgb, 3) |
58 | | ROW_FUNC(YuvToBgrRow, VP8YuvToBgr, 3) |
59 | | ROW_FUNC(YuvToRgbaRow, VP8YuvToRgba, 4) |
60 | | ROW_FUNC(YuvToBgraRow, VP8YuvToBgra, 4) |
61 | | ROW_FUNC(YuvToArgbRow, VP8YuvToArgb, 4) |
62 | | ROW_FUNC(YuvToRgba4444Row, VP8YuvToRgba4444, 2) |
63 | | ROW_FUNC(YuvToRgb565Row, VP8YuvToRgb565, 2) |
64 | | |
65 | | #undef ROW_FUNC |
66 | | |
67 | | // Main call for processing a plane with a WebPSamplerRowFunc function: |
68 | | void WebPSamplerProcessPlane(const uint8_t* WEBP_RESTRICT y, int y_stride, |
69 | | const uint8_t* WEBP_RESTRICT u, |
70 | | const uint8_t* WEBP_RESTRICT v, int uv_stride, |
71 | | uint8_t* WEBP_RESTRICT dst, int dst_stride, |
72 | 0 | int width, int height, WebPSamplerRowFunc func) { |
73 | 0 | int j; |
74 | 0 | for (j = 0; j < height; ++j) { |
75 | 0 | func(y, u, v, dst, width); |
76 | 0 | y += y_stride; |
77 | 0 | if (j & 1) { |
78 | 0 | u += uv_stride; |
79 | 0 | v += uv_stride; |
80 | 0 | } |
81 | 0 | dst += dst_stride; |
82 | 0 | } |
83 | 0 | } |
84 | | |
85 | | //----------------------------------------------------------------------------- |
86 | | // Main call |
87 | | |
88 | | WebPSamplerRowFunc WebPSamplers[MODE_LAST]; |
89 | | |
90 | | extern VP8CPUInfo VP8GetCPUInfo; |
91 | | extern void WebPInitSamplersSSE2(void); |
92 | | extern void WebPInitSamplersSSE41(void); |
93 | | extern void WebPInitSamplersMIPS32(void); |
94 | | extern void WebPInitSamplersMIPSdspR2(void); |
95 | | |
96 | 1 | WEBP_DSP_INIT_FUNC(WebPInitSamplers) { |
97 | 1 | WebPSamplers[MODE_RGB] = YuvToRgbRow; |
98 | 1 | WebPSamplers[MODE_RGBA] = YuvToRgbaRow; |
99 | 1 | WebPSamplers[MODE_BGR] = YuvToBgrRow; |
100 | 1 | WebPSamplers[MODE_BGRA] = YuvToBgraRow; |
101 | 1 | WebPSamplers[MODE_ARGB] = YuvToArgbRow; |
102 | 1 | WebPSamplers[MODE_RGBA_4444] = YuvToRgba4444Row; |
103 | 1 | WebPSamplers[MODE_RGB_565] = YuvToRgb565Row; |
104 | 1 | WebPSamplers[MODE_rgbA] = YuvToRgbaRow; |
105 | 1 | WebPSamplers[MODE_bgrA] = YuvToBgraRow; |
106 | 1 | WebPSamplers[MODE_Argb] = YuvToArgbRow; |
107 | 1 | WebPSamplers[MODE_rgbA_4444] = YuvToRgba4444Row; |
108 | | |
109 | | // If defined, use CPUInfo() to overwrite some pointers with faster versions. |
110 | 1 | if (VP8GetCPUInfo != NULL) { |
111 | 1 | #if defined(WEBP_HAVE_SSE2) |
112 | 1 | if (VP8GetCPUInfo(kSSE2)) { |
113 | 1 | WebPInitSamplersSSE2(); |
114 | 1 | } |
115 | 1 | #endif // WEBP_HAVE_SSE2 |
116 | 1 | #if defined(WEBP_HAVE_SSE41) |
117 | 1 | if (VP8GetCPUInfo(kSSE4_1)) { |
118 | 1 | WebPInitSamplersSSE41(); |
119 | 1 | } |
120 | 1 | #endif // WEBP_HAVE_SSE41 |
121 | | #if defined(WEBP_USE_MIPS32) |
122 | | if (VP8GetCPUInfo(kMIPS32)) { |
123 | | WebPInitSamplersMIPS32(); |
124 | | } |
125 | | #endif // WEBP_USE_MIPS32 |
126 | | #if defined(WEBP_USE_MIPS_DSP_R2) |
127 | | if (VP8GetCPUInfo(kMIPSdspR2)) { |
128 | | WebPInitSamplersMIPSdspR2(); |
129 | | } |
130 | | #endif // WEBP_USE_MIPS_DSP_R2 |
131 | 1 | } |
132 | 1 | } |
133 | | |
134 | | //----------------------------------------------------------------------------- |
135 | | // ARGB -> YUV converters |
136 | | |
137 | | static void ConvertARGBToY_C(const uint32_t* WEBP_RESTRICT argb, |
138 | 0 | uint8_t* WEBP_RESTRICT y, int width) { |
139 | 0 | int i; |
140 | 0 | for (i = 0; i < width; ++i) { |
141 | 0 | const uint32_t p = argb[i]; |
142 | 0 | y[i] = |
143 | 0 | VP8RGBToY((p >> 16) & 0xff, (p >> 8) & 0xff, (p >> 0) & 0xff, YUV_HALF); |
144 | 0 | } |
145 | 0 | } |
146 | | |
147 | | void WebPConvertARGBToUV_C(const uint32_t* WEBP_RESTRICT argb, |
148 | | uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v, |
149 | 0 | int src_width, int do_store) { |
150 | | // No rounding. Last pixel is dealt with separately. |
151 | 0 | const int uv_width = src_width >> 1; |
152 | 0 | int i; |
153 | 0 | for (i = 0; i < uv_width; ++i) { |
154 | 0 | const uint32_t v0 = argb[2 * i + 0]; |
155 | 0 | const uint32_t v1 = argb[2 * i + 1]; |
156 | | // VP8RGBToU/V expects four accumulated pixels. Hence we need to |
157 | | // scale r/g/b value by a factor 2. We just shift v0/v1 one bit less. |
158 | 0 | const int r = ((v0 >> 15) & 0x1fe) + ((v1 >> 15) & 0x1fe); |
159 | 0 | const int g = ((v0 >> 7) & 0x1fe) + ((v1 >> 7) & 0x1fe); |
160 | 0 | const int b = ((v0 << 1) & 0x1fe) + ((v1 << 1) & 0x1fe); |
161 | 0 | const int tmp_u = VP8RGBToU(r, g, b, YUV_HALF << 2); |
162 | 0 | const int tmp_v = VP8RGBToV(r, g, b, YUV_HALF << 2); |
163 | 0 | if (do_store) { |
164 | 0 | u[i] = tmp_u; |
165 | 0 | v[i] = tmp_v; |
166 | 0 | } else { |
167 | | // Approximated average-of-four. But it's an acceptable diff. |
168 | 0 | u[i] = (u[i] + tmp_u + 1) >> 1; |
169 | 0 | v[i] = (v[i] + tmp_v + 1) >> 1; |
170 | 0 | } |
171 | 0 | } |
172 | 0 | if (src_width & 1) { // last pixel |
173 | 0 | const uint32_t v0 = argb[2 * i + 0]; |
174 | 0 | const int r = (v0 >> 14) & 0x3fc; |
175 | 0 | const int g = (v0 >> 6) & 0x3fc; |
176 | 0 | const int b = (v0 << 2) & 0x3fc; |
177 | 0 | const int tmp_u = VP8RGBToU(r, g, b, YUV_HALF << 2); |
178 | 0 | const int tmp_v = VP8RGBToV(r, g, b, YUV_HALF << 2); |
179 | 0 | if (do_store) { |
180 | 0 | u[i] = tmp_u; |
181 | 0 | v[i] = tmp_v; |
182 | 0 | } else { |
183 | 0 | u[i] = (u[i] + tmp_u + 1) >> 1; |
184 | 0 | v[i] = (v[i] + tmp_v + 1) >> 1; |
185 | 0 | } |
186 | 0 | } |
187 | 0 | } |
188 | | |
189 | | //----------------------------------------------------------------------------- |
190 | | |
191 | | static void ConvertRGBToY_C(const uint8_t* WEBP_RESTRICT rgb, |
192 | 0 | uint8_t* WEBP_RESTRICT y, int width, int step) { |
193 | 0 | int i; |
194 | 0 | for (i = 0; i < width; ++i, rgb += step) { |
195 | 0 | y[i] = VP8RGBToY(rgb[0], rgb[1], rgb[2], YUV_HALF); |
196 | 0 | } |
197 | 0 | } |
198 | | |
199 | | static void ConvertBGRToY_C(const uint8_t* WEBP_RESTRICT bgr, |
200 | 0 | uint8_t* WEBP_RESTRICT y, int width, int step) { |
201 | 0 | int i; |
202 | 0 | for (i = 0; i < width; ++i, bgr += step) { |
203 | 0 | y[i] = VP8RGBToY(bgr[2], bgr[1], bgr[0], YUV_HALF); |
204 | 0 | } |
205 | 0 | } |
206 | | |
207 | | void WebPConvertRGBA32ToUV_C(const uint16_t* WEBP_RESTRICT rgb, |
208 | | uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v, |
209 | 440k | int width) { |
210 | 440k | int i; |
211 | 3.65M | for (i = 0; i < width; i += 1, rgb += 4) { |
212 | 3.21M | const int r = rgb[0], g = rgb[1], b = rgb[2]; |
213 | 3.21M | u[i] = VP8RGBToU(r, g, b, YUV_HALF << 2); |
214 | 3.21M | v[i] = VP8RGBToV(r, g, b, YUV_HALF << 2); |
215 | 3.21M | } |
216 | 440k | } |
217 | | |
218 | | //------------------------------------------------------------------------------ |
219 | | // Code for gamma correction |
220 | | |
221 | | #if defined(USE_GAMMA_COMPRESSION) |
222 | | |
223 | | // Gamma correction compensates loss of resolution during chroma subsampling. |
224 | 34 | #define GAMMA_FIX 12 // fixed-point precision for linear values |
225 | 1.26G | #define GAMMA_TAB_FIX 7 // fixed-point fractional bits precision |
226 | 34 | #define GAMMA_TAB_SIZE (1 << (GAMMA_FIX - GAMMA_TAB_FIX)) |
227 | | static const double kGamma = 0.80; |
228 | | static const int kGammaScale = ((1 << GAMMA_FIX) - 1); |
229 | | static const int kGammaTabScale = (1 << GAMMA_TAB_FIX); |
230 | | static const int kGammaTabRounder = (1 << GAMMA_TAB_FIX >> 1); |
231 | | |
232 | | static int kLinearToGammaTab[GAMMA_TAB_SIZE + 1]; |
233 | | static uint16_t kGammaToLinearTab[256]; |
234 | | static volatile int kGammaTablesOk = 0; |
235 | | extern VP8CPUInfo VP8GetCPUInfo; |
236 | | |
237 | 1 | WEBP_DSP_INIT_FUNC(WebPInitGammaTables) { |
238 | 1 | if (!kGammaTablesOk) { |
239 | 1 | int v; |
240 | 1 | const double scale = (double)(1 << GAMMA_TAB_FIX) / kGammaScale; |
241 | 1 | const double norm = 1. / 255.; |
242 | 257 | for (v = 0; v <= 255; ++v) { |
243 | 256 | kGammaToLinearTab[v] = |
244 | 256 | (uint16_t)(pow(norm * v, kGamma) * kGammaScale + .5); |
245 | 256 | } |
246 | 34 | for (v = 0; v <= GAMMA_TAB_SIZE; ++v) { |
247 | 33 | kLinearToGammaTab[v] = (int)(255. * pow(scale * v, 1. / kGamma) + .5); |
248 | 33 | } |
249 | 1 | kGammaTablesOk = 1; |
250 | 1 | } |
251 | 1 | } |
252 | | |
253 | 2.52G | static WEBP_INLINE uint32_t GammaToLinear(uint8_t v) { |
254 | 2.52G | return kGammaToLinearTab[v]; |
255 | 2.52G | } |
256 | | |
257 | 630M | static WEBP_INLINE int Interpolate(int v) { |
258 | 630M | const int tab_pos = v >> (GAMMA_TAB_FIX + 2); // integer part |
259 | 630M | const int x = v & ((kGammaTabScale << 2) - 1); // fractional part |
260 | 630M | const int v0 = kLinearToGammaTab[tab_pos]; |
261 | 630M | const int v1 = kLinearToGammaTab[tab_pos + 1]; |
262 | 630M | const int y = v1 * x + v0 * ((kGammaTabScale << 2) - x); // interpolate |
263 | 630M | assert(tab_pos + 1 < GAMMA_TAB_SIZE + 1); |
264 | 630M | return y; |
265 | 630M | } |
266 | | |
267 | | // Convert a linear value 'v' to YUV_FIX+2 fixed-point precision |
268 | | // U/V value, suitable for RGBToU/V calls. |
269 | 630M | static WEBP_INLINE int LinearToGamma(uint32_t base_value, int shift) { |
270 | 630M | const int y = Interpolate(base_value << shift); // final uplifted value |
271 | 630M | return (y + kGammaTabRounder) >> GAMMA_TAB_FIX; // descale |
272 | 630M | } |
273 | | |
274 | | #else |
275 | | |
276 | | void WebPInitGammaTables(void) {} |
277 | | static WEBP_INLINE uint32_t GammaToLinear(uint8_t v) { return v; } |
278 | | static WEBP_INLINE int LinearToGamma(uint32_t base_value, int shift) { |
279 | | return (int)(base_value << shift); |
280 | | } |
281 | | |
282 | | #endif // USE_GAMMA_COMPRESSION |
283 | | |
//------------------------------------------------------------------------------
// "Fast" regular RGB->YUV

// NOTE: the macros below read a variable named 'rgb_stride' from the scope in
// which they are expanded.

// Gamma-aware sum of a 2x2 block: the four samples at ptr[0], ptr[step],
// ptr[rgb_stride] and ptr[rgb_stride + step] are converted with
// GammaToLinear(), summed, and converted back with LinearToGamma().
#define SUM4(ptr, step)                                                  \
  LinearToGamma(GammaToLinear((ptr)[0]) + GammaToLinear((ptr)[(step)]) + \
                    GammaToLinear((ptr)[rgb_stride]) +                   \
                    GammaToLinear((ptr)[rgb_stride + (step)]),           \
                0)

// Same for a 2x1 column (ptr[0] and ptr[rgb_stride]); the shift of 1 doubles
// the two-sample sum before it is converted back.
#define SUM2(ptr) \
  LinearToGamma(GammaToLinear((ptr)[0]) + GammaToLinear((ptr)[rgb_stride]), 1)

// Plain sums of two (one column) or four (two columns, 4 bytes apart) alpha
// samples.
#define SUM2ALPHA(ptr) ((ptr)[0] + (ptr)[rgb_stride])
#define SUM4ALPHA(ptr) (SUM2ALPHA(ptr) + SUM2ALPHA((ptr) + 4))
307 | | |
308 | | #if defined(USE_INVERSE_ALPHA_TABLE) |
309 | | |
310 | | static const int kAlphaFix = 19; |
311 | | // Following table is (1 << kAlphaFix) / a. The (v * kInvAlpha[a]) >> kAlphaFix |
312 | | // formula is then equal to v / a in most (99.6%) cases. Note that this table |
313 | | // and constant are adjusted very tightly to fit 32b arithmetic. |
314 | | // In particular, they use the fact that the operands for 'v / a' are actually |
315 | | // derived as v = (a0.p0 + a1.p1 + a2.p2 + a3.p3) and a = a0 + a1 + a2 + a3 |
316 | | // with ai in [0..255] and pi in [0..1<<GAMMA_FIX). The constraint to avoid |
317 | | // overflow is: GAMMA_FIX + kAlphaFix <= 31. |
318 | | static const uint32_t kInvAlpha[4 * 0xff + 1] = { |
319 | | 0, /* alpha = 0 */ |
320 | | 524288, 262144, 174762, 131072, 104857, 87381, 74898, 65536, 58254, 52428, |
321 | | 47662, 43690, 40329, 37449, 34952, 32768, 30840, 29127, 27594, 26214, |
322 | | 24966, 23831, 22795, 21845, 20971, 20164, 19418, 18724, 18078, 17476, |
323 | | 16912, 16384, 15887, 15420, 14979, 14563, 14169, 13797, 13443, 13107, |
324 | | 12787, 12483, 12192, 11915, 11650, 11397, 11155, 10922, 10699, 10485, |
325 | | 10280, 10082, 9892, 9709, 9532, 9362, 9198, 9039, 8886, 8738, |
326 | | 8594, 8456, 8322, 8192, 8065, 7943, 7825, 7710, 7598, 7489, |
327 | | 7384, 7281, 7182, 7084, 6990, 6898, 6808, 6721, 6636, 6553, |
328 | | 6472, 6393, 6316, 6241, 6168, 6096, 6026, 5957, 5890, 5825, |
329 | | 5761, 5698, 5637, 5577, 5518, 5461, 5405, 5349, 5295, 5242, |
330 | | 5190, 5140, 5090, 5041, 4993, 4946, 4899, 4854, 4809, 4766, |
331 | | 4723, 4681, 4639, 4599, 4559, 4519, 4481, 4443, 4405, 4369, |
332 | | 4332, 4297, 4262, 4228, 4194, 4161, 4128, 4096, 4064, 4032, |
333 | | 4002, 3971, 3942, 3912, 3883, 3855, 3826, 3799, 3771, 3744, |
334 | | 3718, 3692, 3666, 3640, 3615, 3591, 3566, 3542, 3518, 3495, |
335 | | 3472, 3449, 3426, 3404, 3382, 3360, 3339, 3318, 3297, 3276, |
336 | | 3256, 3236, 3216, 3196, 3177, 3158, 3139, 3120, 3102, 3084, |
337 | | 3066, 3048, 3030, 3013, 2995, 2978, 2962, 2945, 2928, 2912, |
338 | | 2896, 2880, 2864, 2849, 2833, 2818, 2803, 2788, 2774, 2759, |
339 | | 2744, 2730, 2716, 2702, 2688, 2674, 2661, 2647, 2634, 2621, |
340 | | 2608, 2595, 2582, 2570, 2557, 2545, 2532, 2520, 2508, 2496, |
341 | | 2484, 2473, 2461, 2449, 2438, 2427, 2416, 2404, 2394, 2383, |
342 | | 2372, 2361, 2351, 2340, 2330, 2319, 2309, 2299, 2289, 2279, |
343 | | 2269, 2259, 2250, 2240, 2231, 2221, 2212, 2202, 2193, 2184, |
344 | | 2175, 2166, 2157, 2148, 2139, 2131, 2122, 2114, 2105, 2097, |
345 | | 2088, 2080, 2072, 2064, 2056, 2048, 2040, 2032, 2024, 2016, |
346 | | 2008, 2001, 1993, 1985, 1978, 1971, 1963, 1956, 1949, 1941, |
347 | | 1934, 1927, 1920, 1913, 1906, 1899, 1892, 1885, 1879, 1872, |
348 | | 1865, 1859, 1852, 1846, 1839, 1833, 1826, 1820, 1814, 1807, |
349 | | 1801, 1795, 1789, 1783, 1777, 1771, 1765, 1759, 1753, 1747, |
350 | | 1741, 1736, 1730, 1724, 1718, 1713, 1707, 1702, 1696, 1691, |
351 | | 1685, 1680, 1675, 1669, 1664, 1659, 1653, 1648, 1643, 1638, |
352 | | 1633, 1628, 1623, 1618, 1613, 1608, 1603, 1598, 1593, 1588, |
353 | | 1583, 1579, 1574, 1569, 1565, 1560, 1555, 1551, 1546, 1542, |
354 | | 1537, 1533, 1528, 1524, 1519, 1515, 1510, 1506, 1502, 1497, |
355 | | 1493, 1489, 1485, 1481, 1476, 1472, 1468, 1464, 1460, 1456, |
356 | | 1452, 1448, 1444, 1440, 1436, 1432, 1428, 1424, 1420, 1416, |
357 | | 1413, 1409, 1405, 1401, 1398, 1394, 1390, 1387, 1383, 1379, |
358 | | 1376, 1372, 1368, 1365, 1361, 1358, 1354, 1351, 1347, 1344, |
359 | | 1340, 1337, 1334, 1330, 1327, 1323, 1320, 1317, 1314, 1310, |
360 | | 1307, 1304, 1300, 1297, 1294, 1291, 1288, 1285, 1281, 1278, |
361 | | 1275, 1272, 1269, 1266, 1263, 1260, 1257, 1254, 1251, 1248, |
362 | | 1245, 1242, 1239, 1236, 1233, 1230, 1227, 1224, 1222, 1219, |
363 | | 1216, 1213, 1210, 1208, 1205, 1202, 1199, 1197, 1194, 1191, |
364 | | 1188, 1186, 1183, 1180, 1178, 1175, 1172, 1170, 1167, 1165, |
365 | | 1162, 1159, 1157, 1154, 1152, 1149, 1147, 1144, 1142, 1139, |
366 | | 1137, 1134, 1132, 1129, 1127, 1125, 1122, 1120, 1117, 1115, |
367 | | 1113, 1110, 1108, 1106, 1103, 1101, 1099, 1096, 1094, 1092, |
368 | | 1089, 1087, 1085, 1083, 1081, 1078, 1076, 1074, 1072, 1069, |
369 | | 1067, 1065, 1063, 1061, 1059, 1057, 1054, 1052, 1050, 1048, |
370 | | 1046, 1044, 1042, 1040, 1038, 1036, 1034, 1032, 1030, 1028, |
371 | | 1026, 1024, 1022, 1020, 1018, 1016, 1014, 1012, 1010, 1008, |
372 | | 1006, 1004, 1002, 1000, 998, 996, 994, 992, 991, 989, |
373 | | 987, 985, 983, 981, 979, 978, 976, 974, 972, 970, |
374 | | 969, 967, 965, 963, 961, 960, 958, 956, 954, 953, |
375 | | 951, 949, 948, 946, 944, 942, 941, 939, 937, 936, |
376 | | 934, 932, 931, 929, 927, 926, 924, 923, 921, 919, |
377 | | 918, 916, 914, 913, 911, 910, 908, 907, 905, 903, |
378 | | 902, 900, 899, 897, 896, 894, 893, 891, 890, 888, |
379 | | 887, 885, 884, 882, 881, 879, 878, 876, 875, 873, |
380 | | 872, 870, 869, 868, 866, 865, 863, 862, 860, 859, |
381 | | 858, 856, 855, 853, 852, 851, 849, 848, 846, 845, |
382 | | 844, 842, 841, 840, 838, 837, 836, 834, 833, 832, |
383 | | 830, 829, 828, 826, 825, 824, 823, 821, 820, 819, |
384 | | 817, 816, 815, 814, 812, 811, 810, 809, 807, 806, |
385 | | 805, 804, 802, 801, 800, 799, 798, 796, 795, 794, |
386 | | 793, 791, 790, 789, 788, 787, 786, 784, 783, 782, |
387 | | 781, 780, 779, 777, 776, 775, 774, 773, 772, 771, |
388 | | 769, 768, 767, 766, 765, 764, 763, 762, 760, 759, |
389 | | 758, 757, 756, 755, 754, 753, 752, 751, 750, 748, |
390 | | 747, 746, 745, 744, 743, 742, 741, 740, 739, 738, |
391 | | 737, 736, 735, 734, 733, 732, 731, 730, 729, 728, |
392 | | 727, 726, 725, 724, 723, 722, 721, 720, 719, 718, |
393 | | 717, 716, 715, 714, 713, 712, 711, 710, 709, 708, |
394 | | 707, 706, 705, 704, 703, 702, 701, 700, 699, 699, |
395 | | 698, 697, 696, 695, 694, 693, 692, 691, 690, 689, |
396 | | 688, 688, 687, 686, 685, 684, 683, 682, 681, 680, |
397 | | 680, 679, 678, 677, 676, 675, 674, 673, 673, 672, |
398 | | 671, 670, 669, 668, 667, 667, 666, 665, 664, 663, |
399 | | 662, 661, 661, 660, 659, 658, 657, 657, 656, 655, |
400 | | 654, 653, 652, 652, 651, 650, 649, 648, 648, 647, |
401 | | 646, 645, 644, 644, 643, 642, 641, 640, 640, 639, |
402 | | 638, 637, 637, 636, 635, 634, 633, 633, 632, 631, |
403 | | 630, 630, 629, 628, 627, 627, 626, 625, 624, 624, |
404 | | 623, 622, 621, 621, 620, 619, 618, 618, 617, 616, |
405 | | 616, 615, 614, 613, 613, 612, 611, 611, 610, 609, |
406 | | 608, 608, 607, 606, 606, 605, 604, 604, 603, 602, |
407 | | 601, 601, 600, 599, 599, 598, 597, 597, 596, 595, |
408 | | 595, 594, 593, 593, 592, 591, 591, 590, 589, 589, |
409 | | 588, 587, 587, 586, 585, 585, 584, 583, 583, 582, |
410 | | 581, 581, 580, 579, 579, 578, 578, 577, 576, 576, |
411 | | 575, 574, 574, 573, 572, 572, 571, 571, 570, 569, |
412 | | 569, 568, 568, 567, 566, 566, 565, 564, 564, 563, |
413 | | 563, 562, 561, 561, 560, 560, 559, 558, 558, 557, |
414 | | 557, 556, 555, 555, 554, 554, 553, 553, 552, 551, |
415 | | 551, 550, 550, 549, 548, 548, 547, 547, 546, 546, |
416 | | 545, 544, 544, 543, 543, 542, 542, 541, 541, 540, |
417 | | 539, 539, 538, 538, 537, 537, 536, 536, 535, 534, |
418 | | 534, 533, 533, 532, 532, 531, 531, 530, 530, 529, |
419 | | 529, 528, 527, 527, 526, 526, 525, 525, 524, 524, |
420 | | 523, 523, 522, 522, 521, 521, 520, 520, 519, 519, |
421 | | 518, 518, 517, 517, 516, 516, 515, 515, 514, 514}; |
422 | | |
423 | | // Note that LinearToGamma() expects the values to be premultiplied by 4, |
424 | | // so we incorporate this factor 4 inside the DIVIDE_BY_ALPHA macro directly. |
425 | 264M | #define DIVIDE_BY_ALPHA(sum, a) (((sum) * kInvAlpha[(a)]) >> (kAlphaFix - 2)) |
426 | | |
427 | | #else |
428 | | |
429 | | #define DIVIDE_BY_ALPHA(sum, a) (4 * (sum) / (a)) |
430 | | |
431 | | #endif // USE_INVERSE_ALPHA_TABLE |
432 | | |
433 | | static WEBP_INLINE int LinearToGammaWeighted(const uint8_t* src, |
434 | | const uint8_t* a_ptr, |
435 | | uint32_t total_a, int step, |
436 | 264M | int rgb_stride) { |
437 | 264M | const uint32_t sum = |
438 | 264M | a_ptr[0] * GammaToLinear(src[0]) + |
439 | 264M | a_ptr[step] * GammaToLinear(src[step]) + |
440 | 264M | a_ptr[rgb_stride] * GammaToLinear(src[rgb_stride]) + |
441 | 264M | a_ptr[rgb_stride + step] * GammaToLinear(src[rgb_stride + step]); |
442 | 264M | assert(total_a > 0 && total_a <= 4 * 0xff); |
443 | 264M | #if defined(USE_INVERSE_ALPHA_TABLE) |
444 | 264M | assert((uint64_t)sum * kInvAlpha[total_a] < ((uint64_t)1 << 32)); |
445 | 264M | #endif |
446 | 264M | return LinearToGamma(DIVIDE_BY_ALPHA(sum, total_a), 0); |
447 | 264M | } |
448 | | |
// Reduces two rows of 4-byte pixels ('rgb_stride' bytes apart) to one
// R/G/B/A quadruplet of uint16_t per 2x2 block, written to 'dst'.
// When the summed alpha of a block is 0 or 4 * 0xff, the colors are combined
// with the unweighted SUM4/SUM2; otherwise they are alpha-weighted through
// LinearToGammaWeighted(). For odd 'width', the trailing 2x1 column yields one
// more quadruplet, its alpha sum being doubled.
void WebPAccumulateRGBA(const uint8_t* const r_ptr, const uint8_t* const g_ptr,
                        const uint8_t* const b_ptr, const uint8_t* const a_ptr,
                        int rgb_stride, uint16_t* dst, int width) {
  const int num_blocks = width >> 1;
  int n;
  int off = 0;  // byte offset of the current block within the row
  // we loop over 2x2 blocks and produce one R/G/B/A value for each.
  for (n = 0; n < num_blocks; ++n, off += 2 * 4, dst += 4) {
    const uint8_t* const alpha = a_ptr + off;
    const uint32_t a = SUM4ALPHA(alpha);
    const int uniform_alpha = (a == 0 || a == 4 * 0xff);
    int r, g, b;
    if (!uniform_alpha) {
      r = LinearToGammaWeighted(r_ptr + off, alpha, a, 4, rgb_stride);
      g = LinearToGammaWeighted(g_ptr + off, alpha, a, 4, rgb_stride);
      b = LinearToGammaWeighted(b_ptr + off, alpha, a, 4, rgb_stride);
    } else {
      r = SUM4(r_ptr + off, 4);
      g = SUM4(g_ptr + off, 4);
      b = SUM4(b_ptr + off, 4);
    }
    dst[0] = r;
    dst[1] = g;
    dst[2] = b;
    dst[3] = a;
  }
  if (width & 1) {
    const uint8_t* const alpha = a_ptr + off;
    const uint32_t a = 2u * SUM2ALPHA(alpha);
    const int uniform_alpha = (a == 0 || a == 4 * 0xff);
    int r, g, b;
    if (!uniform_alpha) {
      // step == 0: the single column is counted twice.
      r = LinearToGammaWeighted(r_ptr + off, alpha, a, 0, rgb_stride);
      g = LinearToGammaWeighted(g_ptr + off, alpha, a, 0, rgb_stride);
      b = LinearToGammaWeighted(b_ptr + off, alpha, a, 0, rgb_stride);
    } else {
      r = SUM2(r_ptr + off);
      g = SUM2(g_ptr + off);
      b = SUM2(b_ptr + off);
    }
    dst[0] = r;
    dst[1] = g;
    dst[2] = b;
    dst[3] = a;
  }
}
489 | | |
// Reduces two rows of pixels ('rgb_stride' bytes apart, 'step' bytes per
// pixel) to one R/G/B triplet per 2x2 block using SUM4. Output samples are
// written four uint16_t apart in 'dst'; the fourth slot is only written (as 0)
// in WEBP_MSAN builds. For odd 'width', the trailing 2x1 column is reduced
// with SUM2.
void WebPAccumulateRGB(const uint8_t* const r_ptr, const uint8_t* const g_ptr,
                       const uint8_t* const b_ptr, int step, int rgb_stride,
                       uint16_t* dst, int width) {
  const int num_blocks = width >> 1;
  int n;
  int off = 0;  // byte offset of the current block within the row
  for (n = 0; n < num_blocks; ++n, off += 2 * step, dst += 4) {
    const uint8_t* const r = r_ptr + off;
    const uint8_t* const g = g_ptr + off;
    const uint8_t* const b = b_ptr + off;
    dst[0] = SUM4(r, step);
    dst[1] = SUM4(g, step);
    dst[2] = SUM4(b, step);
    // MemorySanitizer may raise false positives with data that passes through
    // RGBA32PackedToPlanar_16b_SSE41() due to incorrect modeling of shuffles.
    // See https://crbug.com/webp/573.
#ifdef WEBP_MSAN
    dst[3] = 0;
#endif
  }
  if (width & 1) {
    const uint8_t* const r = r_ptr + off;
    const uint8_t* const g = g_ptr + off;
    const uint8_t* const b = b_ptr + off;
    dst[0] = SUM2(r);
    dst[1] = SUM2(g);
    dst[2] = SUM2(b);
#ifdef WEBP_MSAN
    dst[3] = 0;
#endif
  }
}
514 | | |
// Converts the RGB(A) samples to Y/U/V(/A) planes, two source rows at a time.
// Only the (height >> 1) complete row pairs are processed here; a remaining
// odd row is not handled by this function.
//   r_ptr/g_ptr/b_ptr/a_ptr: pointers to the first sample of each channel.
//                            The data is treated as RGB-ordered when
//                            r_ptr < b_ptr, and as BGR-ordered otherwise.
//   step:        bytes per pixel.
//   rgb_stride:  bytes per scanline.
//   has_alpha:   non-zero if the source carries alpha; it is ignored when
//                dst_a is NULL.
//   tmp_rgb:     scratch buffer that receives the accumulated R/G/B(/A)
//                values of one row pair before they are converted to U/V.
//   y_stride/uv_stride/a_stride: strides of the destination planes.
//   dst_y/dst_u/dst_v/dst_a:     destination planes; dst_a may be NULL.
static void ImportYUVAFromRGBA_C(const uint8_t* r_ptr, const uint8_t* g_ptr,
                                 const uint8_t* b_ptr, const uint8_t* a_ptr,
                                 int step,        // bytes per pixel
                                 int rgb_stride,  // bytes per scanline
                                 int has_alpha, int width, int height,
                                 uint16_t* tmp_rgb, int y_stride, int uv_stride,
                                 int a_stride, uint8_t* dst_y, uint8_t* dst_u,
                                 uint8_t* dst_v, uint8_t* dst_a) {
  int y;
  const int is_rgb = (r_ptr < b_ptr);  // otherwise it's bgr
  const int uv_width = (width + 1) >> 1;

  // Normalize the flag to 0/1 with a logical AND. A bitwise '&=' against the
  // 0/1 result of the comparison would silently clear any non-zero flag whose
  // lowest bit is unset (e.g. has_alpha == 2). This also matches what
  // ImportYUVAFromRGBALastLine_C() does.
  has_alpha = has_alpha && (dst_a != NULL);
  if (has_alpha) {
#if defined(USE_GAMMA_COMPRESSION) && defined(USE_INVERSE_ALPHA_TABLE)
    assert(kAlphaFix + GAMMA_FIX <= 31);
#endif
  }

  WebPInitGammaTables();

  // Downsample Y/U/V planes, two rows at a time
  for (y = 0; y < (height >> 1); ++y) {
    int rows_have_alpha = has_alpha;
    if (is_rgb) {
      WebPConvertRGBToY(r_ptr, dst_y, width, step);
      WebPConvertRGBToY(r_ptr + rgb_stride, dst_y + y_stride, width, step);
    } else {
      WebPConvertBGRToY(b_ptr, dst_y, width, step);
      WebPConvertBGRToY(b_ptr + rgb_stride, dst_y + y_stride, width, step);
    }
    dst_y += 2 * y_stride;
    if (has_alpha) {
      // A non-zero return from WebPExtractAlpha() switches this row pair to
      // the alpha-less accumulation below.
      // NOTE(review): presumably it reports that both rows are fully opaque;
      // confirm against its declaration.
      rows_have_alpha &=
          !WebPExtractAlpha(a_ptr, rgb_stride, width, 2, dst_a, a_stride);
      dst_a += 2 * a_stride;
    } else if (dst_a != NULL) {
      // No source alpha: fill both destination alpha rows with 0xff.
      int i;
      for (i = 0; i < 2; ++i, dst_a += a_stride) {
        memset(dst_a, 0xff, width);
      }
    }

    // Collect averaged R/G/B(/A)
    if (!rows_have_alpha) {
      WebPAccumulateRGB(r_ptr, g_ptr, b_ptr, step, rgb_stride, tmp_rgb, width);
    } else {
      WebPAccumulateRGBA(r_ptr, g_ptr, b_ptr, a_ptr, rgb_stride, tmp_rgb,
                         width);
    }
    // Convert to U/V
    WebPConvertRGBA32ToUV(tmp_rgb, dst_u, dst_v, uv_width);
    dst_u += uv_stride;
    dst_v += uv_stride;
    r_ptr += 2 * rgb_stride;
    b_ptr += 2 * rgb_stride;
    g_ptr += 2 * rgb_stride;
    if (has_alpha) a_ptr += 2 * rgb_stride;
  }
}
575 | | |
// Single-row variant of the RGB(A) -> Y/U/V(/A) import: converts one source
// row to one luma row, one alpha row (when dst_a is not NULL) and one U/V row.
// The accumulation helpers are called with a row stride of 0, so the row is
// paired with itself. The data is treated as RGB-ordered when r_ptr < b_ptr,
// and as BGR-ordered otherwise.
static void ImportYUVAFromRGBALastLine_C(
    const uint8_t* r_ptr, const uint8_t* g_ptr, const uint8_t* b_ptr,
    const uint8_t* a_ptr,
    int step,  // bytes per pixel
    int has_alpha, int width, uint16_t* tmp_rgb, uint8_t* dst_y, uint8_t* dst_u,
    uint8_t* dst_v, uint8_t* dst_a) {
  const int uv_width = (width + 1) >> 1;
  int use_alpha = (dst_a != NULL) && has_alpha;

  if (r_ptr < b_ptr) {  // rgb order
    WebPConvertRGBToY(r_ptr, dst_y, width, step);
  } else {  // bgr order
    WebPConvertBGRToY(b_ptr, dst_y, width, step);
  }

  if (use_alpha) {
    // A non-zero return from WebPExtractAlpha() selects the alpha-less
    // accumulation below.
    if (WebPExtractAlpha(a_ptr, 0, width, 1, dst_a, 0)) use_alpha = 0;
  } else if (dst_a != NULL) {
    // No source alpha: fill the destination alpha row with 0xff.
    memset(dst_a, 0xff, width);
  }

  // Collect averaged R/G/B(/A)
  if (use_alpha) {
    WebPAccumulateRGBA(r_ptr, g_ptr, b_ptr, a_ptr, /*rgb_stride=*/0, tmp_rgb,
                       width);
  } else {
    WebPAccumulateRGB(r_ptr, g_ptr, b_ptr, step, /*rgb_stride=*/0, tmp_rgb,
                      width);
  }
  WebPConvertRGBA32ToUV(tmp_rgb, dst_u, dst_v, uv_width);
}
608 | | |
609 | | //----------------------------------------------------------------------------- |
610 | | |
611 | | void (*WebPConvertRGBToY)(const uint8_t* WEBP_RESTRICT rgb, |
612 | | uint8_t* WEBP_RESTRICT y, int width, int step); |
613 | | void (*WebPConvertBGRToY)(const uint8_t* WEBP_RESTRICT bgr, |
614 | | uint8_t* WEBP_RESTRICT y, int width, int step); |
615 | | void (*WebPConvertRGBA32ToUV)(const uint16_t* WEBP_RESTRICT rgb, |
616 | | uint8_t* WEBP_RESTRICT u, |
617 | | uint8_t* WEBP_RESTRICT v, int width); |
618 | | |
619 | | void (*WebPImportYUVAFromRGBA)(const uint8_t* r_ptr, const uint8_t* g_ptr, |
620 | | const uint8_t* b_ptr, const uint8_t* a_ptr, |
621 | | int step, // bytes per pixel |
622 | | int rgb_stride, // bytes per scanline |
623 | | int has_alpha, int width, int height, |
624 | | uint16_t* tmp_rgb, int y_stride, int uv_stride, |
625 | | int a_stride, uint8_t* dst_y, uint8_t* dst_u, |
626 | | uint8_t* dst_v, uint8_t* dst_a); |
627 | | void (*WebPImportYUVAFromRGBALastLine)( |
628 | | const uint8_t* r_ptr, const uint8_t* g_ptr, const uint8_t* b_ptr, |
629 | | const uint8_t* a_ptr, |
630 | | int step, // bytes per pixel |
631 | | int has_alpha, int width, uint16_t* tmp_rgb, uint8_t* dst_y, uint8_t* dst_u, |
632 | | uint8_t* dst_v, uint8_t* dst_a); |
633 | | |
634 | | void (*WebPConvertARGBToY)(const uint32_t* WEBP_RESTRICT argb, |
635 | | uint8_t* WEBP_RESTRICT y, int width); |
636 | | void (*WebPConvertARGBToUV)(const uint32_t* WEBP_RESTRICT argb, |
637 | | uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v, |
638 | | int src_width, int do_store); |
639 | | |
640 | | extern void WebPInitConvertARGBToYUVSSE2(void); |
641 | | extern void WebPInitConvertARGBToYUVSSE41(void); |
642 | | extern void WebPInitConvertARGBToYUVNEON(void); |
643 | | |
644 | 1 | WEBP_DSP_INIT_FUNC(WebPInitConvertARGBToYUV) { |
645 | 1 | WebPConvertARGBToY = ConvertARGBToY_C; |
646 | 1 | WebPConvertARGBToUV = WebPConvertARGBToUV_C; |
647 | | |
648 | 1 | WebPConvertRGBToY = ConvertRGBToY_C; |
649 | 1 | WebPConvertBGRToY = ConvertBGRToY_C; |
650 | | |
651 | 1 | WebPConvertRGBA32ToUV = WebPConvertRGBA32ToUV_C; |
652 | | |
653 | 1 | WebPImportYUVAFromRGBA = ImportYUVAFromRGBA_C; |
654 | 1 | WebPImportYUVAFromRGBALastLine = ImportYUVAFromRGBALastLine_C; |
655 | | |
656 | 1 | if (VP8GetCPUInfo != NULL) { |
657 | 1 | #if defined(WEBP_HAVE_SSE2) |
658 | 1 | if (VP8GetCPUInfo(kSSE2)) { |
659 | 1 | WebPInitConvertARGBToYUVSSE2(); |
660 | 1 | } |
661 | 1 | #endif // WEBP_HAVE_SSE2 |
662 | 1 | #if defined(WEBP_HAVE_SSE41) |
663 | 1 | if (VP8GetCPUInfo(kSSE4_1)) { |
664 | 1 | WebPInitConvertARGBToYUVSSE41(); |
665 | 1 | } |
666 | 1 | #endif // WEBP_HAVE_SSE41 |
667 | 1 | } |
668 | | |
669 | | #if defined(WEBP_HAVE_NEON) |
670 | | if (WEBP_NEON_OMIT_C_CODE || |
671 | | (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) { |
672 | | WebPInitConvertARGBToYUVNEON(); |
673 | | } |
674 | | #endif // WEBP_HAVE_NEON |
675 | | |
676 | 1 | assert(WebPConvertARGBToY != NULL); |
677 | 1 | assert(WebPConvertARGBToUV != NULL); |
678 | 1 | assert(WebPConvertRGBToY != NULL); |
679 | 1 | assert(WebPConvertBGRToY != NULL); |
680 | | assert(WebPConvertRGBA32ToUV != NULL); |
681 | 1 | } |