/src/libwebp/src/dsp/rescaler.c
Line | Count | Source |
1 | | // Copyright 2014 Google Inc. All Rights Reserved. |
2 | | // |
3 | | // Use of this source code is governed by a BSD-style license |
4 | | // that can be found in the COPYING file in the root of the source |
5 | | // tree. An additional intellectual property rights grant can be found |
6 | | // in the file PATENTS. All contributing project authors may |
7 | | // be found in the AUTHORS file in the root of the source tree. |
8 | | // ----------------------------------------------------------------------------- |
9 | | // |
10 | | // Rescaling functions |
11 | | // |
12 | | // Author: Skal (pascal.massimino@gmail.com) |
13 | | |
14 | | #include <assert.h> |
15 | | #include <stddef.h> |
16 | | |
17 | | #include "src/dsp/cpu.h" |
18 | | #include "src/dsp/dsp.h" |
19 | | #include "src/utils/rescaler_utils.h" |
20 | | #include "src/webp/types.h" |
21 | | |
22 | | //------------------------------------------------------------------------------ |
23 | | // Implementations of critical functions ImportRow / ExportRow |
24 | | |
// Fixed-point helpers used by the row import/export routines below.
// WEBP_RESCALER_ONE and WEBP_RESCALER_RFIX are defined outside this file.

// Rounding term: half of WEBP_RESCALER_ONE.
#define ROUNDER (WEBP_RESCALER_ONE >> 1)

// 64-bit product of x and y, with ROUNDER added before the right shift by
// WEBP_RESCALER_RFIX.
#define MULT_FIX(x, y) \
  (((uint64_t)(x) * (y) + ROUNDER) >> WEBP_RESCALER_RFIX)

// Same as MULT_FIX() but without the rounding term.
#define MULT_FIX_FLOOR(x, y) \
  (((uint64_t)(x) * (y)) >> WEBP_RESCALER_RFIX)
28 | | |
29 | | //------------------------------------------------------------------------------ |
30 | | // Row import |
31 | | |
32 | | void WebPRescalerImportRowExpand_C(WebPRescaler* WEBP_RESTRICT const wrk, |
33 | 686k | const uint8_t* WEBP_RESTRICT src) { |
34 | 686k | const int x_stride = wrk->num_channels; |
35 | 686k | const int x_out_max = wrk->dst_width * wrk->num_channels; |
36 | 686k | int channel; |
37 | 686k | assert(!WebPRescalerInputDone(wrk)); |
38 | 686k | assert(wrk->x_expand); |
39 | 2.55M | for (channel = 0; channel < x_stride; ++channel) { |
40 | 1.86M | int x_in = channel; |
41 | 1.86M | int x_out = channel; |
42 | | // simple bilinear interpolation |
43 | 1.86M | int accum = wrk->x_add; |
44 | 1.86M | rescaler_t left = (rescaler_t)src[x_in]; |
45 | 1.86M | rescaler_t right = |
46 | 1.86M | (wrk->src_width > 1) ? (rescaler_t)src[x_in + x_stride] : left; |
47 | 1.86M | x_in += x_stride; |
48 | 41.9M | while (1) { |
49 | 41.9M | wrk->frow[x_out] = right * wrk->x_add + (left - right) * accum; |
50 | 41.9M | x_out += x_stride; |
51 | 41.9M | if (x_out >= x_out_max) break; |
52 | 40.0M | accum -= wrk->x_sub; |
53 | 40.0M | if (accum < 0) { |
54 | 26.6M | left = right; |
55 | 26.6M | x_in += x_stride; |
56 | 26.6M | assert(x_in < wrk->src_width * x_stride); |
57 | 26.6M | right = (rescaler_t)src[x_in]; |
58 | 26.6M | accum += wrk->x_add; |
59 | 26.6M | } |
60 | 40.0M | } |
61 | 1.86M | assert(wrk->x_sub == 0 /* <- special case for src_width=1 */ || accum == 0); |
62 | 1.86M | } |
63 | 686k | } |
64 | | |
65 | | void WebPRescalerImportRowShrink_C(WebPRescaler* WEBP_RESTRICT const wrk, |
66 | 1.81M | const uint8_t* WEBP_RESTRICT src) { |
67 | 1.81M | const int x_stride = wrk->num_channels; |
68 | 1.81M | const int x_out_max = wrk->dst_width * wrk->num_channels; |
69 | 1.81M | int channel; |
70 | 1.81M | assert(!WebPRescalerInputDone(wrk)); |
71 | 1.81M | assert(!wrk->x_expand); |
72 | 4.25M | for (channel = 0; channel < x_stride; ++channel) { |
73 | 2.43M | int x_in = channel; |
74 | 2.43M | int x_out = channel; |
75 | 2.43M | uint32_t sum = 0; |
76 | 2.43M | int accum = 0; |
77 | 104M | while (x_out < x_out_max) { |
78 | 102M | uint32_t base = 0; |
79 | 102M | accum += wrk->x_add; |
80 | 285M | while (accum > 0) { |
81 | 183M | accum -= wrk->x_sub; |
82 | 183M | assert(x_in < wrk->src_width * x_stride); |
83 | 183M | base = src[x_in]; |
84 | 183M | sum += base; |
85 | 183M | x_in += x_stride; |
86 | 183M | } |
87 | 102M | { // Emit next horizontal pixel. |
88 | 102M | const rescaler_t frac = base * (-accum); |
89 | 102M | wrk->frow[x_out] = sum * wrk->x_sub - frac; |
90 | | // fresh fractional start for next pixel |
91 | 102M | sum = (int)MULT_FIX(frac, wrk->fx_scale); |
92 | 102M | } |
93 | 102M | x_out += x_stride; |
94 | 102M | } |
95 | 2.43M | assert(accum == 0); |
96 | 2.43M | } |
97 | 1.81M | } |
98 | | |
99 | | //------------------------------------------------------------------------------ |
100 | | // Row export |
101 | | |
102 | 376k | void WebPRescalerExportRowExpand_C(WebPRescaler* const wrk) { |
103 | 376k | int x_out; |
104 | 376k | uint8_t* const dst = wrk->dst; |
105 | 376k | rescaler_t* const irow = wrk->irow; |
106 | 376k | const int x_out_max = wrk->dst_width * wrk->num_channels; |
107 | 376k | const rescaler_t* const frow = wrk->frow; |
108 | 376k | assert(!WebPRescalerOutputDone(wrk)); |
109 | 376k | assert(wrk->y_accum <= 0); |
110 | 376k | assert(wrk->y_expand); |
111 | 376k | assert(wrk->y_sub != 0); |
112 | 376k | if (wrk->y_accum == 0) { |
113 | 1.89M | for (x_out = 0; x_out < x_out_max; ++x_out) { |
114 | 1.88M | const uint32_t J = frow[x_out]; |
115 | 1.88M | const int v = (int)MULT_FIX(J, wrk->fy_scale); |
116 | 1.88M | dst[x_out] = (v > 255) ? 255u : (uint8_t)v; |
117 | 1.88M | } |
118 | 365k | } else { |
119 | 365k | const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub); |
120 | 365k | const uint32_t A = (uint32_t)(WEBP_RESCALER_ONE - B); |
121 | 56.6M | for (x_out = 0; x_out < x_out_max; ++x_out) { |
122 | 56.3M | const uint64_t I = (uint64_t)A * frow[x_out] + (uint64_t)B * irow[x_out]; |
123 | 56.3M | const uint32_t J = (uint32_t)((I + ROUNDER) >> WEBP_RESCALER_RFIX); |
124 | 56.3M | const int v = (int)MULT_FIX(J, wrk->fy_scale); |
125 | 56.3M | dst[x_out] = (v > 255) ? 255u : (uint8_t)v; |
126 | 56.3M | } |
127 | 365k | } |
128 | 376k | } |
129 | | |
130 | 386k | void WebPRescalerExportRowShrink_C(WebPRescaler* const wrk) { |
131 | 386k | int x_out; |
132 | 386k | uint8_t* const dst = wrk->dst; |
133 | 386k | rescaler_t* const irow = wrk->irow; |
134 | 386k | const int x_out_max = wrk->dst_width * wrk->num_channels; |
135 | 386k | const rescaler_t* const frow = wrk->frow; |
136 | 386k | const uint32_t yscale = wrk->fy_scale * (-wrk->y_accum); |
137 | 386k | assert(!WebPRescalerOutputDone(wrk)); |
138 | 386k | assert(wrk->y_accum <= 0); |
139 | 386k | assert(!wrk->y_expand); |
140 | 386k | if (yscale) { |
141 | 26.0M | for (x_out = 0; x_out < x_out_max; ++x_out) { |
142 | 25.7M | const uint32_t frac = (uint32_t)MULT_FIX_FLOOR(frow[x_out], yscale); |
143 | 25.7M | const int v = (int)MULT_FIX(irow[x_out] - frac, wrk->fxy_scale); |
144 | 25.7M | dst[x_out] = (v > 255) ? 255u : (uint8_t)v; |
145 | 25.7M | irow[x_out] = frac; // new fractional start |
146 | 25.7M | } |
147 | 283k | } else { |
148 | 14.8M | for (x_out = 0; x_out < x_out_max; ++x_out) { |
149 | 14.7M | const int v = (int)MULT_FIX(irow[x_out], wrk->fxy_scale); |
150 | 14.7M | dst[x_out] = (v > 255) ? 255u : (uint8_t)v; |
151 | 14.7M | irow[x_out] = 0; |
152 | 14.7M | } |
153 | 103k | } |
154 | 386k | } |
155 | | |
// The fixed-point helper macros are not needed past this point.
#undef ROUNDER
#undef MULT_FIX
#undef MULT_FIX_FLOOR
159 | | |
160 | | //------------------------------------------------------------------------------ |
161 | | // Main entry calls |
162 | | |
163 | | void WebPRescalerImportRow(WebPRescaler* WEBP_RESTRICT const wrk, |
164 | 4.42M | const uint8_t* WEBP_RESTRICT src) { |
165 | 4.42M | assert(!WebPRescalerInputDone(wrk)); |
166 | 4.42M | if (!wrk->x_expand) { |
167 | 3.19M | WebPRescalerImportRowShrink(wrk, src); |
168 | 3.19M | } else { |
169 | 1.22M | WebPRescalerImportRowExpand(wrk, src); |
170 | 1.22M | } |
171 | 4.42M | } |
172 | | |
173 | 4.85M | void WebPRescalerExportRow(WebPRescaler* const wrk) { |
174 | 4.85M | if (wrk->y_accum <= 0) { |
175 | 4.85M | assert(!WebPRescalerOutputDone(wrk)); |
176 | 4.85M | if (wrk->y_expand) { |
177 | 3.09M | WebPRescalerExportRowExpand(wrk); |
178 | 3.09M | } else if (wrk->fxy_scale) { |
179 | 1.75M | WebPRescalerExportRowShrink(wrk); |
180 | 1.75M | } else { // special case |
181 | 5.01k | int i; |
182 | 5.01k | assert(wrk->src_height == wrk->dst_height && wrk->x_add == 1); |
183 | 5.01k | assert(wrk->src_width == 1 && wrk->dst_width <= 2); |
184 | 21.8k | for (i = 0; i < wrk->num_channels * wrk->dst_width; ++i) { |
185 | 16.8k | wrk->dst[i] = wrk->irow[i]; |
186 | 16.8k | wrk->irow[i] = 0; |
187 | 16.8k | } |
188 | 5.01k | } |
189 | 4.85M | wrk->y_accum += wrk->y_add; |
190 | 4.85M | wrk->dst += wrk->dst_stride; |
191 | 4.85M | ++wrk->dst_y; |
192 | 4.85M | } |
193 | 4.85M | } |
194 | | |
195 | | //------------------------------------------------------------------------------ |
196 | | |
197 | | WebPRescalerImportRowFunc WebPRescalerImportRowExpand; |
198 | | WebPRescalerImportRowFunc WebPRescalerImportRowShrink; |
199 | | |
200 | | WebPRescalerExportRowFunc WebPRescalerExportRowExpand; |
201 | | WebPRescalerExportRowFunc WebPRescalerExportRowShrink; |
202 | | |
203 | | extern VP8CPUInfo VP8GetCPUInfo; |
204 | | extern void WebPRescalerDspInitSSE2(void); |
205 | | extern void WebPRescalerDspInitMIPS32(void); |
206 | | extern void WebPRescalerDspInitMIPSdspR2(void); |
207 | | extern void WebPRescalerDspInitMSA(void); |
208 | | extern void WebPRescalerDspInitNEON(void); |
209 | | |
210 | 6.08k | WEBP_DSP_INIT_FUNC(WebPRescalerDspInit) { |
211 | 6.08k | #if !defined(WEBP_REDUCE_SIZE) |
212 | 6.08k | #if !WEBP_NEON_OMIT_C_CODE |
213 | 6.08k | WebPRescalerExportRowExpand = WebPRescalerExportRowExpand_C; |
214 | 6.08k | WebPRescalerExportRowShrink = WebPRescalerExportRowShrink_C; |
215 | 6.08k | #endif |
216 | | |
217 | 6.08k | WebPRescalerImportRowExpand = WebPRescalerImportRowExpand_C; |
218 | 6.08k | WebPRescalerImportRowShrink = WebPRescalerImportRowShrink_C; |
219 | | |
220 | 6.08k | if (VP8GetCPUInfo != NULL) { |
221 | 6.08k | #if defined(WEBP_HAVE_SSE2) |
222 | 6.08k | if (VP8GetCPUInfo(kSSE2)) { |
223 | 4.33k | WebPRescalerDspInitSSE2(); |
224 | 4.33k | } |
225 | 6.08k | #endif |
226 | | #if defined(WEBP_USE_MIPS32) |
227 | | if (VP8GetCPUInfo(kMIPS32)) { |
228 | | WebPRescalerDspInitMIPS32(); |
229 | | } |
230 | | #endif |
231 | | #if defined(WEBP_USE_MIPS_DSP_R2) |
232 | | if (VP8GetCPUInfo(kMIPSdspR2)) { |
233 | | WebPRescalerDspInitMIPSdspR2(); |
234 | | } |
235 | | #endif |
236 | | #if defined(WEBP_USE_MSA) |
237 | | if (VP8GetCPUInfo(kMSA)) { |
238 | | WebPRescalerDspInitMSA(); |
239 | | } |
240 | | #endif |
241 | 6.08k | } |
242 | | |
243 | | #if defined(WEBP_HAVE_NEON) |
244 | | if (WEBP_NEON_OMIT_C_CODE || |
245 | | (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) { |
246 | | WebPRescalerDspInitNEON(); |
247 | | } |
248 | | #endif |
249 | | |
250 | 6.08k | assert(WebPRescalerExportRowExpand != NULL); |
251 | 6.08k | assert(WebPRescalerExportRowShrink != NULL); |
252 | 6.08k | assert(WebPRescalerImportRowExpand != NULL); |
253 | | assert(WebPRescalerImportRowShrink != NULL); |
254 | 6.08k | #endif // WEBP_REDUCE_SIZE |
255 | 6.08k | } |