/src/ffmpeg/libavcodec/snow_dwt.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (C) 2004-2010 Michael Niedermayer <michaelni@gmx.at> |
3 | | * Copyright (C) 2008 David Conrad |
4 | | * |
5 | | * This file is part of FFmpeg. |
6 | | * |
7 | | * FFmpeg is free software; you can redistribute it and/or |
8 | | * modify it under the terms of the GNU Lesser General Public |
9 | | * License as published by the Free Software Foundation; either |
10 | | * version 2.1 of the License, or (at your option) any later version. |
11 | | * |
12 | | * FFmpeg is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | | * Lesser General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU Lesser General Public |
18 | | * License along with FFmpeg; if not, write to the Free Software |
19 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
20 | | */ |
21 | | |
22 | | #include "libavutil/attributes.h" |
23 | | #include "libavutil/avassert.h" |
24 | | #include "libavutil/common.h" |
25 | | #include "libavutil/mem.h" |
26 | | #include "me_cmp.h" |
27 | | #include "snow_dwt.h" |
28 | | |
29 | | int ff_slice_buffer_init(slice_buffer *buf, int line_count, |
30 | | int max_allocated_lines, int line_width, |
31 | | IDWTELEM *base_buffer) |
32 | 64.4k | { |
33 | 64.4k | int i; |
34 | | |
35 | 64.4k | buf->base_buffer = base_buffer; |
36 | 64.4k | buf->line_count = line_count; |
37 | 64.4k | buf->line_width = line_width; |
38 | 64.4k | buf->data_count = max_allocated_lines; |
39 | 64.4k | buf->line = av_calloc(line_count, sizeof(*buf->line)); |
40 | 64.4k | if (!buf->line) |
41 | 0 | return AVERROR(ENOMEM); |
42 | 64.4k | buf->data_stack = av_malloc_array(max_allocated_lines, sizeof(IDWTELEM *)); |
43 | 64.4k | if (!buf->data_stack) { |
44 | 0 | av_freep(&buf->line); |
45 | 0 | return AVERROR(ENOMEM); |
46 | 0 | } |
47 | | |
48 | 2.31M | for (i = 0; i < max_allocated_lines; i++) { |
49 | 2.24M | buf->data_stack[i] = av_malloc_array(line_width, sizeof(IDWTELEM)); |
50 | 2.24M | if (!buf->data_stack[i]) { |
51 | 0 | for (i--; i >=0; i--) |
52 | 0 | av_freep(&buf->data_stack[i]); |
53 | 0 | av_freep(&buf->data_stack); |
54 | 0 | av_freep(&buf->line); |
55 | 0 | return AVERROR(ENOMEM); |
56 | 0 | } |
57 | 2.24M | } |
58 | | |
59 | 64.4k | buf->data_stack_top = max_allocated_lines - 1; |
60 | 64.4k | return 0; |
61 | 64.4k | } |
62 | | |
63 | | IDWTELEM *ff_slice_buffer_load_line(slice_buffer *buf, int line) |
64 | 44.7M | { |
65 | 44.7M | IDWTELEM *buffer; |
66 | | |
67 | 44.7M | av_assert0(buf->data_stack_top >= 0); |
68 | | // av_assert1(!buf->line[line]); |
69 | 44.7M | if (buf->line[line]) |
70 | 0 | return buf->line[line]; |
71 | | |
72 | 44.7M | buffer = buf->data_stack[buf->data_stack_top]; |
73 | 44.7M | buf->data_stack_top--; |
74 | 44.7M | buf->line[line] = buffer; |
75 | | |
76 | 44.7M | return buffer; |
77 | 44.7M | } |
78 | | |
79 | | void ff_slice_buffer_release(slice_buffer *buf, int line) |
80 | 44.7M | { |
81 | 44.7M | IDWTELEM *buffer; |
82 | | |
83 | 44.7M | av_assert1(line >= 0 && line < buf->line_count); |
84 | 44.7M | av_assert1(buf->line[line]); |
85 | | |
86 | 44.7M | buffer = buf->line[line]; |
87 | 44.7M | buf->data_stack_top++; |
88 | 44.7M | buf->data_stack[buf->data_stack_top] = buffer; |
89 | 44.7M | buf->line[line] = NULL; |
90 | 44.7M | } |
91 | | |
92 | | void ff_slice_buffer_flush(slice_buffer *buf) |
93 | 125k | { |
94 | 125k | int i; |
95 | | |
96 | 125k | if (!buf->line) |
97 | 4.00k | return; |
98 | | |
99 | 146M | for (i = 0; i < buf->line_count; i++) |
100 | 145M | if (buf->line[i]) |
101 | 18.0k | ff_slice_buffer_release(buf, i); |
102 | 121k | } |
103 | | |
104 | | void ff_slice_buffer_destroy(slice_buffer *buf) |
105 | 68.4k | { |
106 | 68.4k | int i; |
107 | 68.4k | ff_slice_buffer_flush(buf); |
108 | | |
109 | 68.4k | if (buf->data_stack) |
110 | 2.31M | for (i = buf->data_count - 1; i >= 0; i--) |
111 | 2.24M | av_freep(&buf->data_stack[i]); |
112 | 68.4k | av_freep(&buf->data_stack); |
113 | 68.4k | av_freep(&buf->line); |
114 | 68.4k | } |
115 | | |
116 | | static av_always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, |
117 | | int dst_step, int src_step, int ref_step, |
118 | | int width, int mul, int add, int shift, |
119 | | int highpass, int inverse) |
120 | 25.3M | { |
121 | 25.3M | const int mirror_left = !highpass; |
122 | 25.3M | const int mirror_right = (width & 1) ^ highpass; |
123 | 25.3M | const int w = (width >> 1) - 1 + (highpass & width); |
124 | 25.3M | int i; |
125 | | |
126 | 428M | #define LIFT(src, ref, inv) ((src) + ((inv) ? -(ref) : +(ref))) |
127 | 25.3M | if (mirror_left) { |
128 | 8.43M | dst[0] = LIFT(src[0], ((mul * 2 * ref[0] + add) >> shift), inverse); |
129 | 8.43M | dst += dst_step; |
130 | 8.43M | src += src_step; |
131 | 8.43M | } |
132 | | |
133 | 429M | for (i = 0; i < w; i++) |
134 | 404M | dst[i * dst_step] = LIFT(src[i * src_step], |
135 | 25.3M | ((mul * (ref[i * ref_step] + |
136 | 25.3M | ref[(i + 1) * ref_step]) + |
137 | 25.3M | add) >> shift), |
138 | 25.3M | inverse); |
139 | | |
140 | 25.3M | if (mirror_right) |
141 | 15.2M | dst[w * dst_step] = LIFT(src[w * src_step], |
142 | 25.3M | ((mul * 2 * ref[w * ref_step] + add) >> shift), |
143 | 25.3M | inverse); |
144 | 25.3M | } |
145 | | |
146 | | static av_always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, |
147 | | int dst_step, int src_step, int ref_step, |
148 | | int width, int mul, int add, int shift, |
149 | | int highpass, int inverse) |
150 | 8.43M | { |
151 | 8.43M | const int mirror_left = !highpass; |
152 | 8.43M | const int mirror_right = (width & 1) ^ highpass; |
153 | 8.43M | const int w = (width >> 1) - 1 + (highpass & width); |
154 | 8.43M | int i; |
155 | | |
156 | 8.43M | av_assert1(shift == 4); |
157 | 8.43M | #define LIFTS(src, ref, inv) \ |
158 | 143M | ((inv) ? (src) + (((ref) + 4 * (src)) >> shift) \ |
159 | 143M | : -((-16 * (src) + (ref) + add / \ |
160 | 143M | 4 + 1 + (5 << 25)) / (5 * 4) - (1 << 23))) |
161 | 8.43M | if (mirror_left) { |
162 | 8.43M | dst[0] = LIFTS(src[0], mul * 2 * ref[0] + add, inverse); |
163 | 8.43M | dst += dst_step; |
164 | 8.43M | src += src_step; |
165 | 8.43M | } |
166 | | |
167 | 142M | for (i = 0; i < w; i++) |
168 | 133M | dst[i * dst_step] = LIFTS(src[i * src_step], |
169 | 8.43M | mul * (ref[i * ref_step] + |
170 | 8.43M | ref[(i + 1) * ref_step]) + add, |
171 | 8.43M | inverse); |
172 | | |
173 | 8.43M | if (mirror_right) |
174 | 1.62M | dst[w * dst_step] = LIFTS(src[w * src_step], |
175 | 8.43M | mul * 2 * ref[w * ref_step] + add, |
176 | 8.43M | inverse); |
177 | 8.43M | } |
178 | | |
179 | | static void horizontal_decompose53i(DWTELEM *b, DWTELEM *temp, int width) |
180 | 0 | { |
181 | 0 | const int width2 = width >> 1; |
182 | 0 | int x; |
183 | 0 | const int w2 = (width + 1) >> 1; |
184 | |
|
185 | 0 | for (x = 0; x < width2; x++) { |
186 | 0 | temp[x] = b[2 * x]; |
187 | 0 | temp[x + w2] = b[2 * x + 1]; |
188 | 0 | } |
189 | 0 | if (width & 1) |
190 | 0 | temp[x] = b[2 * x]; |
191 | 0 | lift(b + w2, temp + w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0); |
192 | 0 | lift(b, temp, b + w2, 1, 1, 1, width, 1, 2, 2, 0, 0); |
193 | 0 | } |
194 | | |
195 | | static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, |
196 | | int width) |
197 | 0 | { |
198 | 0 | int i; |
199 | |
|
200 | 0 | for (i = 0; i < width; i++) |
201 | 0 | b1[i] -= (b0[i] + b2[i]) >> 1; |
202 | 0 | } |
203 | | |
204 | | static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, |
205 | | int width) |
206 | 0 | { |
207 | 0 | int i; |
208 | |
|
209 | 0 | for (i = 0; i < width; i++) |
210 | 0 | b1[i] += (b0[i] + b2[i] + 2) >> 2; |
211 | 0 | } |
212 | | |
213 | | static void spatial_decompose53i(DWTELEM *buffer, DWTELEM *temp, |
214 | | int width, int height, int stride) |
215 | 0 | { |
216 | 0 | int y; |
217 | 0 | DWTELEM *b0 = buffer + avpriv_mirror(-2 - 1, height - 1) * stride; |
218 | 0 | DWTELEM *b1 = buffer + avpriv_mirror(-2, height - 1) * stride; |
219 | |
|
220 | 0 | for (y = -2; y < height; y += 2) { |
221 | 0 | DWTELEM *b2 = buffer + avpriv_mirror(y + 1, height - 1) * stride; |
222 | 0 | DWTELEM *b3 = buffer + avpriv_mirror(y + 2, height - 1) * stride; |
223 | |
|
224 | 0 | if (y + 1 < (unsigned)height) |
225 | 0 | horizontal_decompose53i(b2, temp, width); |
226 | 0 | if (y + 2 < (unsigned)height) |
227 | 0 | horizontal_decompose53i(b3, temp, width); |
228 | |
|
229 | 0 | if (y + 1 < (unsigned)height) |
230 | 0 | vertical_decompose53iH0(b1, b2, b3, width); |
231 | 0 | if (y + 0 < (unsigned)height) |
232 | 0 | vertical_decompose53iL0(b0, b1, b2, width); |
233 | |
|
234 | 0 | b0 = b2; |
235 | 0 | b1 = b3; |
236 | 0 | } |
237 | 0 | } |
238 | | |
239 | | static void horizontal_decompose97i(DWTELEM *b, DWTELEM *temp, int width) |
240 | 8.43M | { |
241 | 8.43M | const int w2 = (width + 1) >> 1; |
242 | | |
243 | 8.43M | lift(temp + w2, b + 1, b, 1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1); |
244 | 8.43M | liftS(temp, b, temp + w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0); |
245 | 8.43M | lift(b + w2, temp + w2, temp, 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0); |
246 | 8.43M | lift(b, temp, b + w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0); |
247 | 8.43M | } |
248 | | |
249 | | static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, |
250 | | int width) |
251 | 4.19M | { |
252 | 4.19M | int i; |
253 | | |
254 | 144M | for (i = 0; i < width; i++) |
255 | 140M | b1[i] -= (W_AM * (b0[i] + b2[i]) + W_AO) >> W_AS; |
256 | 4.19M | } |
257 | | |
258 | | static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, |
259 | | int width) |
260 | 4.19M | { |
261 | 4.19M | int i; |
262 | | |
263 | 144M | for (i = 0; i < width; i++) |
264 | 140M | b1[i] += (W_CM * (b0[i] + b2[i]) + W_CO) >> W_CS; |
265 | 4.19M | } |
266 | | |
267 | | static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, |
268 | | int width) |
269 | 4.23M | { |
270 | 4.23M | int i; |
271 | | |
272 | 149M | for (i = 0; i < width; i++) |
273 | 145M | b1[i] = (16 * 4 * b1[i] - 4 * (b0[i] + b2[i]) + W_BO * 5 + (5 << 27)) / |
274 | 145M | (5 * 16) - (1 << 23); |
275 | 4.23M | } |
276 | | |
277 | | static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, |
278 | | int width) |
279 | 4.23M | { |
280 | 4.23M | int i; |
281 | | |
282 | 149M | for (i = 0; i < width; i++) |
283 | 145M | b1[i] += (W_DM * (b0[i] + b2[i]) + W_DO) >> W_DS; |
284 | 4.23M | } |
285 | | |
286 | | static void spatial_decompose97i(DWTELEM *buffer, DWTELEM *temp, |
287 | | int width, int height, int stride) |
288 | 223k | { |
289 | 223k | int y; |
290 | 223k | DWTELEM *b0 = buffer + avpriv_mirror(-4 - 1, height - 1) * stride; |
291 | 223k | DWTELEM *b1 = buffer + avpriv_mirror(-4, height - 1) * stride; |
292 | 223k | DWTELEM *b2 = buffer + avpriv_mirror(-4 + 1, height - 1) * stride; |
293 | 223k | DWTELEM *b3 = buffer + avpriv_mirror(-4 + 2, height - 1) * stride; |
294 | | |
295 | 4.90M | for (y = -4; y < height; y += 2) { |
296 | 4.68M | DWTELEM *b4 = buffer + avpriv_mirror(y + 3, height - 1) * stride; |
297 | 4.68M | DWTELEM *b5 = buffer + avpriv_mirror(y + 4, height - 1) * stride; |
298 | | |
299 | 4.68M | if (y + 3 < (unsigned)height) |
300 | 4.19M | horizontal_decompose97i(b4, temp, width); |
301 | 4.68M | if (y + 4 < (unsigned)height) |
302 | 4.23M | horizontal_decompose97i(b5, temp, width); |
303 | | |
304 | 4.68M | if (y + 3 < (unsigned)height) |
305 | 4.19M | vertical_decompose97iH0(b3, b4, b5, width); |
306 | 4.68M | if (y + 2 < (unsigned)height) |
307 | 4.23M | vertical_decompose97iL0(b2, b3, b4, width); |
308 | 4.68M | if (y + 1 < (unsigned)height) |
309 | 4.19M | vertical_decompose97iH1(b1, b2, b3, width); |
310 | 4.68M | if (y + 0 < (unsigned)height) |
311 | 4.23M | vertical_decompose97iL1(b0, b1, b2, width); |
312 | | |
313 | 4.68M | b0 = b2; |
314 | 4.68M | b1 = b3; |
315 | 4.68M | b2 = b4; |
316 | 4.68M | b3 = b5; |
317 | 4.68M | } |
318 | 223k | } |
319 | | |
320 | | void ff_spatial_dwt(DWTELEM *buffer, DWTELEM *temp, int width, int height, |
321 | | int stride, int type, int decomposition_count) |
322 | 69.8k | { |
323 | 69.8k | int level; |
324 | | |
325 | 293k | for (level = 0; level < decomposition_count; level++) { |
326 | 223k | switch (type) { |
327 | 223k | case DWT_97: |
328 | 223k | spatial_decompose97i(buffer, temp, |
329 | 223k | width >> level, height >> level, |
330 | 223k | stride << level); |
331 | 223k | break; |
332 | 0 | case DWT_53: |
333 | 0 | spatial_decompose53i(buffer, temp, |
334 | 0 | width >> level, height >> level, |
335 | 0 | stride << level); |
336 | 0 | break; |
337 | 223k | } |
338 | 223k | } |
339 | 69.8k | } |
340 | | |
341 | | static void horizontal_compose53i(IDWTELEM *b, IDWTELEM *temp, int width) |
342 | 22.2M | { |
343 | 22.2M | const int width2 = width >> 1; |
344 | 22.2M | const int w2 = (width + 1) >> 1; |
345 | 22.2M | int x; |
346 | | |
347 | 957M | for (x = 0; x < width2; x++) { |
348 | 935M | temp[2 * x] = b[x]; |
349 | 935M | temp[2 * x + 1] = b[x + w2]; |
350 | 935M | } |
351 | 22.2M | if (width & 1) |
352 | 9.49M | temp[2 * x] = b[x]; |
353 | | |
354 | 22.2M | b[0] = temp[0] - ((temp[1] + 1) >> 1); |
355 | 935M | for (x = 2; x < width - 1; x += 2) { |
356 | 913M | b[x] = temp[x] - ((temp[x - 1] + temp[x + 1] + 2) >> 2); |
357 | 913M | b[x - 1] = temp[x - 1] + ((b[x - 2] + b[x] + 1) >> 1); |
358 | 913M | } |
359 | 22.2M | if (width & 1) { |
360 | 9.49M | b[x] = temp[x] - ((temp[x - 1] + 1) >> 1); |
361 | 9.49M | b[x - 1] = temp[x - 1] + ((b[x - 2] + b[x] + 1) >> 1); |
362 | 9.49M | } else |
363 | 12.7M | b[x - 1] = temp[x - 1] + b[x - 2]; |
364 | 22.2M | } |
365 | | |
366 | | static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, |
367 | | int width) |
368 | 26.7k | { |
369 | 26.7k | int i; |
370 | | |
371 | 6.88M | for (i = 0; i < width; i++) |
372 | 6.85M | b1[i] += (b0[i] + b2[i]) >> 1; |
373 | 26.7k | } |
374 | | |
375 | | static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, |
376 | | int width) |
377 | 51.1k | { |
378 | 51.1k | int i; |
379 | | |
380 | 10.9M | for (i = 0; i < width; i++) |
381 | 10.9M | b1[i] -= (b0[i] + b2[i] + 2) >> 2; |
382 | 51.1k | } |
383 | | |
384 | | static void spatial_compose53i_buffered_init(DWTCompose *cs, slice_buffer *sb, |
385 | | int height, int stride_line) |
386 | 51.1k | { |
387 | 51.1k | cs->b0 = slice_buffer_get_line(sb, |
388 | 51.1k | avpriv_mirror(-1 - 1, height - 1) * stride_line); |
389 | 51.1k | cs->b1 = slice_buffer_get_line(sb, avpriv_mirror(-1, height - 1) * stride_line); |
390 | 51.1k | cs->y = -1; |
391 | 51.1k | } |
392 | | |
393 | | static void spatial_compose53i_init(DWTCompose *cs, IDWTELEM *buffer, |
394 | | int height, int stride) |
395 | 0 | { |
396 | 0 | cs->b0 = buffer + avpriv_mirror(-1 - 1, height - 1) * stride; |
397 | 0 | cs->b1 = buffer + avpriv_mirror(-1, height - 1) * stride; |
398 | 0 | cs->y = -1; |
399 | 0 | } |
400 | | |
401 | | static void spatial_compose53i_dy_buffered(DWTCompose *cs, slice_buffer *sb, |
402 | | IDWTELEM *temp, |
403 | | int width, int height, |
404 | | int stride_line) |
405 | 11.1M | { |
406 | 11.1M | int y = cs->y; |
407 | | |
408 | 11.1M | IDWTELEM *b0 = cs->b0; |
409 | 11.1M | IDWTELEM *b1 = cs->b1; |
410 | 11.1M | IDWTELEM *b2 = slice_buffer_get_line(sb, |
411 | 11.1M | avpriv_mirror(y + 1, height - 1) * |
412 | 11.1M | stride_line); |
413 | 11.1M | IDWTELEM *b3 = slice_buffer_get_line(sb, |
414 | 11.1M | avpriv_mirror(y + 2, height - 1) * |
415 | 11.1M | stride_line); |
416 | | |
417 | 11.1M | if (y + 1 < (unsigned)height && y < (unsigned)height) { |
418 | 11.0M | int x; |
419 | | |
420 | 942M | for (x = 0; x < width; x++) { |
421 | 931M | b2[x] -= (b1[x] + b3[x] + 2) >> 2; |
422 | 931M | b1[x] += (b0[x] + b2[x]) >> 1; |
423 | 931M | } |
424 | 11.0M | } else { |
425 | 102k | if (y + 1 < (unsigned)height) |
426 | 51.1k | vertical_compose53iL0(b1, b2, b3, width); |
427 | 102k | if (y + 0 < (unsigned)height) |
428 | 26.7k | vertical_compose53iH0(b0, b1, b2, width); |
429 | 102k | } |
430 | | |
431 | 11.1M | if (y - 1 < (unsigned)height) |
432 | 11.1M | horizontal_compose53i(b0, temp, width); |
433 | 11.1M | if (y + 0 < (unsigned)height) |
434 | 11.1M | horizontal_compose53i(b1, temp, width); |
435 | | |
436 | 11.1M | cs->b0 = b2; |
437 | 11.1M | cs->b1 = b3; |
438 | 11.1M | cs->y += 2; |
439 | 11.1M | } |
440 | | |
441 | | static void spatial_compose53i_dy(DWTCompose *cs, IDWTELEM *buffer, |
442 | | IDWTELEM *temp, int width, int height, |
443 | | int stride) |
444 | 0 | { |
445 | 0 | int y = cs->y; |
446 | 0 | IDWTELEM *b0 = cs->b0; |
447 | 0 | IDWTELEM *b1 = cs->b1; |
448 | 0 | IDWTELEM *b2 = buffer + avpriv_mirror(y + 1, height - 1) * stride; |
449 | 0 | IDWTELEM *b3 = buffer + avpriv_mirror(y + 2, height - 1) * stride; |
450 | |
|
451 | 0 | if (y + 1 < (unsigned)height) |
452 | 0 | vertical_compose53iL0(b1, b2, b3, width); |
453 | 0 | if (y + 0 < (unsigned)height) |
454 | 0 | vertical_compose53iH0(b0, b1, b2, width); |
455 | |
|
456 | 0 | if (y - 1 < (unsigned)height) |
457 | 0 | horizontal_compose53i(b0, temp, width); |
458 | 0 | if (y + 0 < (unsigned)height) |
459 | 0 | horizontal_compose53i(b1, temp, width); |
460 | |
|
461 | 0 | cs->b0 = b2; |
462 | 0 | cs->b1 = b3; |
463 | 0 | cs->y += 2; |
464 | 0 | } |
465 | | |
466 | | static void snow_horizontal_compose97i(IDWTELEM *b, IDWTELEM *temp, int width) |
467 | 87.6M | { |
468 | 87.6M | const int w2 = (width + 1) >> 1; |
469 | 87.6M | int x; |
470 | | |
471 | 87.6M | temp[0] = b[0] - ((3 * b[w2] + 2) >> 2); |
472 | 5.83G | for (x = 1; x < (width >> 1); x++) { |
473 | 5.74G | temp[2 * x] = b[x] - ((3 * (b[x + w2 - 1] + b[x + w2]) + 4) >> 3); |
474 | 5.74G | temp[2 * x - 1] = b[x + w2 - 1] - temp[2 * x - 2] - temp[2 * x]; |
475 | 5.74G | } |
476 | 87.6M | if (width & 1) { |
477 | 20.1M | temp[2 * x] = b[x] - ((3 * b[x + w2 - 1] + 2) >> 2); |
478 | 20.1M | temp[2 * x - 1] = b[x + w2 - 1] - temp[2 * x - 2] - temp[2 * x]; |
479 | 20.1M | } else |
480 | 67.5M | temp[2 * x - 1] = b[x + w2 - 1] - 2 * temp[2 * x - 2]; |
481 | | |
482 | 87.6M | b[0] = temp[0] + ((2 * temp[0] + temp[1] + 4) >> 3); |
483 | 5.83G | for (x = 2; x < width - 1; x += 2) { |
484 | 5.74G | b[x] = temp[x] + ((4 * temp[x] + temp[x - 1] + temp[x + 1] + 8) >> 4); |
485 | 5.74G | b[x - 1] = temp[x - 1] + ((3 * (b[x - 2] + b[x])) >> 1); |
486 | 5.74G | } |
487 | 87.6M | if (width & 1) { |
488 | 20.1M | b[x] = temp[x] + ((2 * temp[x] + temp[x - 1] + 4) >> 3); |
489 | 20.1M | b[x - 1] = temp[x - 1] + ((3 * (b[x - 2] + b[x])) >> 1); |
490 | 20.1M | } else |
491 | 67.5M | b[x - 1] = temp[x - 1] + 3 * b[x - 2]; |
492 | 87.6M | } |
493 | | |
494 | | static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, |
495 | | int width) |
496 | 23.1M | { |
497 | 23.1M | int i; |
498 | | |
499 | 1.02G | for (i = 0; i < width; i++) |
500 | 997M | b1[i] += (W_AM * (b0[i] + b2[i]) + W_AO) >> W_AS; |
501 | 23.1M | } |
502 | | |
503 | | static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, |
504 | | int width) |
505 | 23.1M | { |
506 | 23.1M | int i; |
507 | | |
508 | 1.02G | for (i = 0; i < width; i++) |
509 | 997M | b1[i] -= (W_CM * (b0[i] + b2[i]) + W_CO) >> W_CS; |
510 | 23.1M | } |
511 | | |
512 | | static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, |
513 | | int width) |
514 | 23.3M | { |
515 | 23.3M | int i; |
516 | | |
517 | 1.06G | for (i = 0; i < width; i++) |
518 | 1.04G | b1[i] += (W_BM * (b0[i] + b2[i]) + 4 * b1[i] + W_BO) >> W_BS; |
519 | 23.3M | } |
520 | | |
521 | | static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, |
522 | | int width) |
523 | 23.3M | { |
524 | 23.3M | int i; |
525 | | |
526 | 1.06G | for (i = 0; i < width; i++) |
527 | 1.04G | b1[i] -= (W_DM * (b0[i] + b2[i]) + W_DO) >> W_DS; |
528 | 23.3M | } |
529 | | |
530 | | static void snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, |
531 | | IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, |
532 | | int width) |
533 | 20.5M | { |
534 | 20.5M | int i; |
535 | | |
536 | 4.84G | for (i = 0; i < width; i++) { |
537 | 4.82G | b4[i] -= (W_DM * (b3[i] + b5[i]) + W_DO) >> W_DS; |
538 | 4.82G | b3[i] -= (W_CM * (b2[i] + b4[i]) + W_CO) >> W_CS; |
539 | 4.82G | b2[i] += (W_BM * (b1[i] + b3[i]) + 4 * b2[i] + W_BO) >> W_BS; |
540 | 4.82G | b1[i] += (W_AM * (b0[i] + b2[i]) + W_AO) >> W_AS; |
541 | 4.82G | } |
542 | 20.5M | } |
543 | | |
544 | | static void spatial_compose97i_buffered_init(DWTCompose *cs, slice_buffer *sb, |
545 | | int height, int stride_line) |
546 | 55.0k | { |
547 | 55.0k | cs->b0 = slice_buffer_get_line(sb, avpriv_mirror(-3 - 1, height - 1) * stride_line); |
548 | 55.0k | cs->b1 = slice_buffer_get_line(sb, avpriv_mirror(-3, height - 1) * stride_line); |
549 | 55.0k | cs->b2 = slice_buffer_get_line(sb, avpriv_mirror(-3 + 1, height - 1) * stride_line); |
550 | 55.0k | cs->b3 = slice_buffer_get_line(sb, avpriv_mirror(-3 + 2, height - 1) * stride_line); |
551 | 55.0k | cs->y = -3; |
552 | 55.0k | } |
553 | | |
554 | | static void spatial_compose97i_init(DWTCompose *cs, IDWTELEM *buffer, int height, |
555 | | int stride) |
556 | 526k | { |
557 | 526k | cs->b0 = buffer + avpriv_mirror(-3 - 1, height - 1) * stride; |
558 | 526k | cs->b1 = buffer + avpriv_mirror(-3, height - 1) * stride; |
559 | 526k | cs->b2 = buffer + avpriv_mirror(-3 + 1, height - 1) * stride; |
560 | 526k | cs->b3 = buffer + avpriv_mirror(-3 + 2, height - 1) * stride; |
561 | 526k | cs->y = -3; |
562 | 526k | } |
563 | | |
564 | | static void spatial_compose97i_dy_buffered(SnowDWTContext *dsp, DWTCompose *cs, |
565 | | slice_buffer * sb, IDWTELEM *temp, |
566 | | int width, int height, |
567 | | int stride_line) |
568 | 20.8M | { |
569 | 20.8M | int y = cs->y; |
570 | | |
571 | 20.8M | IDWTELEM *b0 = cs->b0; |
572 | 20.8M | IDWTELEM *b1 = cs->b1; |
573 | 20.8M | IDWTELEM *b2 = cs->b2; |
574 | 20.8M | IDWTELEM *b3 = cs->b3; |
575 | 20.8M | IDWTELEM *b4 = slice_buffer_get_line(sb, |
576 | 20.8M | avpriv_mirror(y + 3, height - 1) * |
577 | 20.8M | stride_line); |
578 | 20.8M | IDWTELEM *b5 = slice_buffer_get_line(sb, |
579 | 20.8M | avpriv_mirror(y + 4, height - 1) * |
580 | 20.8M | stride_line); |
581 | | |
582 | 20.8M | if (y > 0 && y + 4 < height) { |
583 | 20.5M | dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width); |
584 | 20.5M | } else { |
585 | 254k | if (y + 3 < (unsigned)height) |
586 | 144k | vertical_compose97iL1(b3, b4, b5, width); |
587 | 254k | if (y + 2 < (unsigned)height) |
588 | 107k | vertical_compose97iH1(b2, b3, b4, width); |
589 | 254k | if (y + 1 < (unsigned)height) |
590 | 144k | vertical_compose97iL0(b1, b2, b3, width); |
591 | 254k | if (y + 0 < (unsigned)height) |
592 | 107k | vertical_compose97iH0(b0, b1, b2, width); |
593 | 254k | } |
594 | | |
595 | 20.8M | if (y - 1 < (unsigned)height) |
596 | 20.7M | dsp->horizontal_compose97i(b0, temp, width); |
597 | 20.8M | if (y + 0 < (unsigned)height) |
598 | 20.6M | dsp->horizontal_compose97i(b1, temp, width); |
599 | | |
600 | 20.8M | cs->b0 = b2; |
601 | 20.8M | cs->b1 = b3; |
602 | 20.8M | cs->b2 = b4; |
603 | 20.8M | cs->b3 = b5; |
604 | 20.8M | cs->y += 2; |
605 | 20.8M | } |
606 | | |
607 | | static void spatial_compose97i_dy(DWTCompose *cs, IDWTELEM *buffer, |
608 | | IDWTELEM *temp, int width, int height, |
609 | | int stride) |
610 | 24.2M | { |
611 | 24.2M | int y = cs->y; |
612 | 24.2M | IDWTELEM *b0 = cs->b0; |
613 | 24.2M | IDWTELEM *b1 = cs->b1; |
614 | 24.2M | IDWTELEM *b2 = cs->b2; |
615 | 24.2M | IDWTELEM *b3 = cs->b3; |
616 | 24.2M | IDWTELEM *b4 = buffer + avpriv_mirror(y + 3, height - 1) * stride; |
617 | 24.2M | IDWTELEM *b5 = buffer + avpriv_mirror(y + 4, height - 1) * stride; |
618 | | |
619 | 24.2M | if (y + 3 < (unsigned)height) |
620 | 23.1M | vertical_compose97iL1(b3, b4, b5, width); |
621 | 24.2M | if (y + 2 < (unsigned)height) |
622 | 23.0M | vertical_compose97iH1(b2, b3, b4, width); |
623 | 24.2M | if (y + 1 < (unsigned)height) |
624 | 23.1M | vertical_compose97iL0(b1, b2, b3, width); |
625 | 24.2M | if (y + 0 < (unsigned)height) |
626 | 23.0M | vertical_compose97iH0(b0, b1, b2, width); |
627 | | |
628 | 24.2M | if (y - 1 < (unsigned)height) |
629 | 23.1M | snow_horizontal_compose97i(b0, temp, width); |
630 | 24.2M | if (y + 0 < (unsigned)height) |
631 | 23.0M | snow_horizontal_compose97i(b1, temp, width); |
632 | | |
633 | 24.2M | cs->b0 = b2; |
634 | 24.2M | cs->b1 = b3; |
635 | 24.2M | cs->b2 = b4; |
636 | 24.2M | cs->b3 = b5; |
637 | 24.2M | cs->y += 2; |
638 | 24.2M | } |
639 | | |
640 | | void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer *sb, int width, |
641 | | int height, int stride_line, int type, |
642 | | int decomposition_count) |
643 | 57.4k | { |
644 | 57.4k | int level; |
645 | 163k | for (level = decomposition_count - 1; level >= 0; level--) { |
646 | 106k | switch (type) { |
647 | 55.0k | case DWT_97: |
648 | 55.0k | spatial_compose97i_buffered_init(cs + level, sb, height >> level, |
649 | 55.0k | stride_line << level); |
650 | 55.0k | break; |
651 | 51.1k | case DWT_53: |
652 | 51.1k | spatial_compose53i_buffered_init(cs + level, sb, height >> level, |
653 | 51.1k | stride_line << level); |
654 | 51.1k | break; |
655 | 106k | } |
656 | 106k | } |
657 | 57.4k | } |
658 | | |
659 | | void ff_spatial_idwt_buffered_slice(SnowDWTContext *dsp, DWTCompose *cs, |
660 | | slice_buffer *slice_buf, IDWTELEM *temp, |
661 | | int width, int height, int stride_line, |
662 | | int type, int decomposition_count, int y) |
663 | 11.3M | { |
664 | 11.3M | const int support = type == 1 ? 3 : 5; |
665 | 11.3M | int level; |
666 | 11.3M | if (type == 2) |
667 | 0 | return; |
668 | | |
669 | 35.3M | for (level = decomposition_count - 1; level >= 0; level--) |
670 | 55.9M | while (cs[level].y <= FFMIN((y >> level) + support, height >> level)) { |
671 | 32.0M | switch (type) { |
672 | 20.8M | case DWT_97: |
673 | 20.8M | spatial_compose97i_dy_buffered(dsp, cs + level, slice_buf, temp, |
674 | 20.8M | width >> level, |
675 | 20.8M | height >> level, |
676 | 20.8M | stride_line << level); |
677 | 20.8M | break; |
678 | 11.1M | case DWT_53: |
679 | 11.1M | spatial_compose53i_dy_buffered(cs + level, slice_buf, temp, |
680 | 11.1M | width >> level, |
681 | 11.1M | height >> level, |
682 | 11.1M | stride_line << level); |
683 | 11.1M | break; |
684 | 32.0M | } |
685 | 32.0M | } |
686 | 11.3M | } |
687 | | |
688 | | static void spatial_idwt_init(DWTCompose *cs, IDWTELEM *buffer, int width, |
689 | | int height, int stride, int type, |
690 | | int decomposition_count) |
691 | 152k | { |
692 | 152k | int level; |
693 | 679k | for (level = decomposition_count - 1; level >= 0; level--) { |
694 | 526k | switch (type) { |
695 | 526k | case DWT_97: |
696 | 526k | spatial_compose97i_init(cs + level, buffer, height >> level, |
697 | 526k | stride << level); |
698 | 526k | break; |
699 | 0 | case DWT_53: |
700 | 0 | spatial_compose53i_init(cs + level, buffer, height >> level, |
701 | 0 | stride << level); |
702 | 0 | break; |
703 | 526k | } |
704 | 526k | } |
705 | 152k | } |
706 | | |
707 | | static void spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer, |
708 | | IDWTELEM *temp, int width, int height, |
709 | | int stride, int type, |
710 | | int decomposition_count, int y) |
711 | 7.50M | { |
712 | 7.50M | const int support = type == 1 ? 3 : 5; |
713 | 7.50M | int level; |
714 | 7.50M | if (type == 2) |
715 | 0 | return; |
716 | | |
717 | 29.0M | for (level = decomposition_count - 1; level >= 0; level--) |
718 | 45.7M | while (cs[level].y <= FFMIN((y >> level) + support, height >> level)) { |
719 | 24.2M | switch (type) { |
720 | 24.2M | case DWT_97: |
721 | 24.2M | spatial_compose97i_dy(cs + level, buffer, temp, width >> level, |
722 | 24.2M | height >> level, stride << level); |
723 | 24.2M | break; |
724 | 0 | case DWT_53: |
725 | 0 | spatial_compose53i_dy(cs + level, buffer, temp, width >> level, |
726 | 0 | height >> level, stride << level); |
727 | 0 | break; |
728 | 24.2M | } |
729 | 24.2M | } |
730 | 7.50M | } |
731 | | |
732 | | void ff_spatial_idwt(IDWTELEM *buffer, IDWTELEM *temp, int width, int height, |
733 | | int stride, int type, int decomposition_count) |
734 | 152k | { |
735 | 152k | DWTCompose cs[MAX_DECOMPOSITIONS]; |
736 | 152k | int y; |
737 | 152k | spatial_idwt_init(cs, buffer, width, height, stride, type, |
738 | 152k | decomposition_count); |
739 | 7.66M | for (y = 0; y < height; y += 4) |
740 | 7.50M | spatial_idwt_slice(cs, buffer, temp, width, height, stride, type, |
741 | 7.50M | decomposition_count, y); |
742 | 152k | } |
743 | | |
744 | | static inline int w_c(MPVEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t line_size, |
745 | | int w, int h, int type) |
746 | 0 | { |
747 | 0 | int s, i, j; |
748 | 0 | const int dec_count = w == 8 ? 3 : 4; |
749 | 0 | int tmp[32 * 32], tmp2[32]; |
750 | 0 | int level, ori; |
751 | 0 | static const int scale[2][2][4][4] = { |
752 | 0 | { |
753 | 0 | { // 9/7 8x8 dec=3 |
754 | 0 | { 268, 239, 239, 213 }, |
755 | 0 | { 0, 224, 224, 152 }, |
756 | 0 | { 0, 135, 135, 110 }, |
757 | 0 | }, |
758 | 0 | { // 9/7 16x16 or 32x32 dec=4 |
759 | 0 | { 344, 310, 310, 280 }, |
760 | 0 | { 0, 320, 320, 228 }, |
761 | 0 | { 0, 175, 175, 136 }, |
762 | 0 | { 0, 129, 129, 102 }, |
763 | 0 | } |
764 | 0 | }, |
765 | 0 | { |
766 | 0 | { // 5/3 8x8 dec=3 |
767 | 0 | { 275, 245, 245, 218 }, |
768 | 0 | { 0, 230, 230, 156 }, |
769 | 0 | { 0, 138, 138, 113 }, |
770 | 0 | }, |
771 | 0 | { // 5/3 16x16 or 32x32 dec=4 |
772 | 0 | { 352, 317, 317, 286 }, |
773 | 0 | { 0, 328, 328, 233 }, |
774 | 0 | { 0, 180, 180, 140 }, |
775 | 0 | { 0, 132, 132, 105 }, |
776 | 0 | } |
777 | 0 | } |
778 | 0 | }; |
779 | |
|
780 | 0 | for (i = 0; i < h; i++) { |
781 | 0 | for (j = 0; j < w; j += 4) { |
782 | 0 | tmp[32 * i + j + 0] = (pix1[j + 0] - pix2[j + 0]) * (1 << 4); |
783 | 0 | tmp[32 * i + j + 1] = (pix1[j + 1] - pix2[j + 1]) * (1 << 4); |
784 | 0 | tmp[32 * i + j + 2] = (pix1[j + 2] - pix2[j + 2]) * (1 << 4); |
785 | 0 | tmp[32 * i + j + 3] = (pix1[j + 3] - pix2[j + 3]) * (1 << 4); |
786 | 0 | } |
787 | 0 | pix1 += line_size; |
788 | 0 | pix2 += line_size; |
789 | 0 | } |
790 | |
|
791 | 0 | ff_spatial_dwt(tmp, tmp2, w, h, 32, type, dec_count); |
792 | |
|
793 | 0 | s = 0; |
794 | 0 | av_assert1(w == h); |
795 | 0 | for (level = 0; level < dec_count; level++) |
796 | 0 | for (ori = level ? 1 : 0; ori < 4; ori++) { |
797 | 0 | int size = w >> (dec_count - level); |
798 | 0 | int sx = (ori & 1) ? size : 0; |
799 | 0 | int stride = 32 << (dec_count - level); |
800 | 0 | int sy = (ori & 2) ? stride >> 1 : 0; |
801 | |
|
802 | 0 | for (i = 0; i < size; i++) |
803 | 0 | for (j = 0; j < size; j++) { |
804 | 0 | int v = tmp[sx + sy + i * stride + j] * |
805 | 0 | scale[type][dec_count - 3][level][ori]; |
806 | 0 | s += FFABS(v); |
807 | 0 | } |
808 | 0 | } |
809 | 0 | av_assert1(s >= 0); |
810 | 0 | return s >> 9; |
811 | 0 | } |
812 | | |
813 | | static int w53_8_c(MPVEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t line_size, int h) |
814 | 0 | { |
815 | 0 | return w_c(v, pix1, pix2, line_size, 8, h, 1); |
816 | 0 | } |
817 | | |
818 | | static int w97_8_c(MPVEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t line_size, int h) |
819 | 0 | { |
820 | 0 | return w_c(v, pix1, pix2, line_size, 8, h, 0); |
821 | 0 | } |
822 | | |
823 | | static int w53_16_c(MPVEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t line_size, int h) |
824 | 0 | { |
825 | 0 | return w_c(v, pix1, pix2, line_size, 16, h, 1); |
826 | 0 | } |
827 | | |
828 | | static int w97_16_c(MPVEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t line_size, int h) |
829 | 0 | { |
830 | 0 | return w_c(v, pix1, pix2, line_size, 16, h, 0); |
831 | 0 | } |
832 | | |
833 | | int ff_w53_32_c(MPVEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t line_size, int h) |
834 | 0 | { |
835 | 0 | return w_c(v, pix1, pix2, line_size, 32, h, 1); |
836 | 0 | } |
837 | | |
838 | | int ff_w97_32_c(MPVEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t line_size, int h) |
839 | 0 | { |
840 | 0 | return w_c(v, pix1, pix2, line_size, 32, h, 0); |
841 | 0 | } |
842 | | |
843 | | av_cold void ff_dsputil_init_dwt(MECmpContext *c) |
844 | 133k | { |
845 | 133k | c->w53[0] = w53_16_c; |
846 | 133k | c->w53[1] = w53_8_c; |
847 | 133k | c->w97[0] = w97_16_c; |
848 | 133k | c->w97[1] = w97_8_c; |
849 | 133k | } |
850 | | |
851 | | av_cold void ff_dwt_init(SnowDWTContext *c) |
852 | 8.05k | { |
853 | 8.05k | c->vertical_compose97i = snow_vertical_compose97i; |
854 | 8.05k | c->horizontal_compose97i = snow_horizontal_compose97i; |
855 | 8.05k | c->inner_add_yblock = ff_snow_inner_add_yblock; |
856 | | |
857 | | #if ARCH_X86 && HAVE_MMX |
858 | | ff_dwt_init_x86(c); |
859 | | #endif |
860 | 8.05k | } |