Coverage Report

Created: 2025-12-31 07:57

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/ffmpeg/libavcodec/snow_dwt.c
Line
Count
Source
1
/*
2
 * Copyright (C) 2004-2010 Michael Niedermayer <michaelni@gmx.at>
3
 * Copyright (C) 2008 David Conrad
4
 *
5
 * This file is part of FFmpeg.
6
 *
7
 * FFmpeg is free software; you can redistribute it and/or
8
 * modify it under the terms of the GNU Lesser General Public
9
 * License as published by the Free Software Foundation; either
10
 * version 2.1 of the License, or (at your option) any later version.
11
 *
12
 * FFmpeg is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
 * Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public
18
 * License along with FFmpeg; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
 */
21
22
#include "libavutil/attributes.h"
23
#include "libavutil/avassert.h"
24
#include "libavutil/common.h"
25
#include "libavutil/mem.h"
26
#include "me_cmp.h"
27
#include "snow_dwt.h"
28
29
int ff_slice_buffer_init(slice_buffer *buf, int line_count,
30
                         int max_allocated_lines, int line_width,
31
                         IDWTELEM *base_buffer)
32
64.4k
{
33
64.4k
    int i;
34
35
64.4k
    buf->base_buffer = base_buffer;
36
64.4k
    buf->line_count  = line_count;
37
64.4k
    buf->line_width  = line_width;
38
64.4k
    buf->data_count  = max_allocated_lines;
39
64.4k
    buf->line        = av_calloc(line_count, sizeof(*buf->line));
40
64.4k
    if (!buf->line)
41
0
        return AVERROR(ENOMEM);
42
64.4k
    buf->data_stack  = av_malloc_array(max_allocated_lines, sizeof(IDWTELEM *));
43
64.4k
    if (!buf->data_stack) {
44
0
        av_freep(&buf->line);
45
0
        return AVERROR(ENOMEM);
46
0
    }
47
48
2.31M
    for (i = 0; i < max_allocated_lines; i++) {
49
2.24M
        buf->data_stack[i] = av_malloc_array(line_width, sizeof(IDWTELEM));
50
2.24M
        if (!buf->data_stack[i]) {
51
0
            for (i--; i >=0; i--)
52
0
                av_freep(&buf->data_stack[i]);
53
0
            av_freep(&buf->data_stack);
54
0
            av_freep(&buf->line);
55
0
            return AVERROR(ENOMEM);
56
0
        }
57
2.24M
    }
58
59
64.4k
    buf->data_stack_top = max_allocated_lines - 1;
60
64.4k
    return 0;
61
64.4k
}
62
63
/**
 * Return the row buffer bound to slot `line`, binding a free one first if
 * the slot is empty.
 *
 * @param buf  slice buffer set up by ff_slice_buffer_init()
 * @param line slot index into buf->line[]
 * @return the row buffer now stored in buf->line[line]
 */
IDWTELEM *ff_slice_buffer_load_line(slice_buffer *buf, int line)
{
    IDWTELEM *buffer;

    /* Same slot-range check ff_slice_buffer_release() makes before it
     * indexes buf->line[]. */
    av_assert1(line >= 0 && line < buf->line_count);
    av_assert0(buf->data_stack_top >= 0);

    /* An already bound slot is tolerated: hand back its row without
     * consuming another entry from the free stack. */
    if (buf->line[line])
        return buf->line[line];

    /* Pop the top free row and bind it to the slot. */
    buffer = buf->data_stack[buf->data_stack_top];
    buf->data_stack_top--;
    buf->line[line] = buffer;

    return buffer;
}
78
79
/**
 * Detach the row bound to slot `line` and push it back onto the free stack.
 * The slot must be in range and currently bound (checked with av_assert1).
 */
void ff_slice_buffer_release(slice_buffer *buf, int line)
{
    av_assert1(line >= 0 && line < buf->line_count);
    av_assert1(buf->line[line]);

    buf->data_stack[++buf->data_stack_top] = buf->line[line];
    buf->line[line] = NULL;
}
91
92
/**
 * Release every slot that still has a row bound to it.
 * Does nothing when the slot table has not been allocated.
 */
void ff_slice_buffer_flush(slice_buffer *buf)
{
    if (!buf->line)
        return;

    for (int slot = 0; slot < buf->line_count; slot++) {
        if (!buf->line[slot])
            continue;
        ff_slice_buffer_release(buf, slot);
    }
}
103
104
/**
 * Flush the buffer, then free every row on the stack, the stack itself and
 * the slot table. The pointers are reset by av_freep().
 */
void ff_slice_buffer_destroy(slice_buffer *buf)
{
    ff_slice_buffer_flush(buf);

    if (buf->data_stack) {
        int remaining = buf->data_count;

        /* Rows are freed from the last stack entry down to the first. */
        while (remaining-- > 0)
            av_freep(&buf->data_stack[remaining]);
    }
    av_freep(&buf->data_stack);
    av_freep(&buf->line);
}
115
116
/**
 * One lifting step over a strided 1-D signal.
 *
 * Each output is src[k] plus (inverse == 0) or minus (inverse != 0) the term
 * (mul * (left + right) + add) >> shift, where left and right are two
 * consecutive ref samples. At a mirrored edge the single available ref
 * sample is counted twice (mul * 2 * ref[...]).
 *
 * @param dst,src,ref                 output, input and reference samples
 * @param dst_step,src_step,ref_step  element strides used to index them
 * @param width     length of the full signal; the interior count w is
 *                  derived from it
 * @param highpass  0 or 1 at every call site in this file; selects which
 *                  edge is mirrored and contributes to w
 * @param inverse   nonzero subtracts the term instead of adding it
 */
static av_always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
117
                                  int dst_step, int src_step, int ref_step,
118
                                  int width, int mul, int add, int shift,
119
                                  int highpass, int inverse)
120
25.3M
{
121
25.3M
    const int mirror_left  = !highpass;
122
25.3M
    const int mirror_right = (width & 1) ^ highpass;
123
25.3M
    /* Number of interior outputs; (highpass & width) adds one when highpass
     * is 1 and width is odd. */
    const int w            = (width >> 1) - 1 + (highpass & width);
124
25.3M
    int i;
125
126
428M
/* Note: LIFT stays defined after this function (there is no #undef). */
#define LIFT(src, ref, inv) ((src) + ((inv) ? -(ref) : +(ref)))
127
25.3M
    /* Left edge: ref[0] stands in for both neighbours; dst and src then
     * skip that sample while ref is left where it is. */
    if (mirror_left) {
128
8.43M
        dst[0] = LIFT(src[0], ((mul * 2 * ref[0] + add) >> shift), inverse);
129
8.43M
        dst   += dst_step;
130
8.43M
        src   += src_step;
131
8.43M
    }
132
133
429M
    /* Interior: combine ref sample i with ref sample i + 1. */
    for (i = 0; i < w; i++)
134
404M
        dst[i * dst_step] = LIFT(src[i * src_step],
135
25.3M
                                 ((mul * (ref[i * ref_step] +
136
25.3M
                                          ref[(i + 1) * ref_step]) +
137
25.3M
                                   add) >> shift),
138
25.3M
                                 inverse);
139
140
25.3M
    /* Right edge: the last ref sample is doubled in the same way. */
    if (mirror_right)
141
15.2M
        dst[w * dst_step] = LIFT(src[w * src_step],
142
25.3M
                                 ((mul * 2 * ref[w * ref_step] + add) >> shift),
143
25.3M
                                 inverse);
144
25.3M
}
145
146
/**
 * Variant of lift() with the same edge handling and indexing but a
 * different per-sample formula (LIFTS below). The value passed as the
 * macro's ref argument is mul * (left + right) + add, or
 * mul * 2 * ref[...] + add at a mirrored edge.
 *
 * - inverse != 0: src + ((ref + 4 * src) >> shift)
 * - inverse == 0: -((-16 * src + ref + add / 4 + 1 + (5 << 25)) / (5 * 4)
 *                   - (1 << 23))
 *
 * shift is asserted (av_assert1) to be 4. The parameters have the same
 * roles as in lift().
 */
static av_always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
147
                                   int dst_step, int src_step, int ref_step,
148
                                   int width, int mul, int add, int shift,
149
                                   int highpass, int inverse)
150
8.43M
{
151
8.43M
    const int mirror_left  = !highpass;
152
8.43M
    const int mirror_right = (width & 1) ^ highpass;
153
8.43M
    const int w            = (width >> 1) - 1 + (highpass & width);
154
8.43M
    int i;
155
156
8.43M
    av_assert1(shift == 4);
157
8.43M
/* LIFTS reads the enclosing `shift` and `add` directly and stays defined
 * after this function (there is no #undef). The constant (5 << 25) divided
 * by (5 * 4) equals (1 << 23), the amount subtracted after the division.
 * NOTE(review): presumably the bias keeps the dividend non-negative so the
 * truncating division rounds in one direction -- confirm. */
#define LIFTS(src, ref, inv)                                            \
158
143M
    ((inv) ? (src) + (((ref) + 4 * (src)) >> shift)                     \
159
143M
           : -((-16 * (src) + (ref) + add /                             \
160
143M
                4 + 1 + (5 << 25)) / (5 * 4) - (1 << 23)))
161
8.43M
    /* Left edge: ref[0] is doubled; dst and src then skip that sample. */
    if (mirror_left) {
162
8.43M
        dst[0] = LIFTS(src[0], mul * 2 * ref[0] + add, inverse);
163
8.43M
        dst   += dst_step;
164
8.43M
        src   += src_step;
165
8.43M
    }
166
167
142M
    /* Interior: combine ref sample i with ref sample i + 1. */
    for (i = 0; i < w; i++)
168
133M
        dst[i * dst_step] = LIFTS(src[i * src_step],
169
8.43M
                                  mul * (ref[i * ref_step] +
170
8.43M
                                         ref[(i + 1) * ref_step]) + add,
171
8.43M
                                  inverse);
172
173
8.43M
    /* Right edge: the last ref sample is doubled. */
    if (mirror_right)
174
1.62M
        dst[w * dst_step] = LIFTS(src[w * src_step],
175
8.43M
                                  mul * 2 * ref[w * ref_step] + add,
176
8.43M
                                  inverse);
177
8.43M
}
178
179
/**
 * Forward horizontal pass using the two 5/3 lifting steps on one row.
 * The row is first de-interleaved into temp (even samples at the front, odd
 * samples starting at (width + 1) >> 1); both lift() calls then write back
 * into b.
 */
static void horizontal_decompose53i(DWTELEM *b, DWTELEM *temp, int width)
{
    const int odd_count = width >> 1;
    const int half      = (width + 1) >> 1;
    DWTELEM *const even = temp;
    DWTELEM *const odd  = temp + half;
    int k;

    for (k = 0; k < odd_count; k++) {
        even[k] = b[2 * k];
        odd[k]  = b[2 * k + 1];
    }
    /* An odd width leaves one trailing even sample. */
    if (width & 1)
        even[k] = b[2 * k];

    lift(b + half, odd,  even,     1, 1, 1, width, -1, 0, 1, 1, 0);
    lift(b,        even, b + half, 1, 1, 1, width,  1, 2, 2, 0, 0);
}
194
195
/** Per column: b1 -= (b0 + b2) >> 1. */
static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2,
                                    int width)
{
    for (int col = 0; col < width; col++) {
        const int neighbours = b0[col] + b2[col];

        b1[col] -= neighbours >> 1;
    }
}
203
204
/** Per column: b1 += (b0 + b2 + 2) >> 2. */
static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2,
                                    int width)
{
    for (int col = 0; col < width; col++) {
        const int rounded = b0[col] + b2[col] + 2;

        b1[col] += rounded >> 2;
    }
}
212
213
/**
 * One level of the forward 5/3 transform over a width x height area.
 * Rows are visited two at a time starting at y = -2; row indices outside
 * [0, height) are folded back with avpriv_mirror(), and each step is
 * skipped when the row it writes lies outside the image.
 */
static void spatial_decompose53i(DWTELEM *buffer, DWTELEM *temp,
                                 int width, int height, int stride)
{
    const unsigned rows = height;
    DWTELEM *r0 = buffer + avpriv_mirror(-2 - 1, height - 1) * stride;
    DWTELEM *r1 = buffer + avpriv_mirror(-2,     height - 1) * stride;
    int y = -2;

    while (y < height) {
        DWTELEM *r2 = buffer + avpriv_mirror(y + 1, height - 1) * stride;
        DWTELEM *r3 = buffer + avpriv_mirror(y + 2, height - 1) * stride;
        /* The unsigned comparison rejects negative and too-large rows at once. */
        const int r2_inside = (unsigned)(y + 1) < rows;
        const int r3_inside = (unsigned)(y + 2) < rows;

        if (r2_inside)
            horizontal_decompose53i(r2, temp, width);
        if (r3_inside)
            horizontal_decompose53i(r3, temp, width);

        if (r2_inside)
            vertical_decompose53iH0(r1, r2, r3, width);
        if ((unsigned)y < rows)
            vertical_decompose53iL0(r0, r1, r2, width);

        r0 = r2;
        r1 = r3;
        y += 2;
    }
}
238
239
/**
 * Forward horizontal pass using the four 9/7 lifting steps on one row.
 * The first two steps read the interleaved row b (stride 2) and write the
 * split halves into temp; the last two write the result back into b.
 */
static void horizontal_decompose97i(DWTELEM *b, DWTELEM *temp, int width)
{
    const int half         = (width + 1) >> 1;
    DWTELEM *const temp_hi = temp + half;
    DWTELEM *const b_hi    = b + half;

    lift (temp_hi, b + 1,   b,       1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1);
    liftS(temp,    b,       temp_hi, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0);
    lift (b_hi,    temp_hi, temp,    1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
    lift (b,       temp,    b_hi,    1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
}
248
249
/** Per column: b1 -= (W_AM * (b0 + b2) + W_AO) >> W_AS. */
static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2,
                                    int width)
{
    for (int col = 0; col < width; col++) {
        const int neighbours = b0[col] + b2[col];

        b1[col] -= (W_AM * neighbours + W_AO) >> W_AS;
    }
}
257
258
/** Per column: b1 += (W_CM * (b0 + b2) + W_CO) >> W_CS. */
static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2,
                                    int width)
{
    for (int col = 0; col < width; col++) {
        const int neighbours = b0[col] + b2[col];

        b1[col] += (W_CM * neighbours + W_CO) >> W_CS;
    }
}
266
267
/**
 * Per column, replace b1 by
 * (64 * b1 - 4 * (b0 + b2) + 5 * W_BO + (5 << 27)) / 80 - (1 << 23).
 */
static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2,
                                    int width)
{
    for (int col = 0; col < width; col++) {
        /* (5 << 27) / (5 * 16) equals (1 << 23), the amount removed below. */
        const int biased = 16 * 4 * b1[col] - 4 * (b0[col] + b2[col]) +
                           W_BO * 5 + (5 << 27);

        b1[col] = biased / (5 * 16) - (1 << 23);
    }
}
276
277
/** Per column: b1 += (W_DM * (b0 + b2) + W_DO) >> W_DS. */
static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2,
                                    int width)
{
    for (int col = 0; col < width; col++) {
        const int neighbours = b0[col] + b2[col];

        b1[col] += (W_DM * neighbours + W_DO) >> W_DS;
    }
}
285
286
/**
 * One level of the forward 9/7 transform over a width x height area.
 * A window of six rows slides down two rows per iteration, starting at
 * y = -4; row indices outside [0, height) are folded back with
 * avpriv_mirror(), and each step is skipped when the row it writes lies
 * outside the image.
 */
static void spatial_decompose97i(DWTELEM *buffer, DWTELEM *temp,
                                 int width, int height, int stride)
{
    const unsigned rows = height;
    DWTELEM *r0 = buffer + avpriv_mirror(-4 - 1, height - 1) * stride;
    DWTELEM *r1 = buffer + avpriv_mirror(-4,     height - 1) * stride;
    DWTELEM *r2 = buffer + avpriv_mirror(-4 + 1, height - 1) * stride;
    DWTELEM *r3 = buffer + avpriv_mirror(-4 + 2, height - 1) * stride;
    int y = -4;

    while (y < height) {
        DWTELEM *r4 = buffer + avpriv_mirror(y + 3, height - 1) * stride;
        DWTELEM *r5 = buffer + avpriv_mirror(y + 4, height - 1) * stride;
        /* The unsigned comparison rejects negative and too-large rows at once. */
        const int r4_inside = (unsigned)(y + 3) < rows;
        const int r5_inside = (unsigned)(y + 4) < rows;

        if (r4_inside)
            horizontal_decompose97i(r4, temp, width);
        if (r5_inside)
            horizontal_decompose97i(r5, temp, width);

        if (r4_inside)
            vertical_decompose97iH0(r3, r4, r5, width);
        if ((unsigned)(y + 2) < rows)
            vertical_decompose97iL0(r2, r3, r4, width);
        if ((unsigned)(y + 1) < rows)
            vertical_decompose97iH1(r1, r2, r3, width);
        if ((unsigned)y < rows)
            vertical_decompose97iL1(r0, r1, r2, width);

        r0 = r2;
        r1 = r3;
        r2 = r4;
        r3 = r5;
        y += 2;
    }
}
319
320
/**
 * Run decomposition_count levels of the forward transform in place.
 * At level L the pass sees width >> L by height >> L samples with a row
 * stride of stride << L. A type other than DWT_97 or DWT_53 does nothing.
 */
void ff_spatial_dwt(DWTELEM *buffer, DWTELEM *temp, int width, int height,
                    int stride, int type, int decomposition_count)
{
    for (int level = 0; level < decomposition_count; level++) {
        if (type == DWT_97)
            spatial_decompose97i(buffer, temp,
                                 width >> level, height >> level,
                                 stride << level);
        else if (type == DWT_53)
            spatial_decompose53i(buffer, temp,
                                 width >> level, height >> level,
                                 stride << level);
    }
}
340
341
/**
 * Inverse horizontal 5/3 pass on one row, in place.
 *
 * b holds two halves on entry: one at b[0..] and one starting at
 * b[(width + 1) >> 1]. They are interleaved into temp, then b is rebuilt
 * from temp; each odd output also uses the two even outputs of b written
 * just before it, so the statement order matters.
 *
 * NOTE(review): temp[1] is read unconditionally, and for width == 1 the
 * tail indexes b[2]/temp[2]; presumably callers guarantee width >= 2 --
 * confirm.
 */
static void horizontal_compose53i(IDWTELEM *b, IDWTELEM *temp, int width)
342
22.2M
{
343
22.2M
    const int width2 = width >> 1;
344
22.2M
    const int w2     = (width + 1) >> 1;
345
22.2M
    int x;
346
347
957M
    /* Interleave: first half -> even positions, second half -> odd. */
    for (x = 0; x < width2; x++) {
348
935M
        temp[2 * x]     = b[x];
349
935M
        temp[2 * x + 1] = b[x + w2];
350
935M
    }
351
22.2M
    if (width & 1)
352
9.49M
        temp[2 * x] = b[x];
353
354
22.2M
    /* Left edge: only one odd neighbour (temp[1]) is available. */
    b[0] = temp[0] - ((temp[1] + 1) >> 1);
355
935M
    /* Interior: rebuild even sample x, then the odd sample x - 1 between
     * the two even samples already written to b. */
    for (x = 2; x < width - 1; x += 2) {
356
913M
        b[x]     = temp[x]     - ((temp[x - 1] + temp[x + 1] + 2) >> 2);
357
913M
        b[x - 1] = temp[x - 1] + ((b[x - 2]    + b[x]        + 1) >> 1);
358
913M
    }
359
22.2M
    /* Right edge: an odd width ends on an even sample with one odd
     * neighbour; an even width ends on an odd sample with one even
     * neighbour (b[x - 2]). */
    if (width & 1) {
360
9.49M
        b[x]     = temp[x]     - ((temp[x - 1]     + 1) >> 1);
361
9.49M
        b[x - 1] = temp[x - 1] + ((b[x - 2] + b[x] + 1) >> 1);
362
9.49M
    } else
363
12.7M
        b[x - 1] = temp[x - 1] + b[x - 2];
364
22.2M
}
365
366
/** Per column: b1 += (b0 + b2) >> 1. */
static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
                                  int width)
{
    for (int col = 0; col < width; col++) {
        const int neighbours = b0[col] + b2[col];

        b1[col] += neighbours >> 1;
    }
}
374
375
/** Per column: b1 -= (b0 + b2 + 2) >> 2. */
static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
                                  int width)
{
    for (int col = 0; col < width; col++) {
        const int rounded = b0[col] + b2[col] + 2;

        b1[col] -= rounded >> 2;
    }
}
383
384
/**
 * Prime the 5/3 compose state for the slice-buffered path: fetch the rows
 * for the mirrored indices -2 and -1 from the slice buffer and set the row
 * cursor to -1.
 */
static void spatial_compose53i_buffered_init(DWTCompose *cs, slice_buffer *sb,
                                             int height, int stride_line)
{
    const int line0 = avpriv_mirror(-1 - 1, height - 1) * stride_line;
    const int line1 = avpriv_mirror(-1,     height - 1) * stride_line;

    cs->b0 = slice_buffer_get_line(sb, line0);
    cs->b1 = slice_buffer_get_line(sb, line1);
    cs->y  = -1;
}
392
393
/**
 * Prime the 5/3 compose state for a plain buffer: point b0/b1 at the rows
 * for the mirrored indices -2 and -1 and set the row cursor to -1.
 */
static void spatial_compose53i_init(DWTCompose *cs, IDWTELEM *buffer,
                                    int height, int stride)
{
    const int row0 = avpriv_mirror(-1 - 1, height - 1);
    const int row1 = avpriv_mirror(-1,     height - 1);

    cs->y  = -1;
    cs->b0 = buffer + row0 * stride;
    cs->b1 = buffer + row1 * stride;
}
400
401
static void spatial_compose53i_dy_buffered(DWTCompose *cs, slice_buffer *sb,
402
                                           IDWTELEM *temp,
403
                                           int width, int height,
404
                                           int stride_line)
405
11.1M
{
406
11.1M
    int y = cs->y;
407
408
11.1M
    IDWTELEM *b0 = cs->b0;
409
11.1M
    IDWTELEM *b1 = cs->b1;
410
11.1M
    IDWTELEM *b2 = slice_buffer_get_line(sb,
411
11.1M
                                         avpriv_mirror(y + 1, height - 1) *
412
11.1M
                                         stride_line);
413
11.1M
    IDWTELEM *b3 = slice_buffer_get_line(sb,
414
11.1M
                                         avpriv_mirror(y + 2, height - 1) *
415
11.1M
                                         stride_line);
416
417
11.1M
    if (y + 1 < (unsigned)height && y < (unsigned)height) {
418
11.0M
        int x;
419
420
942M
        for (x = 0; x < width; x++) {
421
931M
            b2[x] -= (b1[x] + b3[x] + 2) >> 2;
422
931M
            b1[x] += (b0[x] + b2[x])     >> 1;
423
931M
        }
424
11.0M
    } else {
425
102k
        if (y + 1 < (unsigned)height)
426
51.1k
            vertical_compose53iL0(b1, b2, b3, width);
427
102k
        if (y + 0 < (unsigned)height)
428
26.7k
            vertical_compose53iH0(b0, b1, b2, width);
429
102k
    }
430
431
11.1M
    if (y - 1 < (unsigned)height)
432
11.1M
        horizontal_compose53i(b0, temp, width);
433
11.1M
    if (y + 0 < (unsigned)height)
434
11.1M
        horizontal_compose53i(b1, temp, width);
435
436
11.1M
    cs->b0  = b2;
437
11.1M
    cs->b1  = b3;
438
11.1M
    cs->y  += 2;
439
11.1M
}
440
441
/**
 * Advance the 5/3 inverse transform on a plain buffer by two rows: the two
 * vertical steps, then the horizontal pass on the two oldest rows, each one
 * only if the row it writes lies inside the image.
 */
static void spatial_compose53i_dy(DWTCompose *cs, IDWTELEM *buffer,
                                  IDWTELEM *temp, int width, int height,
                                  int stride)
{
    const int y         = cs->y;
    const unsigned rows = height;
    IDWTELEM *r0 = cs->b0;
    IDWTELEM *r1 = cs->b1;
    IDWTELEM *r2 = buffer + avpriv_mirror(y + 1, height - 1) * stride;
    IDWTELEM *r3 = buffer + avpriv_mirror(y + 2, height - 1) * stride;
    /* The unsigned comparison rejects negative and too-large rows at once. */
    const int cur_inside = (unsigned)y < rows;

    if ((unsigned)(y + 1) < rows)
        vertical_compose53iL0(r1, r2, r3, width);
    if (cur_inside)
        vertical_compose53iH0(r0, r1, r2, width);

    if ((unsigned)(y - 1) < rows)
        horizontal_compose53i(r0, temp, width);
    if (cur_inside)
        horizontal_compose53i(r1, temp, width);

    cs->b0  = r2;
    cs->b1  = r3;
    cs->y  += 2;
}
465
466
/**
 * Inverse horizontal 9/7 pass on one row, in place (C version; installed
 * as horizontal_compose97i by ff_dwt_init()).
 *
 * Pass 1 reads the two halves of b (b[0..] and b[w2..]) and writes an
 * interleaved intermediate into temp. Pass 2 rebuilds b from temp. Both
 * passes read values they wrote one step earlier, so the statement order
 * matters.
 *
 * NOTE(review): b[w2] and temp[1] are read unconditionally, and for
 * width == 1 the odd-width tail indexes past a one-element row; presumably
 * callers guarantee width >= 2 -- confirm.
 */
static void snow_horizontal_compose97i(IDWTELEM *b, IDWTELEM *temp, int width)
467
87.6M
{
468
87.6M
    const int w2 = (width + 1) >> 1;
469
87.6M
    int x;
470
471
87.6M
    /* Pass 1, left edge: only one neighbour (b[w2]) in the second half. */
    temp[0] = b[0] - ((3 * b[w2] + 2) >> 2);
472
5.83G
    /* Pass 1, interior: write even temp sample 2x, then the odd sample
     * 2x - 1 from the two even temp samples around it. */
    for (x = 1; x < (width >> 1); x++) {
473
5.74G
        temp[2 * x]     = b[x] - ((3 * (b[x + w2 - 1] + b[x + w2]) + 4) >> 3);
474
5.74G
        temp[2 * x - 1] = b[x + w2 - 1] - temp[2 * x - 2] - temp[2 * x];
475
5.74G
    }
476
87.6M
    /* Pass 1, right edge: an odd width ends on an even sample; an even
     * width ends on an odd sample whose one even neighbour is doubled. */
    if (width & 1) {
477
20.1M
        temp[2 * x]     = b[x] - ((3 * b[x + w2 - 1] + 2) >> 2);
478
20.1M
        temp[2 * x - 1] = b[x + w2 - 1] - temp[2 * x - 2] - temp[2 * x];
479
20.1M
    } else
480
67.5M
        temp[2 * x - 1] = b[x + w2 - 1] - 2 * temp[2 * x - 2];
481
482
87.6M
    /* Pass 2, left edge. */
    b[0] = temp[0] + ((2 * temp[0] + temp[1] + 4) >> 3);
483
5.83G
    /* Pass 2, interior: even output x from temp, then odd output x - 1
     * from the two even outputs of b around it. */
    for (x = 2; x < width - 1; x += 2) {
484
5.74G
        b[x]     = temp[x] + ((4 * temp[x] + temp[x - 1] + temp[x + 1] + 8) >> 4);
485
5.74G
        b[x - 1] = temp[x - 1] + ((3 * (b[x - 2] + b[x])) >> 1);
486
5.74G
    }
487
87.6M
    /* Pass 2, right edge, same odd/even split as in pass 1. */
    if (width & 1) {
488
20.1M
        b[x]     = temp[x] + ((2 * temp[x] + temp[x - 1] + 4) >> 3);
489
20.1M
        b[x - 1] = temp[x - 1] + ((3 * (b[x - 2] + b[x])) >> 1);
490
20.1M
    } else
491
67.5M
        b[x - 1] = temp[x - 1] + 3 * b[x - 2];
492
87.6M
}
493
494
/** Per column: b1 += (W_AM * (b0 + b2) + W_AO) >> W_AS. */
static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
                                  int width)
{
    for (int col = 0; col < width; col++) {
        const int neighbours = b0[col] + b2[col];

        b1[col] += (W_AM * neighbours + W_AO) >> W_AS;
    }
}
502
503
/** Per column: b1 -= (W_CM * (b0 + b2) + W_CO) >> W_CS. */
static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
                                  int width)
{
    for (int col = 0; col < width; col++) {
        const int neighbours = b0[col] + b2[col];

        b1[col] -= (W_CM * neighbours + W_CO) >> W_CS;
    }
}
511
512
/** Per column: b1 += (W_BM * (b0 + b2) + 4 * b1 + W_BO) >> W_BS. */
static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
                                  int width)
{
    for (int col = 0; col < width; col++) {
        const int neighbours = b0[col] + b2[col];

        /* The term also includes four times the current b1 value. */
        b1[col] += (W_BM * neighbours + 4 * b1[col] + W_BO) >> W_BS;
    }
}
520
521
/** Per column: b1 -= (W_DM * (b0 + b2) + W_DO) >> W_DS. */
static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
                                  int width)
{
    for (int col = 0; col < width; col++) {
        const int neighbours = b0[col] + b2[col];

        b1[col] -= (W_DM * neighbours + W_DO) >> W_DS;
    }
}
529
530
/**
 * Fused vertical 9/7 inverse step over six rows (C version; installed as
 * vertical_compose97i by ff_dwt_init()). Per column it applies the same
 * four updates as vertical_compose97iL1, H1, L0 and H0, in that order, to
 * rows b4, b3, b2 and b1; each update reads the row written just before.
 */
static void snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
                                     IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5,
                                     int width)
{
    int col = 0;

    while (col < width) {
        b4[col] -= (W_DM * (b3[col] + b5[col]) + W_DO) >> W_DS;
        b3[col] -= (W_CM * (b2[col] + b4[col]) + W_CO) >> W_CS;
        b2[col] += (W_BM * (b1[col] + b3[col]) + 4 * b2[col] + W_BO) >> W_BS;
        b1[col] += (W_AM * (b0[col] + b2[col]) + W_AO) >> W_AS;
        col++;
    }
}
543
544
/**
 * Prime the 9/7 compose state for the slice-buffered path: fetch the rows
 * for the mirrored indices -4..-1 from the slice buffer, in that order, and
 * set the row cursor to -3.
 */
static void spatial_compose97i_buffered_init(DWTCompose *cs, slice_buffer *sb,
                                             int height, int stride_line)
{
    const int line0 = avpriv_mirror(-3 - 1, height - 1) * stride_line;
    const int line1 = avpriv_mirror(-3,     height - 1) * stride_line;
    const int line2 = avpriv_mirror(-3 + 1, height - 1) * stride_line;
    const int line3 = avpriv_mirror(-3 + 2, height - 1) * stride_line;

    cs->b0 = slice_buffer_get_line(sb, line0);
    cs->b1 = slice_buffer_get_line(sb, line1);
    cs->b2 = slice_buffer_get_line(sb, line2);
    cs->b3 = slice_buffer_get_line(sb, line3);
    cs->y  = -3;
}
553
554
/**
 * Prime the 9/7 compose state for a plain buffer: point b0..b3 at the rows
 * for the mirrored indices -4..-1 and set the row cursor to -3.
 */
static void spatial_compose97i_init(DWTCompose *cs, IDWTELEM *buffer, int height,
                                    int stride)
{
    const int row0 = avpriv_mirror(-3 - 1, height - 1);
    const int row1 = avpriv_mirror(-3,     height - 1);
    const int row2 = avpriv_mirror(-3 + 1, height - 1);
    const int row3 = avpriv_mirror(-3 + 2, height - 1);

    cs->y  = -3;
    cs->b0 = buffer + row0 * stride;
    cs->b1 = buffer + row1 * stride;
    cs->b2 = buffer + row2 * stride;
    cs->b3 = buffer + row3 * stride;
}
563
564
static void spatial_compose97i_dy_buffered(SnowDWTContext *dsp, DWTCompose *cs,
565
                                           slice_buffer * sb, IDWTELEM *temp,
566
                                           int width, int height,
567
                                           int stride_line)
568
20.8M
{
569
20.8M
    int y = cs->y;
570
571
20.8M
    IDWTELEM *b0 = cs->b0;
572
20.8M
    IDWTELEM *b1 = cs->b1;
573
20.8M
    IDWTELEM *b2 = cs->b2;
574
20.8M
    IDWTELEM *b3 = cs->b3;
575
20.8M
    IDWTELEM *b4 = slice_buffer_get_line(sb,
576
20.8M
                                         avpriv_mirror(y + 3, height - 1) *
577
20.8M
                                         stride_line);
578
20.8M
    IDWTELEM *b5 = slice_buffer_get_line(sb,
579
20.8M
                                         avpriv_mirror(y + 4, height - 1) *
580
20.8M
                                         stride_line);
581
582
20.8M
    if (y > 0 && y + 4 < height) {
583
20.5M
        dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
584
20.5M
    } else {
585
254k
        if (y + 3 < (unsigned)height)
586
144k
            vertical_compose97iL1(b3, b4, b5, width);
587
254k
        if (y + 2 < (unsigned)height)
588
107k
            vertical_compose97iH1(b2, b3, b4, width);
589
254k
        if (y + 1 < (unsigned)height)
590
144k
            vertical_compose97iL0(b1, b2, b3, width);
591
254k
        if (y + 0 < (unsigned)height)
592
107k
            vertical_compose97iH0(b0, b1, b2, width);
593
254k
    }
594
595
20.8M
    if (y - 1 < (unsigned)height)
596
20.7M
        dsp->horizontal_compose97i(b0, temp, width);
597
20.8M
    if (y + 0 < (unsigned)height)
598
20.6M
        dsp->horizontal_compose97i(b1, temp, width);
599
600
20.8M
    cs->b0  = b2;
601
20.8M
    cs->b1  = b3;
602
20.8M
    cs->b2  = b4;
603
20.8M
    cs->b3  = b5;
604
20.8M
    cs->y  += 2;
605
20.8M
}
606
607
/**
 * Advance the 9/7 inverse transform on a plain buffer by two rows: the four
 * vertical steps, then the horizontal pass on the two oldest rows, each one
 * only if the row it writes lies inside the image.
 */
static void spatial_compose97i_dy(DWTCompose *cs, IDWTELEM *buffer,
                                  IDWTELEM *temp, int width, int height,
                                  int stride)
{
    const int y         = cs->y;
    const unsigned rows = height;
    IDWTELEM *r0 = cs->b0;
    IDWTELEM *r1 = cs->b1;
    IDWTELEM *r2 = cs->b2;
    IDWTELEM *r3 = cs->b3;
    IDWTELEM *r4 = buffer + avpriv_mirror(y + 3, height - 1) * stride;
    IDWTELEM *r5 = buffer + avpriv_mirror(y + 4, height - 1) * stride;
    /* The unsigned comparison rejects negative and too-large rows at once. */
    const int cur_inside = (unsigned)y < rows;

    if ((unsigned)(y + 3) < rows)
        vertical_compose97iL1(r3, r4, r5, width);
    if ((unsigned)(y + 2) < rows)
        vertical_compose97iH1(r2, r3, r4, width);
    if ((unsigned)(y + 1) < rows)
        vertical_compose97iL0(r1, r2, r3, width);
    if (cur_inside)
        vertical_compose97iH0(r0, r1, r2, width);

    if ((unsigned)(y - 1) < rows)
        snow_horizontal_compose97i(r0, temp, width);
    if (cur_inside)
        snow_horizontal_compose97i(r1, temp, width);

    cs->b0  = r2;
    cs->b1  = r3;
    cs->b2  = r4;
    cs->b3  = r5;
    cs->y  += 2;
}
639
640
/**
 * Initialise one compose state per decomposition level for the
 * slice-buffered inverse transform, going from the highest level index down
 * to 0. Level L uses height >> L rows and a line stride of stride_line << L.
 * A type other than DWT_97 or DWT_53 leaves the states untouched; `width`
 * is not used here.
 */
void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer *sb, int width,
                                   int height, int stride_line, int type,
                                   int decomposition_count)
{
    int level = decomposition_count;

    while (--level >= 0) {
        if (type == DWT_97)
            spatial_compose97i_buffered_init(&cs[level], sb, height >> level,
                                             stride_line << level);
        else if (type == DWT_53)
            spatial_compose53i_buffered_init(&cs[level], sb, height >> level,
                                             stride_line << level);
    }
}
658
659
void ff_spatial_idwt_buffered_slice(SnowDWTContext *dsp, DWTCompose *cs,
660
                                    slice_buffer *slice_buf, IDWTELEM *temp,
661
                                    int width, int height, int stride_line,
662
                                    int type, int decomposition_count, int y)
663
11.3M
{
664
11.3M
    const int support = type == 1 ? 3 : 5;
665
11.3M
    int level;
666
11.3M
    if (type == 2)
667
0
        return;
668
669
35.3M
    for (level = decomposition_count - 1; level >= 0; level--)
670
55.9M
        while (cs[level].y <= FFMIN((y >> level) + support, height >> level)) {
671
32.0M
            switch (type) {
672
20.8M
            case DWT_97:
673
20.8M
                spatial_compose97i_dy_buffered(dsp, cs + level, slice_buf, temp,
674
20.8M
                                               width >> level,
675
20.8M
                                               height >> level,
676
20.8M
                                               stride_line << level);
677
20.8M
                break;
678
11.1M
            case DWT_53:
679
11.1M
                spatial_compose53i_dy_buffered(cs + level, slice_buf, temp,
680
11.1M
                                               width >> level,
681
11.1M
                                               height >> level,
682
11.1M
                                               stride_line << level);
683
11.1M
                break;
684
32.0M
            }
685
32.0M
        }
686
11.3M
}
687
688
/**
 * Initialise one compose state per decomposition level for the plain-buffer
 * inverse transform, going from the highest level index down to 0. Level L
 * uses height >> L rows and a row stride of stride << L. A type other than
 * DWT_97 or DWT_53 leaves the states untouched; `width` is not used here.
 */
static void spatial_idwt_init(DWTCompose *cs, IDWTELEM *buffer, int width,
                                 int height, int stride, int type,
                                 int decomposition_count)
{
    int level = decomposition_count;

    while (--level >= 0) {
        if (type == DWT_97)
            spatial_compose97i_init(&cs[level], buffer, height >> level,
                                    stride << level);
        else if (type == DWT_53)
            spatial_compose53i_init(&cs[level], buffer, height >> level,
                                    stride << level);
    }
}
706
707
/**
 * Advance every level of the plain-buffer inverse transform far enough for
 * output row y. Levels run from the highest index down to 0; a level keeps
 * stepping while its row cursor is at or below
 * min((y >> level) + support, height >> level).
 *
 * support is 3 when type == 1 and 5 otherwise; type == 2 returns at once.
 * NOTE(review): a type that is not 2, DWT_97 or DWT_53 never advances the
 * cursor, so the inner loop would not terminate -- confirm callers never
 * pass one.
 */
static void spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer,
                                  IDWTELEM *temp, int width, int height,
                                  int stride, int type,
                                  int decomposition_count, int y)
{
    const int support = type == 1 ? 3 : 5;

    if (type == 2)
        return;

    for (int level = decomposition_count - 1; level >= 0; level--) {
        const int reach      = (y >> level) + support;
        const int level_rows = height >> level;
        const int limit      = FFMIN(reach, level_rows);

        while (cs[level].y <= limit) {
            if (type == DWT_97)
                spatial_compose97i_dy(&cs[level], buffer, temp, width >> level,
                                      height >> level, stride << level);
            else if (type == DWT_53)
                spatial_compose53i_dy(&cs[level], buffer, temp, width >> level,
                                      height >> level, stride << level);
        }
    }
}
731
732
/**
 * Inverse transform of a whole plane held in a plain buffer: set up the
 * per-level compose states, then drive spatial_idwt_slice() for
 * y = 0, 4, 8, ... while y < height.
 */
void ff_spatial_idwt(IDWTELEM *buffer, IDWTELEM *temp, int width, int height,
                     int stride, int type, int decomposition_count)
{
    DWTCompose cs[MAX_DECOMPOSITIONS];
    int y = 0;

    spatial_idwt_init(cs, buffer, width, height, stride, type,
                      decomposition_count);

    while (y < height) {
        spatial_idwt_slice(cs, buffer, temp, width, height, stride, type,
                           decomposition_count, y);
        y += 4;
    }
}
743
744
/**
 * Wavelet-domain comparison of two pixel blocks.
 *
 * The per-pixel difference pix1 - pix2, scaled by 16, is written into a
 * 32-column scratch plane and transformed with ff_spatial_dwt(); the
 * absolute coefficients of every subband are then weighted with the scale
 * table and summed.
 *
 * @param v         not used by this function
 * @param line_size distance between rows of pix1 and of pix2
 * @param w,h       block size; w == 8 selects 3 decomposition levels, any
 *                  other width 4 (w == h is asserted with av_assert1)
 * @param type      first index into the scale table (0: 9/7, 1: 5/3) and
 *                  the type handed to ff_spatial_dwt()
 * @return the weighted sum shifted right by 9
 */
static inline int w_c(MPVEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t line_size,
                      int w, int h, int type)
{
    static const int scale[2][2][4][4] = {
        {
            { // 9/7 8x8 dec=3
                { 268, 239, 239, 213 },
                { 0,   224, 224, 152 },
                { 0,   135, 135, 110 },
            },
            { // 9/7 16x16 or 32x32 dec=4
                { 344, 310, 310, 280 },
                { 0,   320, 320, 228 },
                { 0,   175, 175, 136 },
                { 0,   129, 129, 102 },
            }
        },
        {
            { // 5/3 8x8 dec=3
                { 275, 245, 245, 218 },
                { 0,   230, 230, 156 },
                { 0,   138, 138, 113 },
            },
            { // 5/3 16x16 or 32x32 dec=4
                { 352, 317, 317, 286 },
                { 0,   328, 328, 233 },
                { 0,   180, 180, 140 },
                { 0,   132, 132, 105 },
            }
        }
    };
    const int dec_count = w == 8 ? 3 : 4;
    int tmp[32 * 32], tmp2[32];
    int sum = 0;

    /* Fill the scratch plane four columns at a time. */
    for (int row = 0; row < h; row++) {
        for (int col = 0; col < w; col += 4)
            for (int k = 0; k < 4; k++)
                tmp[32 * row + col + k] =
                    (pix1[col + k] - pix2[col + k]) * (1 << 4);
        pix1 += line_size;
        pix2 += line_size;
    }

    ff_spatial_dwt(tmp, tmp2, w, h, 32, type, dec_count);

    av_assert1(w == h);
    for (int level = 0; level < dec_count; level++) {
        const int depth  = dec_count - level;
        const int size   = w >> depth;
        const int stride = 32 << depth;

        /* Orientation 0 is visited at level 0 only. */
        for (int ori = level ? 1 : 0; ori < 4; ori++) {
            const int sx = (ori & 1) ? size : 0;
            const int sy = (ori & 2) ? stride >> 1 : 0;

            for (int i = 0; i < size; i++)
                for (int j = 0; j < size; j++) {
                    const int weighted = tmp[sx + sy + i * stride + j] *
                                         scale[type][dec_count - 3][level][ori];
                    sum += FFABS(weighted);
                }
        }
    }
    av_assert1(sum >= 0);
    return sum >> 9;
}
812
813
/** w_c() for 8-pixel-wide blocks with type 1 (the 5/3 tables). */
static int w53_8_c(MPVEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t line_size, int h)
{
    const int block_w = 8;
    const int type    = 1;

    return w_c(v, pix1, pix2, line_size, block_w, h, type);
}
817
818
/** w_c() for 8-pixel-wide blocks with type 0 (the 9/7 tables). */
static int w97_8_c(MPVEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t line_size, int h)
{
    const int block_w = 8;
    const int type    = 0;

    return w_c(v, pix1, pix2, line_size, block_w, h, type);
}
822
823
/** w_c() for 16-pixel-wide blocks with type 1 (the 5/3 tables). */
static int w53_16_c(MPVEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t line_size, int h)
{
    const int block_w = 16;
    const int type    = 1;

    return w_c(v, pix1, pix2, line_size, block_w, h, type);
}
827
828
/** w_c() for 16-pixel-wide blocks with type 0 (the 9/7 tables). */
static int w97_16_c(MPVEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t line_size, int h)
{
    const int block_w = 16;
    const int type    = 0;

    return w_c(v, pix1, pix2, line_size, block_w, h, type);
}
832
833
/** w_c() for 32-pixel-wide blocks with type 1 (the 5/3 tables). */
int ff_w53_32_c(MPVEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t line_size, int h)
{
    const int block_w = 32;
    const int type    = 1;

    return w_c(v, pix1, pix2, line_size, block_w, h, type);
}
837
838
/** w_c() for 32-pixel-wide blocks with type 0 (the 9/7 tables). */
int ff_w97_32_c(MPVEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t line_size, int h)
{
    const int block_w = 32;
    const int type    = 0;

    return w_c(v, pix1, pix2, line_size, block_w, h, type);
}
842
843
/**
 * Install the C wavelet comparison functions: entry 0 of w53/w97 gets the
 * 16-wide variant, entry 1 the 8-wide variant.
 */
av_cold void ff_dsputil_init_dwt(MECmpContext *c)
{
    /* 16-pixel-wide blocks */
    c->w53[0] = w53_16_c;
    c->w97[0] = w97_16_c;

    /* 8-pixel-wide blocks */
    c->w53[1] = w53_8_c;
    c->w97[1] = w97_8_c;
}
850
851
/**
 * Fill the SnowDWTContext with the C implementations, then let the x86
 * initialiser run when the build has ARCH_X86 and HAVE_MMX set.
 */
av_cold void ff_dwt_init(SnowDWTContext *c)
{
    c->horizontal_compose97i = snow_horizontal_compose97i;
    c->vertical_compose97i   = snow_vertical_compose97i;
    c->inner_add_yblock      = ff_snow_inner_add_yblock;

#if ARCH_X86 && HAVE_MMX
    ff_dwt_init_x86(c);
#endif
}