Coverage Report

Created: 2026-03-07 06:13

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/dav1d/src/cdef_apply_tmpl.c
Line
Count
Source
1
/*
2
 * Copyright © 2018, VideoLAN and dav1d authors
3
 * Copyright © 2018, Two Orioles, LLC
4
 * All rights reserved.
5
 *
6
 * Redistribution and use in source and binary forms, with or without
7
 * modification, are permitted provided that the following conditions are met:
8
 *
9
 * 1. Redistributions of source code must retain the above copyright notice, this
10
 *    list of conditions and the following disclaimer.
11
 *
12
 * 2. Redistributions in binary form must reproduce the above copyright notice,
13
 *    this list of conditions and the following disclaimer in the documentation
14
 *    and/or other materials provided with the distribution.
15
 *
16
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
 */
27
28
#include "config.h"
29
30
#include <string.h>
31
32
#include "common/intops.h"
33
34
#include "src/cdef_apply.h"
35
36
/* Bitmask telling backup2x8() which planes to save. */
enum Backup2x8Flags {
    BACKUP_2X8_Y  = 0x1, /* save the luma plane (index 0) */
    BACKUP_2X8_UV = 0x2, /* save both chroma planes (indices 1 and 2) */
};
40
41
/*
 * Save two full-stride pixel rows per plane from src into dst.
 *
 * For luma, and for chroma unless the layout is I420, the rows copied are
 * rows 6 and 7 relative to src[pl]; for I420 chroma they are rows 2 and 3.
 * When the stride is negative, the higher-numbered row sits at the lower
 * address, so both the source and the destination start one row "later"
 * (src + 7 or 3 strides, dst + 1 stride) and the length is negated to keep
 * it positive.
 *
 * dst    - per-plane destination pointers
 * src    - per-plane source pointers
 * stride - [0] luma stride, [1] chroma stride, converted with PXSTRIDE()
 * layout - chroma planes are skipped entirely for I400
 *
 * NOTE(review): pixel_copy() is assumed to be a memcpy-like copy whose
 * length is counted in pixels - confirm against its definition.
 */
static void backup2lines(pixel *const dst[3], /*const*/ pixel *const src[3],
                         const ptrdiff_t stride[2],
                         const enum Dav1dPixelLayout layout)
{
    const ptrdiff_t luma_stride = PXSTRIDE(stride[0]);
    if (luma_stride >= 0) {
        pixel_copy(dst[0], src[0] + 6 * luma_stride, 2 * luma_stride);
    } else {
        pixel_copy(dst[0] + luma_stride, src[0] + 7 * luma_stride,
                   -2 * luma_stride);
    }

    /* Monochrome: there are no chroma planes to back up. */
    if (layout == DAV1D_PIXEL_LAYOUT_I400)
        return;

    const ptrdiff_t chroma_stride = PXSTRIDE(stride[1]);
    const int is_420 = layout == DAV1D_PIXEL_LAYOUT_I420;

    if (chroma_stride >= 0) {
        const int first_row = is_420 ? 2 : 6;
        for (int pl = 1; pl <= 2; pl++)
            pixel_copy(dst[pl], src[pl] + first_row * chroma_stride,
                       2 * chroma_stride);
    } else {
        const int last_row = is_420 ? 3 : 7;
        for (int pl = 1; pl <= 2; pl++)
            pixel_copy(dst[pl] + chroma_stride,
                       src[pl] + last_row * chroma_stride,
                       -2 * chroma_stride);
    }
}
64
65
/*
 * Save a 2-pixel-wide column from each selected plane into dst.
 *
 * For every row the two pixels immediately left of column x_off are copied,
 * i.e. columns x_off - 2 and x_off - 1. Luma always covers 8 rows; chroma
 * covers 8 rows, or 4 for I420, and its x_off is halved for every layout
 * other than I444.
 *
 * dst        - dst[plane][row] receives the two saved pixels
 * src        - per-plane source pointers
 * src_stride - [0] luma stride, [1] chroma stride, converted with PXSTRIDE()
 * x_off      - luma column just right of the saved pair
 * layout     - chroma is skipped for I400
 * flag       - BACKUP_2X8_Y and/or BACKUP_2X8_UV select the planes to save
 */
static void backup2x8(pixel dst[3][8][2],
                      /*const*/ pixel *const src[3],
                      const ptrdiff_t src_stride[2], int x_off,
                      const enum Dav1dPixelLayout layout,
                      const enum Backup2x8Flags flag)
{
    if (flag & BACKUP_2X8_Y) {
        const ptrdiff_t luma_stride = PXSTRIDE(src_stride[0]);
        for (int row = 0; row < 8; row++) {
            const ptrdiff_t idx = row * luma_stride + x_off - 2;
            pixel_copy(dst[0][row], src[0] + idx, 2);
        }
    }

    const int want_chroma = (flag & BACKUP_2X8_UV) &&
                            layout != DAV1D_PIXEL_LAYOUT_I400;
    if (!want_chroma)
        return;

    const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420;
    const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444;
    const int chroma_rows = 8 >> ss_ver;
    const int chroma_x = x_off >> ss_hor;
    const ptrdiff_t chroma_stride = PXSTRIDE(src_stride[1]);

    for (int row = 0; row < chroma_rows; row++) {
        const ptrdiff_t idx = row * chroma_stride + chroma_x - 2;
        pixel_copy(dst[1][row], src[1] + idx, 2);
        pixel_copy(dst[2][row], src[2] + idx, 2);
    }
}
90
91
6.10M
/*
 * Scale a primary filter strength according to a block's variance.
 *
 * Returns 0 when var is 0. Otherwise returns
 *     (strength * (4 + i) + 8) >> 4
 * where i is min(ulog2(var >> 6), 12), or 0 when var >> 6 is itself 0.
 * The multiplier therefore ranges from 4/16 to 16/16, with +8 rounding.
 *
 * NOTE(review): ulog2() is assumed to be floor(log2(x)) for x > 0 - confirm
 * against common/intops.h.
 */
static int adjust_strength(const int strength, const unsigned var) {
    if (var == 0)
        return 0;

    const unsigned scaled_var = var >> 6;
    int boost = 0;
    if (scaled_var)
        boost = imin(ulog2(scaled_var), 12);

    return (strength * (boost + 4) + 8) >> 4;
}
96
97
void bytefn(dav1d_cdef_brow)(Dav1dTaskContext *const tc,
98
                             pixel *const p[3],
99
                             const Av1Filter *const lflvl,
100
                             const int by_start, const int by_end,
101
                             const int sbrow_start, const int sby)
102
987k
{
103
987k
    Dav1dFrameContext *const f = (Dav1dFrameContext *)tc->f;
104
987k
    const int bitdepth_min_8 = BITDEPTH == 8 ? 0 : f->cur.p.bpc - 8;
105
987k
    const Dav1dDSPContext *const dsp = f->dsp;
106
987k
    enum CdefEdgeFlags edges = CDEF_HAVE_BOTTOM | (by_start > 0 ? CDEF_HAVE_TOP : 0);
107
987k
    pixel *ptrs[3] = { p[0], p[1], p[2] };
108
987k
    const int sbsz = 16;
109
987k
    const int sb64w = f->sb128w << 1;
110
987k
    const int damping = f->frame_hdr->cdef.damping + bitdepth_min_8;
111
987k
    const enum Dav1dPixelLayout layout = f->cur.p.layout;
112
987k
    const int uv_idx = DAV1D_PIXEL_LAYOUT_I444 - layout;
113
987k
    const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420;
114
987k
    const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444;
115
987k
    static const uint8_t uv_dirs[2][8] = { { 0, 1, 2, 3, 4, 5, 6, 7 },
116
987k
                                           { 7, 0, 2, 4, 5, 6, 6, 6 } };
117
987k
    const uint8_t *uv_dir = uv_dirs[layout == DAV1D_PIXEL_LAYOUT_I422];
118
987k
    const int have_tt = f->c->n_tc > 1;
119
987k
    const int sb128 = f->seq_hdr->sb128;
120
987k
    const int resize = f->frame_hdr->width[0] != f->frame_hdr->width[1];
121
987k
    const ptrdiff_t y_stride = PXSTRIDE(f->cur.stride[0]);
122
987k
    const ptrdiff_t uv_stride = PXSTRIDE(f->cur.stride[1]);
123
124
7.70M
    for (int bit = 0, by = by_start; by < by_end; by += 2, edges |= CDEF_HAVE_TOP) {
125
6.73M
        const int tf = tc->top_pre_cdef_toggle;
126
6.73M
        const int by_idx = (by & 30) >> 1;
127
6.73M
        if (by + 2 >= f->bh) edges &= ~CDEF_HAVE_BOTTOM;
128
129
6.73M
        if ((!have_tt || sbrow_start || by + 2 < by_end) &&
130
6.29M
            edges & CDEF_HAVE_BOTTOM)
131
6.29M
        {
132
            // backup pre-filter data for next iteration
133
6.29M
            pixel *const cdef_top_bak[3] = {
134
6.29M
                f->lf.cdef_line[!tf][0] + have_tt * sby * 4 * y_stride,
135
6.29M
                f->lf.cdef_line[!tf][1] + have_tt * sby * 8 * uv_stride,
136
6.29M
                f->lf.cdef_line[!tf][2] + have_tt * sby * 8 * uv_stride
137
6.29M
            };
138
6.29M
            backup2lines(cdef_top_bak, ptrs, f->cur.stride, layout);
139
6.29M
        }
140
141
6.73M
        ALIGN_STK_16(pixel, lr_bak, 2 /* idx */, [3 /* plane */][8 /* y */][2 /* x */]);
142
6.73M
        pixel *iptrs[3] = { ptrs[0], ptrs[1], ptrs[2] };
143
6.73M
        edges &= ~CDEF_HAVE_LEFT;
144
6.73M
        edges |= CDEF_HAVE_RIGHT;
145
6.73M
        enum Backup2x8Flags prev_flag = 0;
146
22.2M
        for (int sbx = 0; sbx < sb64w; sbx++, edges |= CDEF_HAVE_LEFT) {
147
15.5M
            const int sb128x = sbx >> 1;
148
15.5M
            const int sb64_idx = ((by & sbsz) >> 3) + (sbx & 1);
149
15.5M
            const int cdef_idx = lflvl[sb128x].cdef_idx[sb64_idx];
150
15.5M
            if (cdef_idx == -1 ||
151
2.76M
                (!f->frame_hdr->cdef.y_strength[cdef_idx] &&
152
1.46M
                 !f->frame_hdr->cdef.uv_strength[cdef_idx]))
153
14.1M
            {
154
14.1M
                prev_flag = 0;
155
14.1M
                goto next_sb;
156
14.1M
            }
157
158
            // Create a complete 32-bit mask for the sb row ahead of time.
159
1.38M
            const uint16_t (*noskip_row)[2] = &lflvl[sb128x].noskip_mask[by_idx];
160
1.38M
            const unsigned noskip_mask = (unsigned) noskip_row[0][1] << 16 |
161
1.38M
                                                    noskip_row[0][0];
162
163
1.38M
            const int y_lvl = f->frame_hdr->cdef.y_strength[cdef_idx];
164
1.38M
            const int uv_lvl = f->frame_hdr->cdef.uv_strength[cdef_idx];
165
1.38M
            const enum Backup2x8Flags flag = !!y_lvl + (!!uv_lvl << 1);
166
167
1.38M
            const int y_pri_lvl = (y_lvl >> 2) << bitdepth_min_8;
168
1.38M
            int y_sec_lvl = y_lvl & 3;
169
1.38M
            y_sec_lvl += y_sec_lvl == 3;
170
1.38M
            y_sec_lvl <<= bitdepth_min_8;
171
172
1.38M
            const int uv_pri_lvl = (uv_lvl >> 2) << bitdepth_min_8;
173
1.38M
            int uv_sec_lvl = uv_lvl & 3;
174
1.38M
            uv_sec_lvl += uv_sec_lvl == 3;
175
1.38M
            uv_sec_lvl <<= bitdepth_min_8;
176
177
1.38M
            pixel *bptrs[3] = { iptrs[0], iptrs[1], iptrs[2] };
178
10.6M
            for (int bx = sbx * sbsz; bx < imin((sbx + 1) * sbsz, f->bw);
179
9.26M
                 bx += 2, edges |= CDEF_HAVE_LEFT)
180
9.19M
            {
181
9.19M
                if (bx + 2 >= f->bw) edges &= ~CDEF_HAVE_RIGHT;
182
183
                // check if this 8x8 block had any coded coefficients; if not,
184
                // go to the next block
185
9.19M
                const uint32_t bx_mask = 3U << (bx & 30);
186
9.19M
                if (!(noskip_mask & bx_mask)) {
187
1.68M
                    prev_flag = 0;
188
1.68M
                    goto next_b;
189
1.68M
                }
190
7.50M
                const enum Backup2x8Flags do_left = (prev_flag ^ flag) & flag;
191
7.50M
                prev_flag = flag;
192
7.50M
                if (do_left && edges & CDEF_HAVE_LEFT) {
193
                    // we didn't backup the prefilter data because it wasn't
194
                    // there, so do it here instead
195
271k
                    backup2x8(lr_bak[bit], bptrs, f->cur.stride, 0, layout, do_left);
196
271k
                }
197
7.50M
                if (edges & CDEF_HAVE_RIGHT) {
198
                    // backup pre-filter data for next iteration
199
7.30M
                    backup2x8(lr_bak[!bit], bptrs, f->cur.stride, 8, layout, flag);
200
7.30M
                }
201
202
7.50M
                int dir;
203
7.50M
                unsigned variance;
204
7.50M
                if (y_pri_lvl || uv_pri_lvl)
205
6.76M
                    dir = dsp->cdef.dir(bptrs[0], f->cur.stride[0],
206
6.76M
                                        &variance HIGHBD_CALL_SUFFIX);
207
208
7.50M
                const pixel *top, *bot;
209
7.50M
                ptrdiff_t offset;
210
211
7.50M
                if (!have_tt) goto st_y;
212
6.60M
                if (sbrow_start && by == by_start) {
213
430k
                    if (resize) {
214
38.1k
                        offset = (sby - 1) * 4 * y_stride + bx * 4;
215
38.1k
                        top = &f->lf.cdef_lpf_line[0][offset];
216
391k
                    } else {
217
391k
                        offset = (sby * (4 << sb128) - 4) * y_stride + bx * 4;
218
391k
                        top = &f->lf.lr_lpf_line[0][offset];
219
391k
                    }
220
430k
                    bot = bptrs[0] + 8 * y_stride;
221
6.17M
                } else if (!sbrow_start && by + 2 >= by_end) {
222
527k
                    top = &f->lf.cdef_line[tf][0][sby * 4 * y_stride + bx * 4];
223
527k
                    if (resize) {
224
58.9k
                        offset = (sby * 4 + 2) * y_stride + bx * 4;
225
58.9k
                        bot = &f->lf.cdef_lpf_line[0][offset];
226
468k
                    } else {
227
468k
                        const int line = sby * (4 << sb128) + 4 * sb128 + 2;
228
468k
                        offset = line * y_stride + bx * 4;
229
468k
                        bot = &f->lf.lr_lpf_line[0][offset];
230
468k
                    }
231
5.65M
                } else {
232
6.45M
            st_y:;
233
6.45M
                    offset = sby * 4 * y_stride;
234
6.45M
                    top = &f->lf.cdef_line[tf][0][have_tt * offset + bx * 4];
235
6.45M
                    bot = bptrs[0] + 8 * y_stride;
236
6.45M
                }
237
7.41M
                if (y_pri_lvl) {
238
6.10M
                    const int adj_y_pri_lvl = adjust_strength(y_pri_lvl, variance);
239
6.10M
                    if (adj_y_pri_lvl || y_sec_lvl)
240
5.07M
                        dsp->cdef.fb[0](bptrs[0], f->cur.stride[0], lr_bak[bit][0],
241
5.07M
                                        top, bot, adj_y_pri_lvl, y_sec_lvl,
242
5.07M
                                        dir, damping, edges HIGHBD_CALL_SUFFIX);
243
6.10M
                } else if (y_sec_lvl)
244
741k
                    dsp->cdef.fb[0](bptrs[0], f->cur.stride[0], lr_bak[bit][0],
245
741k
                                    top, bot, 0, y_sec_lvl, 0, damping,
246
741k
                                    edges HIGHBD_CALL_SUFFIX);
247
248
7.41M
                if (!uv_lvl) goto skip_uv;
249
7.41M
                assert(layout != DAV1D_PIXEL_LAYOUT_I400);
250
251
5.95M
                const int uvdir = uv_pri_lvl ? uv_dir[dir] : 0;
252
17.9M
                for (int pl = 1; pl <= 2; pl++) {
253
11.8M
                    if (!have_tt) goto st_uv;
254
10.6M
                    if (sbrow_start && by == by_start) {
255
656k
                        if (resize) {
256
46.8k
                            offset = (sby - 1) * 4 * uv_stride + (bx * 4 >> ss_hor);
257
46.8k
                            top = &f->lf.cdef_lpf_line[pl][offset];
258
609k
                        } else {
259
609k
                            const int line = sby * (4 << sb128) - 4;
260
609k
                            offset = line * uv_stride + (bx * 4 >> ss_hor);
261
609k
                            top = &f->lf.lr_lpf_line[pl][offset];
262
609k
                        }
263
656k
                        bot = bptrs[pl] + (8 >> ss_ver) * uv_stride;
264
10.0M
                    } else if (!sbrow_start && by + 2 >= by_end) {
265
764k
                        const ptrdiff_t top_offset = sby * 8 * uv_stride +
266
764k
                                                     (bx * 4 >> ss_hor);
267
764k
                        top = &f->lf.cdef_line[tf][pl][top_offset];
268
764k
                        if (resize) {
269
56.5k
                            offset = (sby * 4 + 2) * uv_stride + (bx * 4 >> ss_hor);
270
56.5k
                            bot = &f->lf.cdef_lpf_line[pl][offset];
271
708k
                        } else {
272
708k
                            const int line = sby * (4 << sb128) + 4 * sb128 + 2;
273
708k
                            offset = line * uv_stride + (bx * 4 >> ss_hor);
274
708k
                            bot = &f->lf.lr_lpf_line[pl][offset];
275
708k
                        }
276
9.26M
                    } else {
277
10.5M
                st_uv:;
278
10.5M
                        const ptrdiff_t offset = sby * 8 * uv_stride;
279
10.5M
                        top = &f->lf.cdef_line[tf][pl][have_tt * offset + (bx * 4 >> ss_hor)];
280
10.5M
                        bot = bptrs[pl] + (8 >> ss_ver) * uv_stride;
281
10.5M
                    }
282
11.9M
                    dsp->cdef.fb[uv_idx](bptrs[pl], f->cur.stride[1],
283
11.9M
                                         lr_bak[bit][pl], top, bot,
284
11.9M
                                         uv_pri_lvl, uv_sec_lvl, uvdir,
285
11.9M
                                         damping - 1, edges HIGHBD_CALL_SUFFIX);
286
11.9M
                }
287
288
7.61M
            skip_uv:
289
7.61M
                bit ^= 1;
290
291
9.26M
            next_b:
292
9.26M
                bptrs[0] += 8;
293
9.26M
                bptrs[1] += 8 >> ss_hor;
294
9.26M
                bptrs[2] += 8 >> ss_hor;
295
9.26M
            }
296
297
15.4M
        next_sb:
298
15.4M
            iptrs[0] += sbsz * 4;
299
15.4M
            iptrs[1] += sbsz * 4 >> ss_hor;
300
15.4M
            iptrs[2] += sbsz * 4 >> ss_hor;
301
15.4M
        }
302
303
6.72M
        ptrs[0] += 8 * PXSTRIDE(f->cur.stride[0]);
304
6.72M
        ptrs[1] += 8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver;
305
6.72M
        ptrs[2] += 8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver;
306
6.72M
        tc->top_pre_cdef_toggle ^= 1;
307
6.72M
    }
308
987k
}
dav1d_cdef_brow_8bpc
Line
Count
Source
102
219k
{
103
219k
    Dav1dFrameContext *const f = (Dav1dFrameContext *)tc->f;
104
219k
    const int bitdepth_min_8 = BITDEPTH == 8 ? 0 : f->cur.p.bpc - 8;
105
219k
    const Dav1dDSPContext *const dsp = f->dsp;
106
219k
    enum CdefEdgeFlags edges = CDEF_HAVE_BOTTOM | (by_start > 0 ? CDEF_HAVE_TOP : 0);
107
219k
    pixel *ptrs[3] = { p[0], p[1], p[2] };
108
219k
    const int sbsz = 16;
109
219k
    const int sb64w = f->sb128w << 1;
110
219k
    const int damping = f->frame_hdr->cdef.damping + bitdepth_min_8;
111
219k
    const enum Dav1dPixelLayout layout = f->cur.p.layout;
112
219k
    const int uv_idx = DAV1D_PIXEL_LAYOUT_I444 - layout;
113
219k
    const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420;
114
219k
    const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444;
115
219k
    static const uint8_t uv_dirs[2][8] = { { 0, 1, 2, 3, 4, 5, 6, 7 },
116
219k
                                           { 7, 0, 2, 4, 5, 6, 6, 6 } };
117
219k
    const uint8_t *uv_dir = uv_dirs[layout == DAV1D_PIXEL_LAYOUT_I422];
118
219k
    const int have_tt = f->c->n_tc > 1;
119
219k
    const int sb128 = f->seq_hdr->sb128;
120
219k
    const int resize = f->frame_hdr->width[0] != f->frame_hdr->width[1];
121
219k
    const ptrdiff_t y_stride = PXSTRIDE(f->cur.stride[0]);
122
219k
    const ptrdiff_t uv_stride = PXSTRIDE(f->cur.stride[1]);
123
124
1.29M
    for (int bit = 0, by = by_start; by < by_end; by += 2, edges |= CDEF_HAVE_TOP) {
125
1.07M
        const int tf = tc->top_pre_cdef_toggle;
126
1.07M
        const int by_idx = (by & 30) >> 1;
127
1.07M
        if (by + 2 >= f->bh) edges &= ~CDEF_HAVE_BOTTOM;
128
129
1.07M
        if ((!have_tt || sbrow_start || by + 2 < by_end) &&
130
987k
            edges & CDEF_HAVE_BOTTOM)
131
984k
        {
132
            // backup pre-filter data for next iteration
133
984k
            pixel *const cdef_top_bak[3] = {
134
984k
                f->lf.cdef_line[!tf][0] + have_tt * sby * 4 * y_stride,
135
984k
                f->lf.cdef_line[!tf][1] + have_tt * sby * 8 * uv_stride,
136
984k
                f->lf.cdef_line[!tf][2] + have_tt * sby * 8 * uv_stride
137
984k
            };
138
984k
            backup2lines(cdef_top_bak, ptrs, f->cur.stride, layout);
139
984k
        }
140
141
1.07M
        ALIGN_STK_16(pixel, lr_bak, 2 /* idx */, [3 /* plane */][8 /* y */][2 /* x */]);
142
1.07M
        pixel *iptrs[3] = { ptrs[0], ptrs[1], ptrs[2] };
143
1.07M
        edges &= ~CDEF_HAVE_LEFT;
144
1.07M
        edges |= CDEF_HAVE_RIGHT;
145
1.07M
        enum Backup2x8Flags prev_flag = 0;
146
4.49M
        for (int sbx = 0; sbx < sb64w; sbx++, edges |= CDEF_HAVE_LEFT) {
147
3.41M
            const int sb128x = sbx >> 1;
148
3.41M
            const int sb64_idx = ((by & sbsz) >> 3) + (sbx & 1);
149
3.41M
            const int cdef_idx = lflvl[sb128x].cdef_idx[sb64_idx];
150
3.41M
            if (cdef_idx == -1 ||
151
1.09M
                (!f->frame_hdr->cdef.y_strength[cdef_idx] &&
152
354k
                 !f->frame_hdr->cdef.uv_strength[cdef_idx]))
153
2.62M
            {
154
2.62M
                prev_flag = 0;
155
2.62M
                goto next_sb;
156
2.62M
            }
157
158
            // Create a complete 32-bit mask for the sb row ahead of time.
159
794k
            const uint16_t (*noskip_row)[2] = &lflvl[sb128x].noskip_mask[by_idx];
160
794k
            const unsigned noskip_mask = (unsigned) noskip_row[0][1] << 16 |
161
794k
                                                    noskip_row[0][0];
162
163
794k
            const int y_lvl = f->frame_hdr->cdef.y_strength[cdef_idx];
164
794k
            const int uv_lvl = f->frame_hdr->cdef.uv_strength[cdef_idx];
165
794k
            const enum Backup2x8Flags flag = !!y_lvl + (!!uv_lvl << 1);
166
167
794k
            const int y_pri_lvl = (y_lvl >> 2) << bitdepth_min_8;
168
794k
            int y_sec_lvl = y_lvl & 3;
169
794k
            y_sec_lvl += y_sec_lvl == 3;
170
794k
            y_sec_lvl <<= bitdepth_min_8;
171
172
794k
            const int uv_pri_lvl = (uv_lvl >> 2) << bitdepth_min_8;
173
794k
            int uv_sec_lvl = uv_lvl & 3;
174
794k
            uv_sec_lvl += uv_sec_lvl == 3;
175
794k
            uv_sec_lvl <<= bitdepth_min_8;
176
177
794k
            pixel *bptrs[3] = { iptrs[0], iptrs[1], iptrs[2] };
178
6.71M
            for (int bx = sbx * sbsz; bx < imin((sbx + 1) * sbsz, f->bw);
179
5.92M
                 bx += 2, edges |= CDEF_HAVE_LEFT)
180
5.86M
            {
181
5.86M
                if (bx + 2 >= f->bw) edges &= ~CDEF_HAVE_RIGHT;
182
183
                // check if this 8x8 block had any coded coefficients; if not,
184
                // go to the next block
185
5.86M
                const uint32_t bx_mask = 3U << (bx & 30);
186
5.86M
                if (!(noskip_mask & bx_mask)) {
187
1.28M
                    prev_flag = 0;
188
1.28M
                    goto next_b;
189
1.28M
                }
190
4.57M
                const enum Backup2x8Flags do_left = (prev_flag ^ flag) & flag;
191
4.57M
                prev_flag = flag;
192
4.57M
                if (do_left && edges & CDEF_HAVE_LEFT) {
193
                    // we didn't backup the prefilter data because it wasn't
194
                    // there, so do it here instead
195
216k
                    backup2x8(lr_bak[bit], bptrs, f->cur.stride, 0, layout, do_left);
196
216k
                }
197
4.57M
                if (edges & CDEF_HAVE_RIGHT) {
198
                    // backup pre-filter data for next iteration
199
4.52M
                    backup2x8(lr_bak[!bit], bptrs, f->cur.stride, 8, layout, flag);
200
4.52M
                }
201
202
4.57M
                int dir;
203
4.57M
                unsigned variance;
204
4.57M
                if (y_pri_lvl || uv_pri_lvl)
205
4.17M
                    dir = dsp->cdef.dir(bptrs[0], f->cur.stride[0],
206
4.17M
                                        &variance HIGHBD_CALL_SUFFIX);
207
208
4.57M
                const pixel *top, *bot;
209
4.57M
                ptrdiff_t offset;
210
211
4.57M
                if (!have_tt) goto st_y;
212
3.94M
                if (sbrow_start && by == by_start) {
213
256k
                    if (resize) {
214
7.71k
                        offset = (sby - 1) * 4 * y_stride + bx * 4;
215
7.71k
                        top = &f->lf.cdef_lpf_line[0][offset];
216
249k
                    } else {
217
249k
                        offset = (sby * (4 << sb128) - 4) * y_stride + bx * 4;
218
249k
                        top = &f->lf.lr_lpf_line[0][offset];
219
249k
                    }
220
256k
                    bot = bptrs[0] + 8 * y_stride;
221
3.68M
                } else if (!sbrow_start && by + 2 >= by_end) {
222
299k
                    top = &f->lf.cdef_line[tf][0][sby * 4 * y_stride + bx * 4];
223
299k
                    if (resize) {
224
9.15k
                        offset = (sby * 4 + 2) * y_stride + bx * 4;
225
9.15k
                        bot = &f->lf.cdef_lpf_line[0][offset];
226
290k
                    } else {
227
290k
                        const int line = sby * (4 << sb128) + 4 * sb128 + 2;
228
290k
                        offset = line * y_stride + bx * 4;
229
290k
                        bot = &f->lf.lr_lpf_line[0][offset];
230
290k
                    }
231
3.38M
                } else {
232
4.02M
            st_y:;
233
4.02M
                    offset = sby * 4 * y_stride;
234
4.02M
                    top = &f->lf.cdef_line[tf][0][have_tt * offset + bx * 4];
235
4.02M
                    bot = bptrs[0] + 8 * y_stride;
236
4.02M
                }
237
4.58M
                if (y_pri_lvl) {
238
3.68M
                    const int adj_y_pri_lvl = adjust_strength(y_pri_lvl, variance);
239
3.68M
                    if (adj_y_pri_lvl || y_sec_lvl)
240
3.20M
                        dsp->cdef.fb[0](bptrs[0], f->cur.stride[0], lr_bak[bit][0],
241
3.20M
                                        top, bot, adj_y_pri_lvl, y_sec_lvl,
242
3.20M
                                        dir, damping, edges HIGHBD_CALL_SUFFIX);
243
3.68M
                } else if (y_sec_lvl)
244
595k
                    dsp->cdef.fb[0](bptrs[0], f->cur.stride[0], lr_bak[bit][0],
245
595k
                                    top, bot, 0, y_sec_lvl, 0, damping,
246
595k
                                    edges HIGHBD_CALL_SUFFIX);
247
248
4.58M
                if (!uv_lvl) goto skip_uv;
249
4.58M
                assert(layout != DAV1D_PIXEL_LAYOUT_I400);
250
251
4.08M
                const int uvdir = uv_pri_lvl ? uv_dir[dir] : 0;
252
12.2M
                for (int pl = 1; pl <= 2; pl++) {
253
8.16M
                    if (!have_tt) goto st_uv;
254
7.16M
                    if (sbrow_start && by == by_start) {
255
442k
                        if (resize) {
256
14.6k
                            offset = (sby - 1) * 4 * uv_stride + (bx * 4 >> ss_hor);
257
14.6k
                            top = &f->lf.cdef_lpf_line[pl][offset];
258
427k
                        } else {
259
427k
                            const int line = sby * (4 << sb128) - 4;
260
427k
                            offset = line * uv_stride + (bx * 4 >> ss_hor);
261
427k
                            top = &f->lf.lr_lpf_line[pl][offset];
262
427k
                        }
263
442k
                        bot = bptrs[pl] + (8 >> ss_ver) * uv_stride;
264
6.71M
                    } else if (!sbrow_start && by + 2 >= by_end) {
265
510k
                        const ptrdiff_t top_offset = sby * 8 * uv_stride +
266
510k
                                                     (bx * 4 >> ss_hor);
267
510k
                        top = &f->lf.cdef_line[tf][pl][top_offset];
268
510k
                        if (resize) {
269
16.5k
                            offset = (sby * 4 + 2) * uv_stride + (bx * 4 >> ss_hor);
270
16.5k
                            bot = &f->lf.cdef_lpf_line[pl][offset];
271
493k
                        } else {
272
493k
                            const int line = sby * (4 << sb128) + 4 * sb128 + 2;
273
493k
                            offset = line * uv_stride + (bx * 4 >> ss_hor);
274
493k
                            bot = &f->lf.lr_lpf_line[pl][offset];
275
493k
                        }
276
6.20M
                    } else {
277
7.22M
                st_uv:;
278
7.22M
                        const ptrdiff_t offset = sby * 8 * uv_stride;
279
7.22M
                        top = &f->lf.cdef_line[tf][pl][have_tt * offset + (bx * 4 >> ss_hor)];
280
7.22M
                        bot = bptrs[pl] + (8 >> ss_ver) * uv_stride;
281
7.22M
                    }
282
8.18M
                    dsp->cdef.fb[uv_idx](bptrs[pl], f->cur.stride[1],
283
8.18M
                                         lr_bak[bit][pl], top, bot,
284
8.18M
                                         uv_pri_lvl, uv_sec_lvl, uvdir,
285
8.18M
                                         damping - 1, edges HIGHBD_CALL_SUFFIX);
286
8.18M
                }
287
288
4.65M
            skip_uv:
289
4.65M
                bit ^= 1;
290
291
5.92M
            next_b:
292
5.92M
                bptrs[0] += 8;
293
5.92M
                bptrs[1] += 8 >> ss_hor;
294
5.92M
                bptrs[2] += 8 >> ss_hor;
295
5.92M
            }
296
297
3.41M
        next_sb:
298
3.41M
            iptrs[0] += sbsz * 4;
299
3.41M
            iptrs[1] += sbsz * 4 >> ss_hor;
300
3.41M
            iptrs[2] += sbsz * 4 >> ss_hor;
301
3.41M
        }
302
303
1.07M
        ptrs[0] += 8 * PXSTRIDE(f->cur.stride[0]);
304
1.07M
        ptrs[1] += 8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver;
305
1.07M
        ptrs[2] += 8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver;
306
1.07M
        tc->top_pre_cdef_toggle ^= 1;
307
1.07M
    }
308
219k
}
dav1d_cdef_brow_16bpc
Line
Count
Source
102
768k
{
103
768k
    Dav1dFrameContext *const f = (Dav1dFrameContext *)tc->f;
104
768k
    const int bitdepth_min_8 = BITDEPTH == 8 ? 0 : f->cur.p.bpc - 8;
105
768k
    const Dav1dDSPContext *const dsp = f->dsp;
106
768k
    enum CdefEdgeFlags edges = CDEF_HAVE_BOTTOM | (by_start > 0 ? CDEF_HAVE_TOP : 0);
107
768k
    pixel *ptrs[3] = { p[0], p[1], p[2] };
108
768k
    const int sbsz = 16;
109
768k
    const int sb64w = f->sb128w << 1;
110
768k
    const int damping = f->frame_hdr->cdef.damping + bitdepth_min_8;
111
768k
    const enum Dav1dPixelLayout layout = f->cur.p.layout;
112
768k
    const int uv_idx = DAV1D_PIXEL_LAYOUT_I444 - layout;
113
768k
    const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420;
114
768k
    const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444;
115
768k
    static const uint8_t uv_dirs[2][8] = { { 0, 1, 2, 3, 4, 5, 6, 7 },
116
768k
                                           { 7, 0, 2, 4, 5, 6, 6, 6 } };
117
768k
    const uint8_t *uv_dir = uv_dirs[layout == DAV1D_PIXEL_LAYOUT_I422];
118
768k
    const int have_tt = f->c->n_tc > 1;
119
768k
    const int sb128 = f->seq_hdr->sb128;
120
768k
    const int resize = f->frame_hdr->width[0] != f->frame_hdr->width[1];
121
768k
    const ptrdiff_t y_stride = PXSTRIDE(f->cur.stride[0]);
122
768k
    const ptrdiff_t uv_stride = PXSTRIDE(f->cur.stride[1]);
123
124
6.41M
    for (int bit = 0, by = by_start; by < by_end; by += 2, edges |= CDEF_HAVE_TOP) {
125
5.66M
        const int tf = tc->top_pre_cdef_toggle;
126
5.66M
        const int by_idx = (by & 30) >> 1;
127
5.66M
        if (by + 2 >= f->bh) edges &= ~CDEF_HAVE_BOTTOM;
128
129
5.66M
        if ((!have_tt || sbrow_start || by + 2 < by_end) &&
130
5.30M
            edges & CDEF_HAVE_BOTTOM)
131
5.30M
        {
132
            // backup pre-filter data for next iteration
133
5.30M
            pixel *const cdef_top_bak[3] = {
134
5.30M
                f->lf.cdef_line[!tf][0] + have_tt * sby * 4 * y_stride,
135
5.30M
                f->lf.cdef_line[!tf][1] + have_tt * sby * 8 * uv_stride,
136
5.30M
                f->lf.cdef_line[!tf][2] + have_tt * sby * 8 * uv_stride
137
5.30M
            };
138
5.30M
            backup2lines(cdef_top_bak, ptrs, f->cur.stride, layout);
139
5.30M
        }
140
141
5.66M
        ALIGN_STK_16(pixel, lr_bak, 2 /* idx */, [3 /* plane */][8 /* y */][2 /* x */]);
142
5.66M
        pixel *iptrs[3] = { ptrs[0], ptrs[1], ptrs[2] };
143
5.66M
        edges &= ~CDEF_HAVE_LEFT;
144
5.66M
        edges |= CDEF_HAVE_RIGHT;
145
5.66M
        enum Backup2x8Flags prev_flag = 0;
146
17.7M
        for (int sbx = 0; sbx < sb64w; sbx++, edges |= CDEF_HAVE_LEFT) {
147
12.1M
            const int sb128x = sbx >> 1;
148
12.1M
            const int sb64_idx = ((by & sbsz) >> 3) + (sbx & 1);
149
12.1M
            const int cdef_idx = lflvl[sb128x].cdef_idx[sb64_idx];
150
12.1M
            if (cdef_idx == -1 ||
151
1.66M
                (!f->frame_hdr->cdef.y_strength[cdef_idx] &&
152
1.10M
                 !f->frame_hdr->cdef.uv_strength[cdef_idx]))
153
11.5M
            {
154
11.5M
                prev_flag = 0;
155
11.5M
                goto next_sb;
156
11.5M
            }
157
158
            // Create a complete 32-bit mask for the sb row ahead of time.
159
589k
            const uint16_t (*noskip_row)[2] = &lflvl[sb128x].noskip_mask[by_idx];
160
589k
            const unsigned noskip_mask = (unsigned) noskip_row[0][1] << 16 |
161
589k
                                                    noskip_row[0][0];
162
163
589k
            const int y_lvl = f->frame_hdr->cdef.y_strength[cdef_idx];
164
589k
            const int uv_lvl = f->frame_hdr->cdef.uv_strength[cdef_idx];
165
589k
            const enum Backup2x8Flags flag = !!y_lvl + (!!uv_lvl << 1);
166
167
589k
            const int y_pri_lvl = (y_lvl >> 2) << bitdepth_min_8;
168
589k
            int y_sec_lvl = y_lvl & 3;
169
589k
            y_sec_lvl += y_sec_lvl == 3;
170
589k
            y_sec_lvl <<= bitdepth_min_8;
171
172
589k
            const int uv_pri_lvl = (uv_lvl >> 2) << bitdepth_min_8;
173
589k
            int uv_sec_lvl = uv_lvl & 3;
174
589k
            uv_sec_lvl += uv_sec_lvl == 3;
175
589k
            uv_sec_lvl <<= bitdepth_min_8;
176
177
589k
            pixel *bptrs[3] = { iptrs[0], iptrs[1], iptrs[2] };
178
3.93M
            for (int bx = sbx * sbsz; bx < imin((sbx + 1) * sbsz, f->bw);
179
3.34M
                 bx += 2, edges |= CDEF_HAVE_LEFT)
180
3.32M
            {
181
3.32M
                if (bx + 2 >= f->bw) edges &= ~CDEF_HAVE_RIGHT;
182
183
                // check if this 8x8 block had any coded coefficients; if not,
184
                // go to the next block
185
3.32M
                const uint32_t bx_mask = 3U << (bx & 30);
186
3.32M
                if (!(noskip_mask & bx_mask)) {
187
392k
                    prev_flag = 0;
188
392k
                    goto next_b;
189
392k
                }
190
2.93M
                const enum Backup2x8Flags do_left = (prev_flag ^ flag) & flag;
191
2.93M
                prev_flag = flag;
192
2.93M
                if (do_left && edges & CDEF_HAVE_LEFT) {
193
                    // we didn't back up the pre-filter data because it wasn't
194
                    // there, so do it here instead
195
55.1k
                    backup2x8(lr_bak[bit], bptrs, f->cur.stride, 0, layout, do_left);
196
55.1k
                }
197
2.93M
                if (edges & CDEF_HAVE_RIGHT) {
198
                    // back up pre-filter data for next iteration
199
2.78M
                    backup2x8(lr_bak[!bit], bptrs, f->cur.stride, 8, layout, flag);
200
2.78M
                }
201
202
2.93M
                int dir;
203
2.93M
                unsigned variance;
204
2.93M
                if (y_pri_lvl || uv_pri_lvl)
205
2.59M
                    dir = dsp->cdef.dir(bptrs[0], f->cur.stride[0],
206
2.59M
                                        &variance HIGHBD_CALL_SUFFIX);
207
208
2.93M
                const pixel *top, *bot;
209
2.93M
                ptrdiff_t offset;
210
211
2.93M
                if (!have_tt) goto st_y;
212
2.66M
                if (sbrow_start && by == by_start) {
213
173k
                    if (resize) {
214
30.4k
                        offset = (sby - 1) * 4 * y_stride + bx * 4;
215
30.4k
                        top = &f->lf.cdef_lpf_line[0][offset];
216
142k
                    } else {
217
142k
                        offset = (sby * (4 << sb128) - 4) * y_stride + bx * 4;
218
142k
                        top = &f->lf.lr_lpf_line[0][offset];
219
142k
                    }
220
173k
                    bot = bptrs[0] + 8 * y_stride;
221
2.49M
                } else if (!sbrow_start && by + 2 >= by_end) {
222
228k
                    top = &f->lf.cdef_line[tf][0][sby * 4 * y_stride + bx * 4];
223
228k
                    if (resize) {
224
49.8k
                        offset = (sby * 4 + 2) * y_stride + bx * 4;
225
49.8k
                        bot = &f->lf.cdef_lpf_line[0][offset];
226
178k
                    } else {
227
178k
                        const int line = sby * (4 << sb128) + 4 * sb128 + 2;
228
178k
                        offset = line * y_stride + bx * 4;
229
178k
                        bot = &f->lf.lr_lpf_line[0][offset];
230
178k
                    }
231
2.26M
                } else {
232
2.42M
            st_y:;
233
2.42M
                    offset = sby * 4 * y_stride;
234
2.42M
                    top = &f->lf.cdef_line[tf][0][have_tt * offset + bx * 4];
235
2.42M
                    bot = bptrs[0] + 8 * y_stride;
236
2.42M
                }
237
2.83M
                if (y_pri_lvl) {
238
2.41M
                    const int adj_y_pri_lvl = adjust_strength(y_pri_lvl, variance);
239
2.41M
                    if (adj_y_pri_lvl || y_sec_lvl)
240
1.87M
                        dsp->cdef.fb[0](bptrs[0], f->cur.stride[0], lr_bak[bit][0],
241
1.87M
                                        top, bot, adj_y_pri_lvl, y_sec_lvl,
242
1.87M
                                        dir, damping, edges HIGHBD_CALL_SUFFIX);
243
2.41M
                } else if (y_sec_lvl)
244
145k
                    dsp->cdef.fb[0](bptrs[0], f->cur.stride[0], lr_bak[bit][0],
245
145k
                                    top, bot, 0, y_sec_lvl, 0, damping,
246
145k
                                    edges HIGHBD_CALL_SUFFIX);
247
248
2.83M
                if (!uv_lvl) goto skip_uv;
249
2.83M
                assert(layout != DAV1D_PIXEL_LAYOUT_I400);
250
251
1.86M
                const int uvdir = uv_pri_lvl ? uv_dir[dir] : 0;
252
5.63M
                for (int pl = 1; pl <= 2; pl++) {
253
3.73M
                    if (!have_tt) goto st_uv;
254
3.52M
                    if (sbrow_start && by == by_start) {
255
213k
                        if (resize) {
256
32.1k
                            offset = (sby - 1) * 4 * uv_stride + (bx * 4 >> ss_hor);
257
32.1k
                            top = &f->lf.cdef_lpf_line[pl][offset];
258
181k
                        } else {
259
181k
                            const int line = sby * (4 << sb128) - 4;
260
181k
                            offset = line * uv_stride + (bx * 4 >> ss_hor);
261
181k
                            top = &f->lf.lr_lpf_line[pl][offset];
262
181k
                        }
263
213k
                        bot = bptrs[pl] + (8 >> ss_ver) * uv_stride;
264
3.33M
                    } else if (!sbrow_start && by + 2 >= by_end) {
265
254k
                        const ptrdiff_t top_offset = sby * 8 * uv_stride +
266
254k
                                                     (bx * 4 >> ss_hor);
267
254k
                        top = &f->lf.cdef_line[tf][pl][top_offset];
268
254k
                        if (resize) {
269
39.9k
                            offset = (sby * 4 + 2) * uv_stride + (bx * 4 >> ss_hor);
270
39.9k
                            bot = &f->lf.cdef_lpf_line[pl][offset];
271
214k
                        } else {
272
214k
                            const int line = sby * (4 << sb128) + 4 * sb128 + 2;
273
214k
                            offset = line * uv_stride + (bx * 4 >> ss_hor);
274
214k
                            bot = &f->lf.lr_lpf_line[pl][offset];
275
214k
                        }
276
3.06M
                    } else {
277
3.30M
                st_uv:;
278
3.30M
                        const ptrdiff_t offset = sby * 8 * uv_stride;
279
3.30M
                        top = &f->lf.cdef_line[tf][pl][have_tt * offset + (bx * 4 >> ss_hor)];
280
3.30M
                        bot = bptrs[pl] + (8 >> ss_ver) * uv_stride;
281
3.30M
                    }
282
3.76M
                    dsp->cdef.fb[uv_idx](bptrs[pl], f->cur.stride[1],
283
3.76M
                                         lr_bak[bit][pl], top, bot,
284
3.76M
                                         uv_pri_lvl, uv_sec_lvl, uvdir,
285
3.76M
                                         damping - 1, edges HIGHBD_CALL_SUFFIX);
286
3.76M
                }
287
288
2.95M
            skip_uv:
289
2.95M
                bit ^= 1;
290
291
3.34M
            next_b:
292
3.34M
                bptrs[0] += 8;
293
3.34M
                bptrs[1] += 8 >> ss_hor;
294
3.34M
                bptrs[2] += 8 >> ss_hor;
295
3.34M
            }
296
297
12.0M
        next_sb:
298
12.0M
            iptrs[0] += sbsz * 4;
299
12.0M
            iptrs[1] += sbsz * 4 >> ss_hor;
300
12.0M
            iptrs[2] += sbsz * 4 >> ss_hor;
301
12.0M
        }
302
303
5.64M
        ptrs[0] += 8 * PXSTRIDE(f->cur.stride[0]);
304
5.64M
        ptrs[1] += 8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver;
305
5.64M
        ptrs[2] += 8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver;
306
5.64M
        tc->top_pre_cdef_toggle ^= 1;
307
5.64M
    }
308
768k
}