Coverage Report

Created: 2026-05-30 06:10

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/dav1d/src/lf_apply_tmpl.c
Line
Count
Source
1
/*
2
 * Copyright © 2018, VideoLAN and dav1d authors
3
 * Copyright © 2018, Two Orioles, LLC
4
 * All rights reserved.
5
 *
6
 * Redistribution and use in source and binary forms, with or without
7
 * modification, are permitted provided that the following conditions are met:
8
 *
9
 * 1. Redistributions of source code must retain the above copyright notice, this
10
 *    list of conditions and the following disclaimer.
11
 *
12
 * 2. Redistributions in binary form must reproduce the above copyright notice,
13
 *    this list of conditions and the following disclaimer in the documentation
14
 *    and/or other materials provided with the distribution.
15
 *
16
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
 */
27
28
#include "config.h"
29
30
#include <string.h>
31
32
#include "common/intops.h"
33
34
#include "src/lf_apply.h"
35
#include "src/lr_apply.h"
36
37
// The loop filter buffer stores 12 rows of pixels. A superblock block will
38
// contain at most 2 stripes. Each stripe requires 4 rows pixels (2 above
39
// and 2 below) the final 4 rows are used to swap the bottom of the last
40
// stripe with the top of the next super block row.
41
static void backup_lpf(const Dav1dFrameContext *const f,
42
                       pixel *dst, const ptrdiff_t dst_stride,
43
                       const pixel *src, const ptrdiff_t src_stride,
44
                       const int ss_ver, const int sb128,
45
                       int row, const int row_h, const int src_w,
46
                       const int h, const int ss_hor, const int lr_backup)
47
436k
{
48
436k
    const int cdef_backup = !lr_backup;
49
436k
    const int dst_w = f->frame_hdr->super_res.enabled ?
50
289k
                      (f->frame_hdr->width[1] + ss_hor) >> ss_hor : src_w;
51
52
    // The first stripe of the frame is shorter by 8 luma pixel rows.
53
436k
    int stripe_h = ((64 << (cdef_backup & sb128)) - 8 * !row) >> ss_ver;
54
436k
    src += (stripe_h - 2) * PXSTRIDE(src_stride);
55
56
436k
    if (f->c->n_tc == 1) {
57
0
        if (row) {
58
0
            const int top = 4 << sb128;
59
            // Copy the top part of the stored loop filtered pixels from the
60
            // previous sb row needed above the first stripe of this sb row.
61
0
            pixel_copy(&dst[PXSTRIDE(dst_stride) *  0],
62
0
                       &dst[PXSTRIDE(dst_stride) *  top],      dst_w);
63
0
            pixel_copy(&dst[PXSTRIDE(dst_stride) *  1],
64
0
                       &dst[PXSTRIDE(dst_stride) * (top + 1)], dst_w);
65
0
            pixel_copy(&dst[PXSTRIDE(dst_stride) *  2],
66
0
                       &dst[PXSTRIDE(dst_stride) * (top + 2)], dst_w);
67
0
            pixel_copy(&dst[PXSTRIDE(dst_stride) *  3],
68
0
                       &dst[PXSTRIDE(dst_stride) * (top + 3)], dst_w);
69
0
        }
70
0
        dst += 4 * PXSTRIDE(dst_stride);
71
0
    }
72
73
436k
    if (lr_backup && (f->frame_hdr->width[0] != f->frame_hdr->width[1])) {
74
55.4k
        while (row + stripe_h <= row_h) {
75
27.8k
            const int n_lines = 4 - (row + stripe_h + 1 == h);
76
27.8k
            f->dsp->mc.resize(dst, dst_stride, src, src_stride,
77
27.8k
                              dst_w, n_lines, src_w, f->resize_step[ss_hor],
78
27.8k
                              f->resize_start[ss_hor] HIGHBD_CALL_SUFFIX);
79
27.8k
            row += stripe_h; // unmodified stripe_h for the 1st stripe
80
27.8k
            stripe_h = 64 >> ss_ver;
81
27.8k
            src += stripe_h * PXSTRIDE(src_stride);
82
27.8k
            dst += n_lines * PXSTRIDE(dst_stride);
83
27.8k
            if (n_lines == 3) {
84
493
                pixel_copy(dst, &dst[-PXSTRIDE(dst_stride)], dst_w);
85
493
                dst += PXSTRIDE(dst_stride);
86
493
            }
87
27.8k
        }
88
408k
    } else {
89
748k
        while (row + stripe_h <= row_h) {
90
339k
            const int n_lines = 4 - (row + stripe_h + 1 == h);
91
1.69M
            for (int i = 0; i < 4; i++) {
92
1.35M
                pixel_copy(dst, i == n_lines ? &dst[-PXSTRIDE(dst_stride)] :
93
1.35M
                                               src, src_w);
94
1.35M
                dst += PXSTRIDE(dst_stride);
95
1.35M
                src += PXSTRIDE(src_stride);
96
1.35M
            }
97
339k
            row += stripe_h; // unmodified stripe_h for the 1st stripe
98
339k
            stripe_h = 64 >> ss_ver;
99
339k
            src += (stripe_h - 4) * PXSTRIDE(src_stride);
100
339k
        }
101
408k
    }
102
436k
}
103
104
void bytefn(dav1d_copy_lpf)(Dav1dFrameContext *const f,
105
                            /*const*/ pixel *const src[3], const int sby)
106
173k
{
107
173k
    const int have_tt = f->c->n_tc > 1;
108
173k
    const int resize = f->frame_hdr->width[0] != f->frame_hdr->width[1];
109
173k
    const int offset = 8 * !!sby;
110
173k
    const ptrdiff_t *const src_stride = f->cur.stride;
111
173k
    const ptrdiff_t *const lr_stride = f->sr_cur.p.stride;
112
173k
    const int tt_off = have_tt * sby * (4 << f->seq_hdr->sb128);
113
173k
    pixel *const dst[3] = {
114
173k
        f->lf.lr_lpf_line[0] + tt_off * PXSTRIDE(lr_stride[0]),
115
173k
        f->lf.lr_lpf_line[1] + tt_off * PXSTRIDE(lr_stride[1]),
116
173k
        f->lf.lr_lpf_line[2] + tt_off * PXSTRIDE(lr_stride[1])
117
173k
    };
118
119
    // TODO Also check block level restore type to reduce copying.
120
173k
    const int restore_planes = f->lf.restore_planes;
121
122
173k
    if (f->seq_hdr->cdef || restore_planes & LR_RESTORE_Y) {
123
167k
        const int h = f->cur.p.h;
124
167k
        const int w = f->bw << 2;
125
167k
        const int row_h = imin((sby + 1) << (6 + f->seq_hdr->sb128), h - 1);
126
167k
        const int y_stripe = (sby << (6 + f->seq_hdr->sb128)) - offset;
127
167k
        if (restore_planes & LR_RESTORE_Y || !resize)
128
153k
            backup_lpf(f, dst[0], lr_stride[0],
129
153k
                       src[0] - offset * PXSTRIDE(src_stride[0]), src_stride[0],
130
153k
                       0, f->seq_hdr->sb128, y_stripe, row_h, w, h, 0, 1);
131
167k
        if (have_tt && resize) {
132
27.6k
            const ptrdiff_t cdef_off_y = sby * 4 * PXSTRIDE(src_stride[0]);
133
27.6k
            backup_lpf(f, f->lf.cdef_lpf_line[0] + cdef_off_y, src_stride[0],
134
27.6k
                       src[0] - offset * PXSTRIDE(src_stride[0]), src_stride[0],
135
27.6k
                       0, f->seq_hdr->sb128, y_stripe, row_h, w, h, 0, 0);
136
27.6k
        }
137
167k
    }
138
173k
    if ((f->seq_hdr->cdef || restore_planes & (LR_RESTORE_U | LR_RESTORE_V)) &&
139
167k
        f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400)
140
136k
    {
141
136k
        const int ss_ver = f->sr_cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
142
136k
        const int ss_hor = f->sr_cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
143
136k
        const int h = (f->cur.p.h + ss_ver) >> ss_ver;
144
136k
        const int w = f->bw << (2 - ss_hor);
145
136k
        const int row_h = imin((sby + 1) << ((6 - ss_ver) + f->seq_hdr->sb128), h - 1);
146
136k
        const int offset_uv = offset >> ss_ver;
147
136k
        const int y_stripe = (sby << ((6 - ss_ver) + f->seq_hdr->sb128)) - offset_uv;
148
136k
        const ptrdiff_t cdef_off_uv = sby * 4 * PXSTRIDE(src_stride[1]);
149
136k
        if (f->seq_hdr->cdef || restore_planes & LR_RESTORE_U) {
150
112k
            if (restore_planes & LR_RESTORE_U || !resize)
151
102k
                backup_lpf(f, dst[1], lr_stride[1],
152
102k
                           src[1] - offset_uv * PXSTRIDE(src_stride[1]),
153
102k
                           src_stride[1], ss_ver, f->seq_hdr->sb128, y_stripe,
154
102k
                           row_h, w, h, ss_hor, 1);
155
112k
            if (have_tt && resize)
156
16.8k
                backup_lpf(f, f->lf.cdef_lpf_line[1] + cdef_off_uv, src_stride[1],
157
16.8k
                           src[1] - offset_uv * PXSTRIDE(src_stride[1]),
158
16.8k
                           src_stride[1], ss_ver, f->seq_hdr->sb128, y_stripe,
159
16.8k
                           row_h, w, h, ss_hor, 0);
160
112k
        }
161
136k
        if (f->seq_hdr->cdef || restore_planes & LR_RESTORE_V) {
162
128k
            if (restore_planes & LR_RESTORE_V || !resize)
163
119k
                backup_lpf(f, dst[2], lr_stride[1],
164
119k
                           src[2] - offset_uv * PXSTRIDE(src_stride[1]),
165
119k
                           src_stride[1], ss_ver, f->seq_hdr->sb128, y_stripe,
166
119k
                           row_h, w, h, ss_hor, 1);
167
128k
            if (have_tt && resize)
168
16.4k
                backup_lpf(f, f->lf.cdef_lpf_line[2] + cdef_off_uv, src_stride[1],
169
16.4k
                           src[2] - offset_uv * PXSTRIDE(src_stride[1]),
170
16.4k
                           src_stride[1], ss_ver, f->seq_hdr->sb128, y_stripe,
171
16.4k
                           row_h, w, h, ss_hor, 0);
172
128k
        }
173
136k
    }
174
173k
}
dav1d_copy_lpf_8bpc
Line
Count
Source
106
85.9k
{
107
85.9k
    const int have_tt = f->c->n_tc > 1;
108
85.9k
    const int resize = f->frame_hdr->width[0] != f->frame_hdr->width[1];
109
85.9k
    const int offset = 8 * !!sby;
110
85.9k
    const ptrdiff_t *const src_stride = f->cur.stride;
111
85.9k
    const ptrdiff_t *const lr_stride = f->sr_cur.p.stride;
112
85.9k
    const int tt_off = have_tt * sby * (4 << f->seq_hdr->sb128);
113
85.9k
    pixel *const dst[3] = {
114
85.9k
        f->lf.lr_lpf_line[0] + tt_off * PXSTRIDE(lr_stride[0]),
115
85.9k
        f->lf.lr_lpf_line[1] + tt_off * PXSTRIDE(lr_stride[1]),
116
85.9k
        f->lf.lr_lpf_line[2] + tt_off * PXSTRIDE(lr_stride[1])
117
85.9k
    };
118
119
    // TODO Also check block level restore type to reduce copying.
120
85.9k
    const int restore_planes = f->lf.restore_planes;
121
122
85.9k
    if (f->seq_hdr->cdef || restore_planes & LR_RESTORE_Y) {
123
82.6k
        const int h = f->cur.p.h;
124
82.6k
        const int w = f->bw << 2;
125
82.6k
        const int row_h = imin((sby + 1) << (6 + f->seq_hdr->sb128), h - 1);
126
82.6k
        const int y_stripe = (sby << (6 + f->seq_hdr->sb128)) - offset;
127
82.6k
        if (restore_planes & LR_RESTORE_Y || !resize)
128
75.0k
            backup_lpf(f, dst[0], lr_stride[0],
129
75.0k
                       src[0] - offset * PXSTRIDE(src_stride[0]), src_stride[0],
130
75.0k
                       0, f->seq_hdr->sb128, y_stripe, row_h, w, h, 0, 1);
131
82.6k
        if (have_tt && resize) {
132
14.3k
            const ptrdiff_t cdef_off_y = sby * 4 * PXSTRIDE(src_stride[0]);
133
14.3k
            backup_lpf(f, f->lf.cdef_lpf_line[0] + cdef_off_y, src_stride[0],
134
14.3k
                       src[0] - offset * PXSTRIDE(src_stride[0]), src_stride[0],
135
14.3k
                       0, f->seq_hdr->sb128, y_stripe, row_h, w, h, 0, 0);
136
14.3k
        }
137
82.6k
    }
138
85.9k
    if ((f->seq_hdr->cdef || restore_planes & (LR_RESTORE_U | LR_RESTORE_V)) &&
139
83.0k
        f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400)
140
68.7k
    {
141
68.7k
        const int ss_ver = f->sr_cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
142
68.7k
        const int ss_hor = f->sr_cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
143
68.7k
        const int h = (f->cur.p.h + ss_ver) >> ss_ver;
144
68.7k
        const int w = f->bw << (2 - ss_hor);
145
68.7k
        const int row_h = imin((sby + 1) << ((6 - ss_ver) + f->seq_hdr->sb128), h - 1);
146
68.7k
        const int offset_uv = offset >> ss_ver;
147
68.7k
        const int y_stripe = (sby << ((6 - ss_ver) + f->seq_hdr->sb128)) - offset_uv;
148
68.7k
        const ptrdiff_t cdef_off_uv = sby * 4 * PXSTRIDE(src_stride[1]);
149
68.7k
        if (f->seq_hdr->cdef || restore_planes & LR_RESTORE_U) {
150
57.3k
            if (restore_planes & LR_RESTORE_U || !resize)
151
51.0k
                backup_lpf(f, dst[1], lr_stride[1],
152
51.0k
                           src[1] - offset_uv * PXSTRIDE(src_stride[1]),
153
51.0k
                           src_stride[1], ss_ver, f->seq_hdr->sb128, y_stripe,
154
51.0k
                           row_h, w, h, ss_hor, 1);
155
57.3k
            if (have_tt && resize)
156
9.25k
                backup_lpf(f, f->lf.cdef_lpf_line[1] + cdef_off_uv, src_stride[1],
157
9.25k
                           src[1] - offset_uv * PXSTRIDE(src_stride[1]),
158
9.25k
                           src_stride[1], ss_ver, f->seq_hdr->sb128, y_stripe,
159
9.25k
                           row_h, w, h, ss_hor, 0);
160
57.3k
        }
161
68.7k
        if (f->seq_hdr->cdef || restore_planes & LR_RESTORE_V) {
162
63.3k
            if (restore_planes & LR_RESTORE_V || !resize)
163
57.2k
                backup_lpf(f, dst[2], lr_stride[1],
164
57.2k
                           src[2] - offset_uv * PXSTRIDE(src_stride[1]),
165
57.2k
                           src_stride[1], ss_ver, f->seq_hdr->sb128, y_stripe,
166
57.2k
                           row_h, w, h, ss_hor, 1);
167
63.3k
            if (have_tt && resize)
168
9.43k
                backup_lpf(f, f->lf.cdef_lpf_line[2] + cdef_off_uv, src_stride[1],
169
9.43k
                           src[2] - offset_uv * PXSTRIDE(src_stride[1]),
170
9.43k
                           src_stride[1], ss_ver, f->seq_hdr->sb128, y_stripe,
171
9.43k
                           row_h, w, h, ss_hor, 0);
172
63.3k
        }
173
68.7k
    }
174
85.9k
}
dav1d_copy_lpf_16bpc
Line
Count
Source
106
88.0k
{
107
88.0k
    const int have_tt = f->c->n_tc > 1;
108
88.0k
    const int resize = f->frame_hdr->width[0] != f->frame_hdr->width[1];
109
88.0k
    const int offset = 8 * !!sby;
110
88.0k
    const ptrdiff_t *const src_stride = f->cur.stride;
111
88.0k
    const ptrdiff_t *const lr_stride = f->sr_cur.p.stride;
112
88.0k
    const int tt_off = have_tt * sby * (4 << f->seq_hdr->sb128);
113
88.0k
    pixel *const dst[3] = {
114
88.0k
        f->lf.lr_lpf_line[0] + tt_off * PXSTRIDE(lr_stride[0]),
115
88.0k
        f->lf.lr_lpf_line[1] + tt_off * PXSTRIDE(lr_stride[1]),
116
88.0k
        f->lf.lr_lpf_line[2] + tt_off * PXSTRIDE(lr_stride[1])
117
88.0k
    };
118
119
    // TODO Also check block level restore type to reduce copying.
120
88.0k
    const int restore_planes = f->lf.restore_planes;
121
122
88.0k
    if (f->seq_hdr->cdef || restore_planes & LR_RESTORE_Y) {
123
84.9k
        const int h = f->cur.p.h;
124
84.9k
        const int w = f->bw << 2;
125
84.9k
        const int row_h = imin((sby + 1) << (6 + f->seq_hdr->sb128), h - 1);
126
84.9k
        const int y_stripe = (sby << (6 + f->seq_hdr->sb128)) - offset;
127
84.9k
        if (restore_planes & LR_RESTORE_Y || !resize)
128
78.1k
            backup_lpf(f, dst[0], lr_stride[0],
129
78.1k
                       src[0] - offset * PXSTRIDE(src_stride[0]), src_stride[0],
130
78.1k
                       0, f->seq_hdr->sb128, y_stripe, row_h, w, h, 0, 1);
131
84.9k
        if (have_tt && resize) {
132
13.3k
            const ptrdiff_t cdef_off_y = sby * 4 * PXSTRIDE(src_stride[0]);
133
13.3k
            backup_lpf(f, f->lf.cdef_lpf_line[0] + cdef_off_y, src_stride[0],
134
13.3k
                       src[0] - offset * PXSTRIDE(src_stride[0]), src_stride[0],
135
13.3k
                       0, f->seq_hdr->sb128, y_stripe, row_h, w, h, 0, 0);
136
13.3k
        }
137
84.9k
    }
138
88.0k
    if ((f->seq_hdr->cdef || restore_planes & (LR_RESTORE_U | LR_RESTORE_V)) &&
139
84.7k
        f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400)
140
67.6k
    {
141
67.6k
        const int ss_ver = f->sr_cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
142
67.6k
        const int ss_hor = f->sr_cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
143
67.6k
        const int h = (f->cur.p.h + ss_ver) >> ss_ver;
144
67.6k
        const int w = f->bw << (2 - ss_hor);
145
67.6k
        const int row_h = imin((sby + 1) << ((6 - ss_ver) + f->seq_hdr->sb128), h - 1);
146
67.6k
        const int offset_uv = offset >> ss_ver;
147
67.6k
        const int y_stripe = (sby << ((6 - ss_ver) + f->seq_hdr->sb128)) - offset_uv;
148
67.6k
        const ptrdiff_t cdef_off_uv = sby * 4 * PXSTRIDE(src_stride[1]);
149
67.6k
        if (f->seq_hdr->cdef || restore_planes & LR_RESTORE_U) {
150
55.3k
            if (restore_planes & LR_RESTORE_U || !resize)
151
51.8k
                backup_lpf(f, dst[1], lr_stride[1],
152
51.8k
                           src[1] - offset_uv * PXSTRIDE(src_stride[1]),
153
51.8k
                           src_stride[1], ss_ver, f->seq_hdr->sb128, y_stripe,
154
51.8k
                           row_h, w, h, ss_hor, 1);
155
55.3k
            if (have_tt && resize)
156
7.56k
                backup_lpf(f, f->lf.cdef_lpf_line[1] + cdef_off_uv, src_stride[1],
157
7.56k
                           src[1] - offset_uv * PXSTRIDE(src_stride[1]),
158
7.56k
                           src_stride[1], ss_ver, f->seq_hdr->sb128, y_stripe,
159
7.56k
                           row_h, w, h, ss_hor, 0);
160
55.3k
        }
161
67.6k
        if (f->seq_hdr->cdef || restore_planes & LR_RESTORE_V) {
162
65.2k
            if (restore_planes & LR_RESTORE_V || !resize)
163
62.0k
                backup_lpf(f, dst[2], lr_stride[1],
164
62.0k
                           src[2] - offset_uv * PXSTRIDE(src_stride[1]),
165
62.0k
                           src_stride[1], ss_ver, f->seq_hdr->sb128, y_stripe,
166
62.0k
                           row_h, w, h, ss_hor, 1);
167
65.2k
            if (have_tt && resize)
168
7.03k
                backup_lpf(f, f->lf.cdef_lpf_line[2] + cdef_off_uv, src_stride[1],
169
7.03k
                           src[2] - offset_uv * PXSTRIDE(src_stride[1]),
170
7.03k
                           src_stride[1], ss_ver, f->seq_hdr->sb128, y_stripe,
171
7.03k
                           row_h, w, h, ss_hor, 0);
172
65.2k
        }
173
67.6k
    }
174
88.0k
}
175
176
static inline void filter_plane_cols_y(const Dav1dFrameContext *const f,
177
                                       const int have_left,
178
                                       const uint8_t (*lvl)[4],
179
                                       const ptrdiff_t b4_stride,
180
                                       const uint16_t (*const mask)[3][2],
181
                                       pixel *dst, const ptrdiff_t ls,
182
                                       const int w,
183
                                       const int starty4, const int endy4)
184
252k
{
185
252k
    const Dav1dDSPContext *const dsp = f->dsp;
186
187
    // filter edges between columns (e.g. block1 | block2)
188
4.20M
    for (int x = 0; x < w; x++) {
189
3.95M
        if (!have_left && !x) continue;
190
3.79M
        uint32_t hmask[4];
191
3.79M
        if (!starty4) {
192
2.99M
            hmask[0] = mask[x][0][0];
193
2.99M
            hmask[1] = mask[x][1][0];
194
2.99M
            hmask[2] = mask[x][2][0];
195
2.99M
            if (endy4 > 16) {
196
1.17M
                hmask[0] |= (unsigned) mask[x][0][1] << 16;
197
1.17M
                hmask[1] |= (unsigned) mask[x][1][1] << 16;
198
1.17M
                hmask[2] |= (unsigned) mask[x][2][1] << 16;
199
1.17M
            }
200
2.99M
        } else {
201
798k
            hmask[0] = mask[x][0][1];
202
798k
            hmask[1] = mask[x][1][1];
203
798k
            hmask[2] = mask[x][2][1];
204
798k
        }
205
3.79M
        hmask[3] = 0;
206
3.79M
        dsp->lf.loop_filter_sb[0][0](&dst[x * 4], ls, hmask,
207
3.79M
                                     (const uint8_t(*)[4]) &lvl[x][0], b4_stride,
208
3.79M
                                     &f->lf.lim_lut, endy4 - starty4 HIGHBD_CALL_SUFFIX);
209
3.79M
    }
210
252k
}
211
212
static inline void filter_plane_rows_y(const Dav1dFrameContext *const f,
213
                                       const int have_top,
214
                                       const uint8_t (*lvl)[4],
215
                                       const ptrdiff_t b4_stride,
216
                                       const uint16_t (*const mask)[3][2],
217
                                       pixel *dst, const ptrdiff_t ls,
218
                                       const int w,
219
                                       const int starty4, const int endy4)
220
251k
{
221
251k
    const Dav1dDSPContext *const dsp = f->dsp;
222
223
    //                                 block1
224
    // filter edges between rows (e.g. ------)
225
    //                                 block2
226
3.98M
    for (int y = starty4; y < endy4;
227
3.73M
         y++, dst += 4 * PXSTRIDE(ls), lvl += b4_stride)
228
3.73M
    {
229
3.73M
        if (!have_top && !y) continue;
230
3.61M
        const uint32_t vmask[4] = {
231
3.61M
            mask[y][0][0] | ((unsigned) mask[y][0][1] << 16),
232
3.61M
            mask[y][1][0] | ((unsigned) mask[y][1][1] << 16),
233
3.61M
            mask[y][2][0] | ((unsigned) mask[y][2][1] << 16),
234
3.61M
            0,
235
3.61M
        };
236
3.61M
        dsp->lf.loop_filter_sb[0][1](dst, ls, vmask,
237
3.61M
                                     (const uint8_t(*)[4]) &lvl[0][1], b4_stride,
238
3.61M
                                     &f->lf.lim_lut, w HIGHBD_CALL_SUFFIX);
239
3.61M
    }
240
251k
}
241
242
static inline void filter_plane_cols_uv(const Dav1dFrameContext *const f,
243
                                        const int have_left,
244
                                        const uint8_t (*lvl)[4],
245
                                        const ptrdiff_t b4_stride,
246
                                        const uint16_t (*const mask)[2][2],
247
                                        pixel *const u, pixel *const v,
248
                                        const ptrdiff_t ls, const int w,
249
                                        const int starty4, const int endy4,
250
                                        const int ss_ver)
251
189k
{
252
189k
    const Dav1dDSPContext *const dsp = f->dsp;
253
254
    // filter edges between columns (e.g. block1 | block2)
255
2.29M
    for (int x = 0; x < w; x++) {
256
2.10M
        if (!have_left && !x) continue;
257
1.99M
        uint32_t hmask[3];
258
1.99M
        if (!starty4) {
259
1.64M
            hmask[0] = mask[x][0][0];
260
1.64M
            hmask[1] = mask[x][1][0];
261
1.64M
            if (endy4 > (16 >> ss_ver)) {
262
755k
                hmask[0] |= (unsigned) mask[x][0][1] << (16 >> ss_ver);
263
755k
                hmask[1] |= (unsigned) mask[x][1][1] << (16 >> ss_ver);
264
755k
            }
265
1.64M
        } else {
266
352k
            hmask[0] = mask[x][0][1];
267
352k
            hmask[1] = mask[x][1][1];
268
352k
        }
269
1.99M
        hmask[2] = 0;
270
1.99M
        dsp->lf.loop_filter_sb[1][0](&u[x * 4], ls, hmask,
271
1.99M
                                     (const uint8_t(*)[4]) &lvl[x][2], b4_stride,
272
1.99M
                                     &f->lf.lim_lut, endy4 - starty4 HIGHBD_CALL_SUFFIX);
273
1.99M
        dsp->lf.loop_filter_sb[1][0](&v[x * 4], ls, hmask,
274
1.99M
                                     (const uint8_t(*)[4]) &lvl[x][3], b4_stride,
275
1.99M
                                     &f->lf.lim_lut, endy4 - starty4 HIGHBD_CALL_SUFFIX);
276
1.99M
    }
277
189k
}
278
279
static inline void filter_plane_rows_uv(const Dav1dFrameContext *const f,
280
                                        const int have_top,
281
                                        const uint8_t (*lvl)[4],
282
                                        const ptrdiff_t b4_stride,
283
                                        const uint16_t (*const mask)[2][2],
284
                                        pixel *const u, pixel *const v,
285
                                        const ptrdiff_t ls, const int w,
286
                                        const int starty4, const int endy4,
287
                                        const int ss_hor)
288
188k
{
289
188k
    const Dav1dDSPContext *const dsp = f->dsp;
290
188k
    ptrdiff_t off_l = 0;
291
292
    //                                 block1
293
    // filter edges between rows (e.g. ------)
294
    //                                 block2
295
2.32M
    for (int y = starty4; y < endy4;
296
2.13M
         y++, off_l += 4 * PXSTRIDE(ls), lvl += b4_stride)
297
2.13M
    {
298
2.13M
        if (!have_top && !y) continue;
299
2.05M
        const uint32_t vmask[3] = {
300
2.05M
            mask[y][0][0] | ((unsigned) mask[y][0][1] << (16 >> ss_hor)),
301
2.05M
            mask[y][1][0] | ((unsigned) mask[y][1][1] << (16 >> ss_hor)),
302
2.05M
            0,
303
2.05M
        };
304
2.05M
        dsp->lf.loop_filter_sb[1][1](&u[off_l], ls, vmask,
305
2.05M
                                     (const uint8_t(*)[4]) &lvl[0][2], b4_stride,
306
2.05M
                                     &f->lf.lim_lut, w HIGHBD_CALL_SUFFIX);
307
2.05M
        dsp->lf.loop_filter_sb[1][1](&v[off_l], ls, vmask,
308
2.05M
                                     (const uint8_t(*)[4]) &lvl[0][3], b4_stride,
309
2.05M
                                     &f->lf.lim_lut, w HIGHBD_CALL_SUFFIX);
310
2.05M
    }
311
188k
}
312
313
void bytefn(dav1d_loopfilter_sbrow_cols)(const Dav1dFrameContext *const f,
314
                                         pixel *const p[3], Av1Filter *const lflvl,
315
                                         int sby, const int start_of_tile_row)
316
162k
{
317
162k
    int x, have_left;
318
    // Don't filter outside the frame
319
162k
    const int is_sb64 = !f->seq_hdr->sb128;
320
162k
    const int starty4 = (sby & is_sb64) << 4;
321
162k
    const int sbsz = 32 >> is_sb64;
322
162k
    const int sbl2 = 5 - is_sb64;
323
162k
    const int halign = (f->bh + 31) & ~31;
324
162k
    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
325
162k
    const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
326
162k
    const int vmask = 16 >> ss_ver, hmask = 16 >> ss_hor;
327
162k
    const unsigned vmax = 1U << vmask, hmax = 1U << hmask;
328
162k
    const unsigned endy4 = starty4 + imin(f->h4 - sby * sbsz, sbsz);
329
162k
    const unsigned uv_endy4 = (endy4 + ss_ver) >> ss_ver;
330
331
    // fix lpf strength at tile col boundaries
332
162k
    const uint8_t *lpf_y = &f->lf.tx_lpf_right_edge[0][sby << sbl2];
333
162k
    const uint8_t *lpf_uv = &f->lf.tx_lpf_right_edge[1][sby << (sbl2 - ss_ver)];
334
165k
    for (int tile_col = 1;; tile_col++) {
335
165k
        x = f->frame_hdr->tiling.col_start_sb[tile_col];
336
165k
        if ((x << sbl2) >= f->bw) break;
337
2.56k
        const int bx4 = x & is_sb64 ? 16 : 0, cbx4 = bx4 >> ss_hor;
338
2.56k
        x >>= is_sb64;
339
340
2.56k
        uint16_t (*const y_hmask)[2] = lflvl[x].filter_y[0][bx4];
341
37.3k
        for (unsigned y = starty4, mask = 1 << y; y < endy4; y++, mask <<= 1) {
342
34.8k
            const int sidx = mask >= 0x10000U;
343
34.8k
            const unsigned smask = mask >> (sidx << 4);
344
34.8k
            const int idx = 2 * !!(y_hmask[2][sidx] & smask) +
345
34.8k
                                !!(y_hmask[1][sidx] & smask);
346
34.8k
            y_hmask[2][sidx] &= ~smask;
347
34.8k
            y_hmask[1][sidx] &= ~smask;
348
34.8k
            y_hmask[0][sidx] &= ~smask;
349
34.8k
            y_hmask[imin(idx, lpf_y[y - starty4])][sidx] |= smask;
350
34.8k
        }
351
352
2.56k
        if (f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400) {
353
2.22k
            uint16_t (*const uv_hmask)[2] = lflvl[x].filter_uv[0][cbx4];
354
21.3k
            for (unsigned y = starty4 >> ss_ver, uv_mask = 1 << y; y < uv_endy4;
355
19.1k
                 y++, uv_mask <<= 1)
356
19.1k
            {
357
19.1k
                const int sidx = uv_mask >= vmax;
358
19.1k
                const unsigned smask = uv_mask >> (sidx << (4 - ss_ver));
359
19.1k
                const int idx = !!(uv_hmask[1][sidx] & smask);
360
19.1k
                uv_hmask[1][sidx] &= ~smask;
361
19.1k
                uv_hmask[0][sidx] &= ~smask;
362
19.1k
                uv_hmask[imin(idx, lpf_uv[y - (starty4 >> ss_ver)])][sidx] |= smask;
363
19.1k
            }
364
2.22k
        }
365
2.56k
        lpf_y  += halign;
366
2.56k
        lpf_uv += halign >> ss_ver;
367
2.56k
    }
368
369
    // fix lpf strength at tile row boundaries
370
162k
    if (start_of_tile_row) {
371
1.56k
        const BlockContext *a;
372
1.56k
        for (x = 0, a = &f->a[f->sb128w * (start_of_tile_row - 1)];
373
3.95k
             x < f->sb128w; x++, a++)
374
2.39k
        {
375
2.39k
            uint16_t (*const y_vmask)[2] = lflvl[x].filter_y[1][starty4];
376
2.39k
            const unsigned w = imin(32, f->w4 - (x << 5));
377
39.7k
            for (unsigned mask = 1, i = 0; i < w; mask <<= 1, i++) {
378
37.3k
                const int sidx = mask >= 0x10000U;
379
37.3k
                const unsigned smask = mask >> (sidx << 4);
380
37.3k
                const int idx = 2 * !!(y_vmask[2][sidx] & smask) +
381
37.3k
                                    !!(y_vmask[1][sidx] & smask);
382
37.3k
                y_vmask[2][sidx] &= ~smask;
383
37.3k
                y_vmask[1][sidx] &= ~smask;
384
37.3k
                y_vmask[0][sidx] &= ~smask;
385
37.3k
                y_vmask[imin(idx, a->tx_lpf_y[i])][sidx] |= smask;
386
37.3k
            }
387
388
2.39k
            if (f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400) {
389
2.05k
                const unsigned cw = (w + ss_hor) >> ss_hor;
390
2.05k
                uint16_t (*const uv_vmask)[2] = lflvl[x].filter_uv[1][starty4 >> ss_ver];
391
23.9k
                for (unsigned uv_mask = 1, i = 0; i < cw; uv_mask <<= 1, i++) {
392
21.9k
                    const int sidx = uv_mask >= hmax;
393
21.9k
                    const unsigned smask = uv_mask >> (sidx << (4 - ss_hor));
394
21.9k
                    const int idx = !!(uv_vmask[1][sidx] & smask);
395
21.9k
                    uv_vmask[1][sidx] &= ~smask;
396
21.9k
                    uv_vmask[0][sidx] &= ~smask;
397
21.9k
                    uv_vmask[imin(idx, a->tx_lpf_uv[i])][sidx] |= smask;
398
21.9k
                }
399
2.05k
            }
400
2.39k
        }
401
1.56k
    }
402
403
162k
    pixel *ptr;
404
162k
    uint8_t (*level_ptr)[4] = f->lf.level + f->b4_stride * sby * sbsz;
405
415k
    for (ptr = p[0], have_left = 0, x = 0; x < f->sb128w;
406
252k
         x++, have_left = 1, ptr += 128, level_ptr += 32)
407
252k
    {
408
252k
        filter_plane_cols_y(f, have_left, level_ptr, f->b4_stride,
409
252k
                            lflvl[x].filter_y[0], ptr, f->cur.stride[0],
410
252k
                            imin(32, f->w4 - x * 32), starty4, endy4);
411
252k
    }
412
413
162k
    if (!f->frame_hdr->loopfilter.level_u && !f->frame_hdr->loopfilter.level_v)
414
49.0k
        return;
415
416
113k
    ptrdiff_t uv_off;
417
113k
    level_ptr = f->lf.level + f->b4_stride * (sby * sbsz >> ss_ver);
418
303k
    for (uv_off = 0, have_left = 0, x = 0; x < f->sb128w;
419
189k
         x++, have_left = 1, uv_off += 128 >> ss_hor, level_ptr += 32 >> ss_hor)
420
189k
    {
421
189k
        filter_plane_cols_uv(f, have_left, level_ptr, f->b4_stride,
422
189k
                             lflvl[x].filter_uv[0],
423
189k
                             &p[1][uv_off], &p[2][uv_off], f->cur.stride[1],
424
189k
                             (imin(32, f->w4 - x * 32) + ss_hor) >> ss_hor,
425
189k
                             starty4 >> ss_ver, uv_endy4, ss_ver);
426
189k
    }
427
113k
}
dav1d_loopfilter_sbrow_cols_8bpc
Line
Count
Source
316
83.2k
{
317
83.2k
    int x, have_left;
318
    // Don't filter outside the frame
319
83.2k
    const int is_sb64 = !f->seq_hdr->sb128;
320
83.2k
    const int starty4 = (sby & is_sb64) << 4;
321
83.2k
    const int sbsz = 32 >> is_sb64;
322
83.2k
    const int sbl2 = 5 - is_sb64;
323
83.2k
    const int halign = (f->bh + 31) & ~31;
324
83.2k
    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
325
83.2k
    const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
326
83.2k
    const int vmask = 16 >> ss_ver, hmask = 16 >> ss_hor;
327
83.2k
    const unsigned vmax = 1U << vmask, hmax = 1U << hmask;
328
83.2k
    const unsigned endy4 = starty4 + imin(f->h4 - sby * sbsz, sbsz);
329
83.2k
    const unsigned uv_endy4 = (endy4 + ss_ver) >> ss_ver;
330
331
    // fix lpf strength at tile col boundaries
332
83.2k
    const uint8_t *lpf_y = &f->lf.tx_lpf_right_edge[0][sby << sbl2];
333
83.2k
    const uint8_t *lpf_uv = &f->lf.tx_lpf_right_edge[1][sby << (sbl2 - ss_ver)];
334
84.6k
    for (int tile_col = 1;; tile_col++) {
335
84.6k
        x = f->frame_hdr->tiling.col_start_sb[tile_col];
336
84.6k
        if ((x << sbl2) >= f->bw) break;
337
1.48k
        const int bx4 = x & is_sb64 ? 16 : 0, cbx4 = bx4 >> ss_hor;
338
1.48k
        x >>= is_sb64;
339
340
1.48k
        uint16_t (*const y_hmask)[2] = lflvl[x].filter_y[0][bx4];
341
23.9k
        for (unsigned y = starty4, mask = 1 << y; y < endy4; y++, mask <<= 1) {
342
22.4k
            const int sidx = mask >= 0x10000U;
343
22.4k
            const unsigned smask = mask >> (sidx << 4);
344
22.4k
            const int idx = 2 * !!(y_hmask[2][sidx] & smask) +
345
22.4k
                                !!(y_hmask[1][sidx] & smask);
346
22.4k
            y_hmask[2][sidx] &= ~smask;
347
22.4k
            y_hmask[1][sidx] &= ~smask;
348
22.4k
            y_hmask[0][sidx] &= ~smask;
349
22.4k
            y_hmask[imin(idx, lpf_y[y - starty4])][sidx] |= smask;
350
22.4k
        }
351
352
1.48k
        if (f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400) {
353
1.38k
            uint16_t (*const uv_hmask)[2] = lflvl[x].filter_uv[0][cbx4];
354
13.9k
            for (unsigned y = starty4 >> ss_ver, uv_mask = 1 << y; y < uv_endy4;
355
12.6k
                 y++, uv_mask <<= 1)
356
12.6k
            {
357
12.6k
                const int sidx = uv_mask >= vmax;
358
12.6k
                const unsigned smask = uv_mask >> (sidx << (4 - ss_ver));
359
12.6k
                const int idx = !!(uv_hmask[1][sidx] & smask);
360
12.6k
                uv_hmask[1][sidx] &= ~smask;
361
12.6k
                uv_hmask[0][sidx] &= ~smask;
362
12.6k
                uv_hmask[imin(idx, lpf_uv[y - (starty4 >> ss_ver)])][sidx] |= smask;
363
12.6k
            }
364
1.38k
        }
365
1.48k
        lpf_y  += halign;
366
1.48k
        lpf_uv += halign >> ss_ver;
367
1.48k
    }
368
369
    // fix lpf strength at tile row boundaries
370
83.2k
    if (start_of_tile_row) {
371
755
        const BlockContext *a;
372
755
        for (x = 0, a = &f->a[f->sb128w * (start_of_tile_row - 1)];
373
1.89k
             x < f->sb128w; x++, a++)
374
1.14k
        {
375
1.14k
            uint16_t (*const y_vmask)[2] = lflvl[x].filter_y[1][starty4];
376
1.14k
            const unsigned w = imin(32, f->w4 - (x << 5));
377
18.1k
            for (unsigned mask = 1, i = 0; i < w; mask <<= 1, i++) {
378
17.0k
                const int sidx = mask >= 0x10000U;
379
17.0k
                const unsigned smask = mask >> (sidx << 4);
380
17.0k
                const int idx = 2 * !!(y_vmask[2][sidx] & smask) +
381
17.0k
                                    !!(y_vmask[1][sidx] & smask);
382
17.0k
                y_vmask[2][sidx] &= ~smask;
383
17.0k
                y_vmask[1][sidx] &= ~smask;
384
17.0k
                y_vmask[0][sidx] &= ~smask;
385
17.0k
                y_vmask[imin(idx, a->tx_lpf_y[i])][sidx] |= smask;
386
17.0k
            }
387
388
1.14k
            if (f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400) {
389
1.00k
                const unsigned cw = (w + ss_hor) >> ss_hor;
390
1.00k
                uint16_t (*const uv_vmask)[2] = lflvl[x].filter_uv[1][starty4 >> ss_ver];
391
9.41k
                for (unsigned uv_mask = 1, i = 0; i < cw; uv_mask <<= 1, i++) {
392
8.40k
                    const int sidx = uv_mask >= hmax;
393
8.40k
                    const unsigned smask = uv_mask >> (sidx << (4 - ss_hor));
394
8.40k
                    const int idx = !!(uv_vmask[1][sidx] & smask);
395
8.40k
                    uv_vmask[1][sidx] &= ~smask;
396
8.40k
                    uv_vmask[0][sidx] &= ~smask;
397
8.40k
                    uv_vmask[imin(idx, a->tx_lpf_uv[i])][sidx] |= smask;
398
8.40k
                }
399
1.00k
            }
400
1.14k
        }
401
755
    }
402
403
83.2k
    pixel *ptr;
404
83.2k
    uint8_t (*level_ptr)[4] = f->lf.level + f->b4_stride * sby * sbsz;
405
215k
    for (ptr = p[0], have_left = 0, x = 0; x < f->sb128w;
406
131k
         x++, have_left = 1, ptr += 128, level_ptr += 32)
407
131k
    {
408
131k
        filter_plane_cols_y(f, have_left, level_ptr, f->b4_stride,
409
131k
                            lflvl[x].filter_y[0], ptr, f->cur.stride[0],
410
131k
                            imin(32, f->w4 - x * 32), starty4, endy4);
411
131k
    }
412
413
83.2k
    if (!f->frame_hdr->loopfilter.level_u && !f->frame_hdr->loopfilter.level_v)
414
18.8k
        return;
415
416
64.3k
    ptrdiff_t uv_off;
417
64.3k
    level_ptr = f->lf.level + f->b4_stride * (sby * sbsz >> ss_ver);
418
172k
    for (uv_off = 0, have_left = 0, x = 0; x < f->sb128w;
419
107k
         x++, have_left = 1, uv_off += 128 >> ss_hor, level_ptr += 32 >> ss_hor)
420
107k
    {
421
107k
        filter_plane_cols_uv(f, have_left, level_ptr, f->b4_stride,
422
107k
                             lflvl[x].filter_uv[0],
423
107k
                             &p[1][uv_off], &p[2][uv_off], f->cur.stride[1],
424
107k
                             (imin(32, f->w4 - x * 32) + ss_hor) >> ss_hor,
425
107k
                             starty4 >> ss_ver, uv_endy4, ss_ver);
426
107k
    }
427
64.3k
}
dav1d_loopfilter_sbrow_cols_16bpc
Line
Count
Source
316
79.5k
{
317
79.5k
    int x, have_left;
318
    // Don't filter outside the frame
319
79.5k
    const int is_sb64 = !f->seq_hdr->sb128;
320
79.5k
    const int starty4 = (sby & is_sb64) << 4;
321
79.5k
    const int sbsz = 32 >> is_sb64;
322
79.5k
    const int sbl2 = 5 - is_sb64;
323
79.5k
    const int halign = (f->bh + 31) & ~31;
324
79.5k
    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
325
79.5k
    const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
326
79.5k
    const int vmask = 16 >> ss_ver, hmask = 16 >> ss_hor;
327
79.5k
    const unsigned vmax = 1U << vmask, hmax = 1U << hmask;
328
79.5k
    const unsigned endy4 = starty4 + imin(f->h4 - sby * sbsz, sbsz);
329
79.5k
    const unsigned uv_endy4 = (endy4 + ss_ver) >> ss_ver;
330
331
    // fix lpf strength at tile col boundaries
332
79.5k
    const uint8_t *lpf_y = &f->lf.tx_lpf_right_edge[0][sby << sbl2];
333
79.5k
    const uint8_t *lpf_uv = &f->lf.tx_lpf_right_edge[1][sby << (sbl2 - ss_ver)];
334
80.6k
    for (int tile_col = 1;; tile_col++) {
335
80.6k
        x = f->frame_hdr->tiling.col_start_sb[tile_col];
336
80.6k
        if ((x << sbl2) >= f->bw) break;
337
1.08k
        const int bx4 = x & is_sb64 ? 16 : 0, cbx4 = bx4 >> ss_hor;
338
1.08k
        x >>= is_sb64;
339
340
1.08k
        uint16_t (*const y_hmask)[2] = lflvl[x].filter_y[0][bx4];
341
13.4k
        for (unsigned y = starty4, mask = 1 << y; y < endy4; y++, mask <<= 1) {
342
12.3k
            const int sidx = mask >= 0x10000U;
343
12.3k
            const unsigned smask = mask >> (sidx << 4);
344
12.3k
            const int idx = 2 * !!(y_hmask[2][sidx] & smask) +
345
12.3k
                                !!(y_hmask[1][sidx] & smask);
346
12.3k
            y_hmask[2][sidx] &= ~smask;
347
12.3k
            y_hmask[1][sidx] &= ~smask;
348
12.3k
            y_hmask[0][sidx] &= ~smask;
349
12.3k
            y_hmask[imin(idx, lpf_y[y - starty4])][sidx] |= smask;
350
12.3k
        }
351
352
1.08k
        if (f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400) {
353
845
            uint16_t (*const uv_hmask)[2] = lflvl[x].filter_uv[0][cbx4];
354
7.39k
            for (unsigned y = starty4 >> ss_ver, uv_mask = 1 << y; y < uv_endy4;
355
6.55k
                 y++, uv_mask <<= 1)
356
6.55k
            {
357
6.55k
                const int sidx = uv_mask >= vmax;
358
6.55k
                const unsigned smask = uv_mask >> (sidx << (4 - ss_ver));
359
6.55k
                const int idx = !!(uv_hmask[1][sidx] & smask);
360
6.55k
                uv_hmask[1][sidx] &= ~smask;
361
6.55k
                uv_hmask[0][sidx] &= ~smask;
362
6.55k
                uv_hmask[imin(idx, lpf_uv[y - (starty4 >> ss_ver)])][sidx] |= smask;
363
6.55k
            }
364
845
        }
365
1.08k
        lpf_y  += halign;
366
1.08k
        lpf_uv += halign >> ss_ver;
367
1.08k
    }
368
369
    // fix lpf strength at tile row boundaries
370
79.5k
    if (start_of_tile_row) {
371
807
        const BlockContext *a;
372
807
        for (x = 0, a = &f->a[f->sb128w * (start_of_tile_row - 1)];
373
2.05k
             x < f->sb128w; x++, a++)
374
1.25k
        {
375
1.25k
            uint16_t (*const y_vmask)[2] = lflvl[x].filter_y[1][starty4];
376
1.25k
            const unsigned w = imin(32, f->w4 - (x << 5));
377
21.5k
            for (unsigned mask = 1, i = 0; i < w; mask <<= 1, i++) {
378
20.2k
                const int sidx = mask >= 0x10000U;
379
20.2k
                const unsigned smask = mask >> (sidx << 4);
380
20.2k
                const int idx = 2 * !!(y_vmask[2][sidx] & smask) +
381
20.2k
                                    !!(y_vmask[1][sidx] & smask);
382
20.2k
                y_vmask[2][sidx] &= ~smask;
383
20.2k
                y_vmask[1][sidx] &= ~smask;
384
20.2k
                y_vmask[0][sidx] &= ~smask;
385
20.2k
                y_vmask[imin(idx, a->tx_lpf_y[i])][sidx] |= smask;
386
20.2k
            }
387
388
1.25k
            if (f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400) {
389
1.05k
                const unsigned cw = (w + ss_hor) >> ss_hor;
390
1.05k
                uint16_t (*const uv_vmask)[2] = lflvl[x].filter_uv[1][starty4 >> ss_ver];
391
14.5k
                for (unsigned uv_mask = 1, i = 0; i < cw; uv_mask <<= 1, i++) {
392
13.5k
                    const int sidx = uv_mask >= hmax;
393
13.5k
                    const unsigned smask = uv_mask >> (sidx << (4 - ss_hor));
394
13.5k
                    const int idx = !!(uv_vmask[1][sidx] & smask);
395
13.5k
                    uv_vmask[1][sidx] &= ~smask;
396
13.5k
                    uv_vmask[0][sidx] &= ~smask;
397
13.5k
                    uv_vmask[imin(idx, a->tx_lpf_uv[i])][sidx] |= smask;
398
13.5k
                }
399
1.05k
            }
400
1.25k
        }
401
807
    }
402
403
79.5k
    pixel *ptr;
404
79.5k
    uint8_t (*level_ptr)[4] = f->lf.level + f->b4_stride * sby * sbsz;
405
200k
    for (ptr = p[0], have_left = 0, x = 0; x < f->sb128w;
406
120k
         x++, have_left = 1, ptr += 128, level_ptr += 32)
407
120k
    {
408
120k
        filter_plane_cols_y(f, have_left, level_ptr, f->b4_stride,
409
120k
                            lflvl[x].filter_y[0], ptr, f->cur.stride[0],
410
120k
                            imin(32, f->w4 - x * 32), starty4, endy4);
411
120k
    }
412
413
79.5k
    if (!f->frame_hdr->loopfilter.level_u && !f->frame_hdr->loopfilter.level_v)
414
30.1k
        return;
415
416
49.3k
    ptrdiff_t uv_off;
417
49.3k
    level_ptr = f->lf.level + f->b4_stride * (sby * sbsz >> ss_ver);
418
131k
    for (uv_off = 0, have_left = 0, x = 0; x < f->sb128w;
419
81.7k
         x++, have_left = 1, uv_off += 128 >> ss_hor, level_ptr += 32 >> ss_hor)
420
81.7k
    {
421
81.7k
        filter_plane_cols_uv(f, have_left, level_ptr, f->b4_stride,
422
81.7k
                             lflvl[x].filter_uv[0],
423
81.7k
                             &p[1][uv_off], &p[2][uv_off], f->cur.stride[1],
424
81.7k
                             (imin(32, f->w4 - x * 32) + ss_hor) >> ss_hor,
425
81.7k
                             starty4 >> ss_ver, uv_endy4, ss_ver);
426
81.7k
    }
427
49.3k
}
428
429
void bytefn(dav1d_loopfilter_sbrow_rows)(const Dav1dFrameContext *const f,
430
                                         pixel *const p[3], Av1Filter *const lflvl,
431
                                         int sby)
432
162k
{
433
162k
    int x;
434
    // Don't filter outside the frame
435
162k
    const int have_top = sby > 0;
436
162k
    const int is_sb64 = !f->seq_hdr->sb128;
437
162k
    const int starty4 = (sby & is_sb64) << 4;
438
162k
    const int sbsz = 32 >> is_sb64;
439
162k
    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
440
162k
    const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
441
162k
    const unsigned endy4 = starty4 + imin(f->h4 - sby * sbsz, sbsz);
442
162k
    const unsigned uv_endy4 = (endy4 + ss_ver) >> ss_ver;
443
444
162k
    pixel *ptr;
445
162k
    uint8_t (*level_ptr)[4] = f->lf.level + f->b4_stride * sby * sbsz;
446
413k
    for (ptr = p[0], x = 0; x < f->sb128w; x++, ptr += 128, level_ptr += 32) {
447
251k
        filter_plane_rows_y(f, have_top, level_ptr, f->b4_stride,
448
251k
                            lflvl[x].filter_y[1], ptr, f->cur.stride[0],
449
251k
                            imin(32, f->w4 - x * 32), starty4, endy4);
450
251k
    }
451
452
162k
    if (!f->frame_hdr->loopfilter.level_u && !f->frame_hdr->loopfilter.level_v)
453
48.9k
        return;
454
455
113k
    ptrdiff_t uv_off;
456
113k
    level_ptr = f->lf.level + f->b4_stride * (sby * sbsz >> ss_ver);
457
302k
    for (uv_off = 0, x = 0; x < f->sb128w;
458
188k
         x++, uv_off += 128 >> ss_hor, level_ptr += 32 >> ss_hor)
459
188k
    {
460
188k
        filter_plane_rows_uv(f, have_top, level_ptr, f->b4_stride,
461
188k
                             lflvl[x].filter_uv[1],
462
188k
                             &p[1][uv_off], &p[2][uv_off], f->cur.stride[1],
463
188k
                             (imin(32, f->w4 - x * 32) + ss_hor) >> ss_hor,
464
188k
                             starty4 >> ss_ver, uv_endy4, ss_hor);
465
188k
    }
466
113k
}
dav1d_loopfilter_sbrow_rows_8bpc
Line
Count
Source
432
83.0k
{
433
83.0k
    int x;
434
    // Don't filter outside the frame
435
83.0k
    const int have_top = sby > 0;
436
83.0k
    const int is_sb64 = !f->seq_hdr->sb128;
437
83.0k
    const int starty4 = (sby & is_sb64) << 4;
438
83.0k
    const int sbsz = 32 >> is_sb64;
439
83.0k
    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
440
83.0k
    const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
441
83.0k
    const unsigned endy4 = starty4 + imin(f->h4 - sby * sbsz, sbsz);
442
83.0k
    const unsigned uv_endy4 = (endy4 + ss_ver) >> ss_ver;
443
444
83.0k
    pixel *ptr;
445
83.0k
    uint8_t (*level_ptr)[4] = f->lf.level + f->b4_stride * sby * sbsz;
446
214k
    for (ptr = p[0], x = 0; x < f->sb128w; x++, ptr += 128, level_ptr += 32) {
447
131k
        filter_plane_rows_y(f, have_top, level_ptr, f->b4_stride,
448
131k
                            lflvl[x].filter_y[1], ptr, f->cur.stride[0],
449
131k
                            imin(32, f->w4 - x * 32), starty4, endy4);
450
131k
    }
451
452
83.0k
    if (!f->frame_hdr->loopfilter.level_u && !f->frame_hdr->loopfilter.level_v)
453
18.8k
        return;
454
455
64.2k
    ptrdiff_t uv_off;
456
64.2k
    level_ptr = f->lf.level + f->b4_stride * (sby * sbsz >> ss_ver);
457
171k
    for (uv_off = 0, x = 0; x < f->sb128w;
458
107k
         x++, uv_off += 128 >> ss_hor, level_ptr += 32 >> ss_hor)
459
107k
    {
460
107k
        filter_plane_rows_uv(f, have_top, level_ptr, f->b4_stride,
461
107k
                             lflvl[x].filter_uv[1],
462
107k
                             &p[1][uv_off], &p[2][uv_off], f->cur.stride[1],
463
107k
                             (imin(32, f->w4 - x * 32) + ss_hor) >> ss_hor,
464
107k
                             starty4 >> ss_ver, uv_endy4, ss_hor);
465
107k
    }
466
64.2k
}
dav1d_loopfilter_sbrow_rows_16bpc
Line
Count
Source
432
79.4k
{
433
79.4k
    int x;
434
    // Don't filter outside the frame
435
79.4k
    const int have_top = sby > 0;
436
79.4k
    const int is_sb64 = !f->seq_hdr->sb128;
437
79.4k
    const int starty4 = (sby & is_sb64) << 4;
438
79.4k
    const int sbsz = 32 >> is_sb64;
439
79.4k
    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
440
79.4k
    const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
441
79.4k
    const unsigned endy4 = starty4 + imin(f->h4 - sby * sbsz, sbsz);
442
79.4k
    const unsigned uv_endy4 = (endy4 + ss_ver) >> ss_ver;
443
444
79.4k
    pixel *ptr;
445
79.4k
    uint8_t (*level_ptr)[4] = f->lf.level + f->b4_stride * sby * sbsz;
446
199k
    for (ptr = p[0], x = 0; x < f->sb128w; x++, ptr += 128, level_ptr += 32) {
447
120k
        filter_plane_rows_y(f, have_top, level_ptr, f->b4_stride,
448
120k
                            lflvl[x].filter_y[1], ptr, f->cur.stride[0],
449
120k
                            imin(32, f->w4 - x * 32), starty4, endy4);
450
120k
    }
451
452
79.4k
    if (!f->frame_hdr->loopfilter.level_u && !f->frame_hdr->loopfilter.level_v)
453
30.1k
        return;
454
455
49.3k
    ptrdiff_t uv_off;
456
49.3k
    level_ptr = f->lf.level + f->b4_stride * (sby * sbsz >> ss_ver);
457
130k
    for (uv_off = 0, x = 0; x < f->sb128w;
458
81.4k
         x++, uv_off += 128 >> ss_hor, level_ptr += 32 >> ss_hor)
459
81.4k
    {
460
81.4k
        filter_plane_rows_uv(f, have_top, level_ptr, f->b4_stride,
461
81.4k
                             lflvl[x].filter_uv[1],
462
81.4k
                             &p[1][uv_off], &p[2][uv_off], f->cur.stride[1],
463
81.4k
                             (imin(32, f->w4 - x * 32) + ss_hor) >> ss_hor,
464
81.4k
                             starty4 >> ss_ver, uv_endy4, ss_hor);
465
81.4k
    }
466
49.3k
}