/work/dav1d/src/lf_mask.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright © 2018, VideoLAN and dav1d authors |
3 | | * Copyright © 2018, Two Orioles, LLC |
4 | | * All rights reserved. |
5 | | * |
6 | | * Redistribution and use in source and binary forms, with or without |
7 | | * modification, are permitted provided that the following conditions are met: |
8 | | * |
9 | | * 1. Redistributions of source code must retain the above copyright notice, this |
10 | | * list of conditions and the following disclaimer. |
11 | | * |
12 | | * 2. Redistributions in binary form must reproduce the above copyright notice, |
13 | | * this list of conditions and the following disclaimer in the documentation |
14 | | * and/or other materials provided with the distribution. |
15 | | * |
16 | | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
17 | | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
18 | | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
19 | | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR |
20 | | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
21 | | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
22 | | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
23 | | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
24 | | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
25 | | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
26 | | */ |
27 | | |
28 | | #include "config.h" |
29 | | |
30 | | #include <string.h> |
31 | | |
32 | | #include "common/intops.h" |
33 | | |
34 | | #include "src/ctx.h" |
35 | | #include "src/levels.h" |
36 | | #include "src/lf_mask.h" |
37 | | #include "src/tables.h" |
38 | | |
39 | | static void decomp_tx(uint8_t (*const txa)[2 /* txsz, step */][32 /* y */][32 /* x */], |
40 | | const enum RectTxfmSize from, |
41 | | const int depth, |
42 | | const int y_off, const int x_off, |
43 | | const uint16_t *const tx_masks) |
44 | 244k | { |
45 | 244k | const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[from]; |
46 | 244k | const int is_split = (from == (int) TX_4X4 || depth > 1) ? 0 : |
47 | 244k | (tx_masks[depth] >> (y_off * 4 + x_off)) & 1; |
48 | | |
49 | 244k | if (is_split) { |
50 | 2.67k | const enum RectTxfmSize sub = t_dim->sub; |
51 | 2.67k | const int htw4 = t_dim->w >> 1, hth4 = t_dim->h >> 1; |
52 | | |
53 | 2.67k | decomp_tx(txa, sub, depth + 1, y_off * 2 + 0, x_off * 2 + 0, tx_masks); |
54 | 2.67k | if (t_dim->w >= t_dim->h) |
55 | 2.24k | decomp_tx((uint8_t(*)[2][32][32]) &txa[0][0][0][htw4], |
56 | 2.24k | sub, depth + 1, y_off * 2 + 0, x_off * 2 + 1, tx_masks); |
57 | 2.67k | if (t_dim->h >= t_dim->w) { |
58 | 1.97k | decomp_tx((uint8_t(*)[2][32][32]) &txa[0][0][hth4][0], |
59 | 1.97k | sub, depth + 1, y_off * 2 + 1, x_off * 2 + 0, tx_masks); |
60 | 1.97k | if (t_dim->w >= t_dim->h) |
61 | 1.54k | decomp_tx((uint8_t(*)[2][32][32]) &txa[0][0][hth4][htw4], |
62 | 1.54k | sub, depth + 1, y_off * 2 + 1, x_off * 2 + 1, tx_masks); |
63 | 1.97k | } |
64 | 241k | } else { |
65 | 241k | const int lw = imin(2, t_dim->lw), lh = imin(2, t_dim->lh); |
66 | | |
67 | 241k | #define set_ctx(rep_macro) \ |
68 | 1.30M | for (int y = 0; y < t_dim->h; y++) { \ |
69 | 1.06M | rep_macro(txa[0][0][y], 0, lw); \ |
70 | 1.06M | rep_macro(txa[1][0][y], 0, lh); \ |
71 | 1.06M | txa[0][1][y][0] = t_dim->w; \ |
72 | 1.06M | } |
73 | 241k | case_set_upto16(t_dim->lw); |
74 | 241k | #undef set_ctx |
75 | 241k | dav1d_memset_pow2[t_dim->lw](txa[1][1][0], t_dim->h); |
76 | 241k | } |
77 | 244k | } |
78 | | |
79 | | static inline void mask_edges_inter(uint16_t (*const masks)[32][3][2], |
80 | | const int by4, const int bx4, |
81 | | const int w4, const int h4, const int skip, |
82 | | const enum RectTxfmSize max_tx, |
83 | | const uint16_t *const tx_masks, |
84 | | uint8_t *const a, uint8_t *const l) |
85 | 115k | { |
86 | 115k | const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[max_tx]; |
87 | 115k | int y, x; |
88 | | |
89 | 115k | ALIGN_STK_16(uint8_t, txa, 2 /* edge */, [2 /* txsz, step */][32 /* y */][32 /* x */]); |
90 | 264k | for (int y_off = 0, y = 0; y < h4; y += t_dim->h, y_off++) |
91 | 384k | for (int x_off = 0, x = 0; x < w4; x += t_dim->w, x_off++) |
92 | 235k | decomp_tx((uint8_t(*)[2][32][32]) &txa[0][0][y][x], |
93 | 235k | max_tx, 0, y_off, x_off, tx_masks); |
94 | | |
95 | | // left block edge |
96 | 115k | unsigned mask = 1U << by4; |
97 | 746k | for (y = 0; y < h4; y++, mask <<= 1) { |
98 | 630k | const int sidx = mask >= 0x10000; |
99 | 630k | const unsigned smask = mask >> (sidx << 4); |
100 | 630k | masks[0][bx4][imin(txa[0][0][y][0], l[y])][sidx] |= smask; |
101 | 630k | } |
102 | | |
103 | | // top block edge |
104 | 591k | for (x = 0, mask = 1U << bx4; x < w4; x++, mask <<= 1) { |
105 | 475k | const int sidx = mask >= 0x10000; |
106 | 475k | const unsigned smask = mask >> (sidx << 4); |
107 | 475k | masks[1][by4][imin(txa[1][0][0][x], a[x])][sidx] |= smask; |
108 | 475k | } |
109 | | |
110 | 115k | if (!skip) { |
111 | | // inner (tx) left|right edges |
112 | 368k | for (y = 0, mask = 1U << by4; y < h4; y++, mask <<= 1) { |
113 | 314k | const int sidx = mask >= 0x10000U; |
114 | 314k | const unsigned smask = mask >> (sidx << 4); |
115 | 314k | int ltx = txa[0][0][y][0]; |
116 | 314k | int step = txa[0][1][y][0]; |
117 | 393k | for (x = step; x < w4; x += step) { |
118 | 78.8k | const int rtx = txa[0][0][y][x]; |
119 | 78.8k | masks[0][bx4 + x][imin(rtx, ltx)][sidx] |= smask; |
120 | 78.8k | ltx = rtx; |
121 | 78.8k | step = txa[0][1][y][x]; |
122 | 78.8k | } |
123 | 314k | } |
124 | | |
125 | | // top |
126 | | // inner (tx) --- edges |
127 | | // bottom |
128 | 195k | for (x = 0, mask = 1U << bx4; x < w4; x++, mask <<= 1) { |
129 | 141k | const int sidx = mask >= 0x10000U; |
130 | 141k | const unsigned smask = mask >> (sidx << 4); |
131 | 141k | int ttx = txa[1][0][0][x]; |
132 | 141k | int step = txa[1][1][0][x]; |
133 | 236k | for (y = step; y < h4; y += step) { |
134 | 95.3k | const int btx = txa[1][0][y][x]; |
135 | 95.3k | masks[1][by4 + y][imin(ttx, btx)][sidx] |= smask; |
136 | 95.3k | ttx = btx; |
137 | 95.3k | step = txa[1][1][y][x]; |
138 | 95.3k | } |
139 | 141k | } |
140 | 54.1k | } |
141 | | |
142 | 746k | for (y = 0; y < h4; y++) |
143 | 630k | l[y] = txa[0][0][y][w4 - 1]; |
144 | 115k | memcpy(a, txa[1][0][h4 - 1], w4); |
145 | 115k | } |
146 | | |
147 | | static inline void mask_edges_intra(uint16_t (*const masks)[32][3][2], |
148 | | const int by4, const int bx4, |
149 | | const int w4, const int h4, |
150 | | const enum RectTxfmSize tx, |
151 | | uint8_t *const a, uint8_t *const l) |
152 | 1.26M | { |
153 | 1.26M | const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[tx]; |
154 | 1.26M | const int twl4 = t_dim->lw, thl4 = t_dim->lh; |
155 | 1.26M | const int twl4c = imin(2, twl4), thl4c = imin(2, thl4); |
156 | 1.26M | int y, x; |
157 | | |
158 | | // left block edge |
159 | 1.26M | unsigned mask = 1U << by4; |
160 | 6.97M | for (y = 0; y < h4; y++, mask <<= 1) { |
161 | 5.71M | const int sidx = mask >= 0x10000; |
162 | 5.71M | const unsigned smask = mask >> (sidx << 4); |
163 | 5.71M | masks[0][bx4][imin(twl4c, l[y])][sidx] |= smask; |
164 | 5.71M | } |
165 | | |
166 | | // top block edge |
167 | 6.95M | for (x = 0, mask = 1U << bx4; x < w4; x++, mask <<= 1) { |
168 | 5.68M | const int sidx = mask >= 0x10000; |
169 | 5.68M | const unsigned smask = mask >> (sidx << 4); |
170 | 5.68M | masks[1][by4][imin(thl4c, a[x])][sidx] |= smask; |
171 | 5.68M | } |
172 | | |
173 | | // inner (tx) left|right edges |
174 | 1.26M | const int hstep = t_dim->w; |
175 | 1.26M | unsigned t = 1U << by4; |
176 | 1.26M | unsigned inner = (unsigned) ((((uint64_t) t) << h4) - t); |
177 | 1.26M | unsigned inner1 = inner & 0xffff, inner2 = inner >> 16; |
178 | 1.48M | for (x = hstep; x < w4; x += hstep) { |
179 | 224k | if (inner1) masks[0][bx4 + x][twl4c][0] |= inner1; |
180 | 224k | if (inner2) masks[0][bx4 + x][twl4c][1] |= inner2; |
181 | 224k | } |
182 | | |
183 | | // top |
184 | | // inner (tx) --- edges |
185 | | // bottom |
186 | 1.26M | const int vstep = t_dim->h; |
187 | 1.26M | t = 1U << bx4; |
188 | 1.26M | inner = (unsigned) ((((uint64_t) t) << w4) - t); |
189 | 1.26M | inner1 = inner & 0xffff; |
190 | 1.26M | inner2 = inner >> 16; |
191 | 1.49M | for (y = vstep; y < h4; y += vstep) { |
192 | 227k | if (inner1) masks[1][by4 + y][thl4c][0] |= inner1; |
193 | 227k | if (inner2) masks[1][by4 + y][thl4c][1] |= inner2; |
194 | 227k | } |
195 | | |
196 | 1.26M | dav1d_memset_likely_pow2(a, thl4c, w4); |
197 | 1.26M | dav1d_memset_likely_pow2(l, twl4c, h4); |
198 | 1.26M | } |
199 | | |
200 | | static void mask_edges_chroma(uint16_t (*const masks)[32][2][2], |
201 | | const int cby4, const int cbx4, |
202 | | const int cw4, const int ch4, |
203 | | const int skip_inter, |
204 | | const enum RectTxfmSize tx, |
205 | | uint8_t *const a, uint8_t *const l, |
206 | | const int ss_hor, const int ss_ver) |
207 | 1.16M | { |
208 | 1.16M | const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[tx]; |
209 | 1.16M | const int twl4 = t_dim->lw, thl4 = t_dim->lh; |
210 | 1.16M | const int twl4c = !!twl4, thl4c = !!thl4; |
211 | 1.16M | int y, x; |
212 | 1.16M | const int vbits = 4 - ss_ver, hbits = 4 - ss_hor; |
213 | 1.16M | const int vmask = 16 >> ss_ver, hmask = 16 >> ss_hor; |
214 | 1.16M | const unsigned vmax = 1 << vmask, hmax = 1 << hmask; |
215 | | |
216 | | // left block edge |
217 | 1.16M | unsigned mask = 1U << cby4; |
218 | 4.98M | for (y = 0; y < ch4; y++, mask <<= 1) { |
219 | 3.81M | const int sidx = mask >= vmax; |
220 | 3.81M | const unsigned smask = mask >> (sidx << vbits); |
221 | 3.81M | masks[0][cbx4][imin(twl4c, l[y])][sidx] |= smask; |
222 | 3.81M | } |
223 | | |
224 | | // top block edge |
225 | 4.78M | for (x = 0, mask = 1U << cbx4; x < cw4; x++, mask <<= 1) { |
226 | 3.62M | const int sidx = mask >= hmax; |
227 | 3.62M | const unsigned smask = mask >> (sidx << hbits); |
228 | 3.62M | masks[1][cby4][imin(thl4c, a[x])][sidx] |= smask; |
229 | 3.62M | } |
230 | | |
231 | 1.16M | if (!skip_inter) { |
232 | | // inner (tx) left|right edges |
233 | 1.10M | const int hstep = t_dim->w; |
234 | 1.10M | unsigned t = 1U << cby4; |
235 | 1.10M | unsigned inner = (unsigned) ((((uint64_t) t) << ch4) - t); |
236 | 1.10M | unsigned inner1 = inner & ((1 << vmask) - 1), inner2 = inner >> vmask; |
237 | 1.17M | for (x = hstep; x < cw4; x += hstep) { |
238 | 74.1k | if (inner1) masks[0][cbx4 + x][twl4c][0] |= inner1; |
239 | 74.1k | if (inner2) masks[0][cbx4 + x][twl4c][1] |= inner2; |
240 | 74.1k | } |
241 | | |
242 | | // top |
243 | | // inner (tx) --- edges |
244 | | // bottom |
245 | 1.10M | const int vstep = t_dim->h; |
246 | 1.10M | t = 1U << cbx4; |
247 | 1.10M | inner = (unsigned) ((((uint64_t) t) << cw4) - t); |
248 | 1.10M | inner1 = inner & ((1 << hmask) - 1), inner2 = inner >> hmask; |
249 | 1.19M | for (y = vstep; y < ch4; y += vstep) { |
250 | 94.9k | if (inner1) masks[1][cby4 + y][thl4c][0] |= inner1; |
251 | 94.9k | if (inner2) masks[1][cby4 + y][thl4c][1] |= inner2; |
252 | 94.9k | } |
253 | 1.10M | } |
254 | | |
255 | 1.16M | dav1d_memset_likely_pow2(a, thl4c, cw4); |
256 | 1.16M | dav1d_memset_likely_pow2(l, twl4c, ch4); |
257 | 1.16M | } |
258 | | |
259 | | void dav1d_create_lf_mask_intra(Av1Filter *const lflvl, |
260 | | uint8_t (*const level_cache)[4], |
261 | | const ptrdiff_t b4_stride, |
262 | | const uint8_t (*filter_level)[8][2], |
263 | | const int bx, const int by, |
264 | | const int iw, const int ih, |
265 | | const enum BlockSize bs, |
266 | | const enum RectTxfmSize ytx, |
267 | | const enum RectTxfmSize uvtx, |
268 | | const enum Dav1dPixelLayout layout, |
269 | | uint8_t *const ay, uint8_t *const ly, |
270 | | uint8_t *const auv, uint8_t *const luv) |
271 | 1.28M | { |
272 | 1.28M | const uint8_t *const b_dim = dav1d_block_dimensions[bs]; |
273 | 1.28M | const int bw4 = imin(iw - bx, b_dim[0]); |
274 | 1.28M | const int bh4 = imin(ih - by, b_dim[1]); |
275 | 1.28M | const int bx4 = bx & 31; |
276 | 1.28M | const int by4 = by & 31; |
277 | 1.28M | assert(bw4 >= 0 && bh4 >= 0); |
278 | | |
279 | 1.28M | if (bw4 && bh4) { |
280 | 1.26M | uint8_t (*level_cache_ptr)[4] = level_cache + by * b4_stride + bx; |
281 | 6.95M | for (int y = 0; y < bh4; y++) { |
282 | 56.9M | for (int x = 0; x < bw4; x++) { |
283 | 51.2M | level_cache_ptr[x][0] = filter_level[0][0][0]; |
284 | 51.2M | level_cache_ptr[x][1] = filter_level[1][0][0]; |
285 | 51.2M | } |
286 | 5.69M | level_cache_ptr += b4_stride; |
287 | 5.69M | } |
288 | | |
289 | 1.26M | mask_edges_intra(lflvl->filter_y, by4, bx4, bw4, bh4, ytx, ay, ly); |
290 | 1.26M | } |
291 | | |
292 | 1.28M | if (!auv) return; |
293 | | |
294 | 1.06M | const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420; |
295 | 1.06M | const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444; |
296 | 1.06M | const int cbw4 = imin(((iw + ss_hor) >> ss_hor) - (bx >> ss_hor), |
297 | 1.06M | (b_dim[0] + ss_hor) >> ss_hor); |
298 | 1.06M | const int cbh4 = imin(((ih + ss_ver) >> ss_ver) - (by >> ss_ver), |
299 | 1.06M | (b_dim[1] + ss_ver) >> ss_ver); |
300 | 1.06M | assert(cbw4 >= 0 && cbh4 >= 0); |
301 | | |
302 | 1.06M | if (!cbw4 || !cbh4) return; |
303 | | |
304 | 1.05M | const int cbx4 = bx4 >> ss_hor; |
305 | 1.05M | const int cby4 = by4 >> ss_ver; |
306 | | |
307 | 1.05M | uint8_t (*level_cache_ptr)[4] = |
308 | 1.05M | level_cache + (by >> ss_ver) * b4_stride + (bx >> ss_hor); |
309 | 4.35M | for (int y = 0; y < cbh4; y++) { |
310 | 24.2M | for (int x = 0; x < cbw4; x++) { |
311 | 20.9M | level_cache_ptr[x][2] = filter_level[2][0][0]; |
312 | 20.9M | level_cache_ptr[x][3] = filter_level[3][0][0]; |
313 | 20.9M | } |
314 | 3.29M | level_cache_ptr += b4_stride; |
315 | 3.29M | } |
316 | | |
317 | 1.05M | mask_edges_chroma(lflvl->filter_uv, cby4, cbx4, cbw4, cbh4, 0, uvtx, |
318 | 1.05M | auv, luv, ss_hor, ss_ver); |
319 | 1.05M | } |
320 | | |
321 | | void dav1d_create_lf_mask_inter(Av1Filter *const lflvl, |
322 | | uint8_t (*const level_cache)[4], |
323 | | const ptrdiff_t b4_stride, |
324 | | const uint8_t (*filter_level)[8][2], |
325 | | const int bx, const int by, |
326 | | const int iw, const int ih, |
327 | | const int skip, const enum BlockSize bs, |
328 | | const enum RectTxfmSize max_ytx, |
329 | | const uint16_t *const tx_masks, |
330 | | const enum RectTxfmSize uvtx, |
331 | | const enum Dav1dPixelLayout layout, |
332 | | uint8_t *const ay, uint8_t *const ly, |
333 | | uint8_t *const auv, uint8_t *const luv) |
334 | 120k | { |
335 | 120k | const uint8_t *const b_dim = dav1d_block_dimensions[bs]; |
336 | 120k | const int bw4 = imin(iw - bx, b_dim[0]); |
337 | 120k | const int bh4 = imin(ih - by, b_dim[1]); |
338 | 120k | const int bx4 = bx & 31; |
339 | 120k | const int by4 = by & 31; |
340 | 120k | assert(bw4 >= 0 && bh4 >= 0); |
341 | | |
342 | 120k | if (bw4 && bh4) { |
343 | 115k | uint8_t (*level_cache_ptr)[4] = level_cache + by * b4_stride + bx; |
344 | 746k | for (int y = 0; y < bh4; y++) { |
345 | 4.80M | for (int x = 0; x < bw4; x++) { |
346 | 4.17M | level_cache_ptr[x][0] = filter_level[0][0][0]; |
347 | 4.17M | level_cache_ptr[x][1] = filter_level[1][0][0]; |
348 | 4.17M | } |
349 | 630k | level_cache_ptr += b4_stride; |
350 | 630k | } |
351 | | |
352 | 115k | mask_edges_inter(lflvl->filter_y, by4, bx4, bw4, bh4, skip, |
353 | 115k | max_ytx, tx_masks, ay, ly); |
354 | 115k | } |
355 | | |
356 | 120k | if (!auv) return; |
357 | | |
358 | 109k | const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420; |
359 | 109k | const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444; |
360 | 109k | const int cbw4 = imin(((iw + ss_hor) >> ss_hor) - (bx >> ss_hor), |
361 | 109k | (b_dim[0] + ss_hor) >> ss_hor); |
362 | 109k | const int cbh4 = imin(((ih + ss_ver) >> ss_ver) - (by >> ss_ver), |
363 | 109k | (b_dim[1] + ss_ver) >> ss_ver); |
364 | 109k | assert(cbw4 >= 0 && cbh4 >= 0); |
365 | | |
366 | 109k | if (!cbw4 || !cbh4) return; |
367 | | |
368 | 105k | const int cbx4 = bx4 >> ss_hor; |
369 | 105k | const int cby4 = by4 >> ss_ver; |
370 | | |
371 | 105k | uint8_t (*level_cache_ptr)[4] = |
372 | 105k | level_cache + (by >> ss_ver) * b4_stride + (bx >> ss_hor); |
373 | 626k | for (int y = 0; y < cbh4; y++) { |
374 | 4.36M | for (int x = 0; x < cbw4; x++) { |
375 | 3.84M | level_cache_ptr[x][2] = filter_level[2][0][0]; |
376 | 3.84M | level_cache_ptr[x][3] = filter_level[3][0][0]; |
377 | 3.84M | } |
378 | 521k | level_cache_ptr += b4_stride; |
379 | 521k | } |
380 | | |
381 | 105k | mask_edges_chroma(lflvl->filter_uv, cby4, cbx4, cbw4, cbh4, skip, uvtx, |
382 | 105k | auv, luv, ss_hor, ss_ver); |
383 | 105k | } |
384 | | |
385 | 25.8k | void dav1d_calc_eih(Av1FilterLUT *const lim_lut, const int filter_sharpness) { |
386 | | // set E/I/H values from loopfilter level |
387 | 25.8k | const int sharp = filter_sharpness; |
388 | 1.67M | for (int level = 0; level < 64; level++) { |
389 | 1.65M | int limit = level; |
390 | | |
391 | 1.65M | if (sharp > 0) { |
392 | 1.42M | limit >>= (sharp + 3) >> 2; |
393 | 1.42M | limit = imin(limit, 9 - sharp); |
394 | 1.42M | } |
395 | 1.65M | limit = imax(limit, 1); |
396 | | |
397 | 1.65M | lim_lut->i[level] = limit; |
398 | 1.65M | lim_lut->e[level] = 2 * (level + 2) + limit; |
399 | 1.65M | } |
400 | 25.8k | lim_lut->sharp[0] = (sharp + 3) >> 2; |
401 | 25.8k | lim_lut->sharp[1] = sharp ? 9 - sharp : 0xff; |
402 | 25.8k | } |
403 | | |
404 | | static void calc_lf_value(uint8_t (*const lflvl_values)[2], |
405 | | const int base_lvl, const int lf_delta, |
406 | | const int seg_delta, |
407 | | const Dav1dLoopfilterModeRefDeltas *const mr_delta) |
408 | 982k | { |
409 | 982k | const int base = iclip(iclip(base_lvl + lf_delta, 0, 63) + seg_delta, 0, 63); |
410 | | |
411 | 982k | if (!mr_delta) { |
412 | 878k | memset(lflvl_values, base, sizeof(*lflvl_values) * 8); |
413 | 878k | } else { |
414 | 104k | const int sh = base >= 32; |
415 | 104k | lflvl_values[0][0] = lflvl_values[0][1] = |
416 | 104k | iclip(base + (mr_delta->ref_delta[0] * (1 << sh)), 0, 63); |
417 | 834k | for (int r = 1; r < 8; r++) { |
418 | 2.18M | for (int m = 0; m < 2; m++) { |
419 | 1.45M | const int delta = |
420 | 1.45M | mr_delta->mode_delta[m] + mr_delta->ref_delta[r]; |
421 | 1.45M | lflvl_values[r][m] = iclip(base + (delta * (1 << sh)), 0, 63); |
422 | 1.45M | } |
423 | 729k | } |
424 | 104k | } |
425 | 982k | } |
426 | | |
427 | | static inline void calc_lf_value_chroma(uint8_t (*const lflvl_values)[2], |
428 | | const int base_lvl, const int lf_delta, |
429 | | const int seg_delta, |
430 | | const Dav1dLoopfilterModeRefDeltas *const mr_delta) |
431 | 514k | { |
432 | 514k | if (!base_lvl) |
433 | 46.9k | memset(lflvl_values, 0, sizeof(*lflvl_values) * 8); |
434 | 467k | else |
435 | 467k | calc_lf_value(lflvl_values, base_lvl, lf_delta, seg_delta, mr_delta); |
436 | 514k | } |
437 | | |
438 | | void dav1d_calc_lf_values(uint8_t (*const lflvl_values)[4][8][2], |
439 | | const Dav1dFrameHeader *const hdr, |
440 | | const int8_t lf_delta[4]) |
441 | 60.7k | { |
442 | 60.7k | const int n_seg = hdr->segmentation.enabled ? 8 : 1; |
443 | | |
444 | 60.7k | if (!hdr->loopfilter.level_y[0] && !hdr->loopfilter.level_y[1]) { |
445 | 2.44k | memset(lflvl_values, 0, sizeof(*lflvl_values) * n_seg); |
446 | 2.44k | return; |
447 | 2.44k | } |
448 | | |
449 | 58.3k | const Dav1dLoopfilterModeRefDeltas *const mr_deltas = |
450 | 58.3k | hdr->loopfilter.mode_ref_delta_enabled ? |
451 | 58.3k | &hdr->loopfilter.mode_ref_deltas : NULL; |
452 | 315k | for (int s = 0; s < n_seg; s++) { |
453 | 257k | const Dav1dSegmentationData *const segd = |
454 | 257k | hdr->segmentation.enabled ? &hdr->segmentation.seg_data.d[s] : NULL; |
455 | | |
456 | 257k | calc_lf_value(lflvl_values[s][0], hdr->loopfilter.level_y[0], |
457 | 257k | lf_delta[0], segd ? segd->delta_lf_y_v : 0, mr_deltas); |
458 | 257k | calc_lf_value(lflvl_values[s][1], hdr->loopfilter.level_y[1], |
459 | 257k | lf_delta[hdr->delta.lf.multi ? 1 : 0], |
460 | 257k | segd ? segd->delta_lf_y_h : 0, mr_deltas); |
461 | 257k | calc_lf_value_chroma(lflvl_values[s][2], hdr->loopfilter.level_u, |
462 | 257k | lf_delta[hdr->delta.lf.multi ? 2 : 0], |
463 | 257k | segd ? segd->delta_lf_u : 0, mr_deltas); |
464 | 257k | calc_lf_value_chroma(lflvl_values[s][3], hdr->loopfilter.level_v, |
465 | 257k | lf_delta[hdr->delta.lf.multi ? 3 : 0], |
466 | 257k | segd ? segd->delta_lf_v : 0, mr_deltas); |
467 | 257k | } |
468 | 58.3k | } |