/work/dav1d/src/lf_mask.c

Source
/*
 * Copyright © 2018, VideoLAN and dav1d authors
 * Copyright © 2018, Two Orioles, LLC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"

#include <string.h>

#include "common/intops.h"

#include "src/ctx.h"
#include "src/levels.h"
#include "src/lf_mask.h"
#include "src/tables.h"

/*
 * Recursively expand the transform partitioning of one region into the
 * per-4x4 scratch tables in txa.
 *
 * txa      - scratch table, positioned so that [..][..][0][0] is the top-left
 *            4x4 unit of the region being described; indexed
 *            [edge][txsz, step][y][x].
 * from     - transform size of the region at this recursion level.
 * depth    - current split depth; selects tx_masks[depth]. No split flag is
 *            consulted once depth exceeds 1.
 * y_off,
 * x_off    - position of this region in the split-flag grid; its flag is bit
 *            (y_off * 4 + x_off) of tx_masks[depth].
 * tx_masks - split flags, one 16-bit word per depth.
 *
 * The tables written here are consumed by mask_edges_inter(), which reads
 * txa[0][0] / txa[1][0] as mask size indices and txa[0][1] / txa[1][1] as the
 * horizontal / vertical distance to the next transform edge.
 */
static void decomp_tx(uint8_t (*const txa)[2 /* txsz, step */][32 /* y */][32 /* x */],
                      const enum RectTxfmSize from,
                      const int depth,
                      const int y_off, const int x_off,
                      const uint16_t *const tx_masks)
{
    const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[from];
    // TX_4X4 and anything deeper than depth 1 is never split; otherwise the
    // decision comes from this region's bit in the per-depth mask.
    const int is_split = (from == (int) TX_4X4 || depth > 1) ? 0 :
        (tx_masks[depth] >> (y_off * 4 + x_off)) & 1;

    if (is_split) {
        const enum RectTxfmSize sub = t_dim->sub;
        // half the width/height of the current transform (same units as
        // t_dim->w / t_dim->h, which index the 32x32 table)
        const int htw4 = t_dim->w >> 1, hth4 = t_dim->h >> 1;

        // Top-left child always exists. Each child call gets a table pointer
        // re-based to the child's own top-left unit, so it indexes from 0.
        decomp_tx(txa, sub, depth + 1, y_off * 2 + 0, x_off * 2 + 0, tx_masks);
        // Right child: only when the transform is at least as wide as tall.
        if (t_dim->w >= t_dim->h)
            decomp_tx((uint8_t(*)[2][32][32]) &txa[0][0][0][htw4],
                      sub, depth + 1, y_off * 2 + 0, x_off * 2 + 1, tx_masks);
        // Bottom child: only when the transform is at least as tall as wide.
        if (t_dim->h >= t_dim->w) {
            decomp_tx((uint8_t(*)[2][32][32]) &txa[0][0][hth4][0],
                      sub, depth + 1, y_off * 2 + 1, x_off * 2 + 0, tx_masks);
            // Bottom-right child: both conditions hold, i.e. w == h.
            if (t_dim->w >= t_dim->h)
                decomp_tx((uint8_t(*)[2][32][32]) &txa[0][0][hth4][htw4],
                          sub, depth + 1, y_off * 2 + 1, x_off * 2 + 1, tx_masks);
        }
    } else {
        // Leaf: size values are clamped to 2 because they are later used as
        // an index into a dimension of size 3 in the luma masks.
        const int lw = imin(2, t_dim->lw), lh = imin(2, t_dim->lh);

        // For each row of the transform: store lw in the txa[0][0] row, lh in
        // the txa[1][0] row, and the transform width as the horizontal step at
        // the row's first column. rep_macro is supplied by case_set_upto16
        // (defined outside this file); presumably it fills the row across the
        // transform's width - TODO confirm against its definition.
#define set_ctx(rep_macro) \
        for (int y = 0; y < t_dim->h; y++) { \
            rep_macro(txa[0][0][y], 0, lw); \
            rep_macro(txa[1][0][y], 0, lh); \
            txa[0][1][y][0] = t_dim->w; \
        }
        case_set_upto16(t_dim->lw);
#undef set_ctx
        // Store the transform height as the vertical step in the first row of
        // txa[1][1]; the fill routine is selected by t_dim->lw.
        dav1d_memset_pow2[t_dim->lw](txa[1][1][0], t_dim->h);
    }
}

static inline void mask_edges_inter(uint16_t (*const masks)[32][3][2],
                                    const int by4, const int bx4,
                                    const int w4, const int h4, const int skip,
                                    const enum RectTxfmSize max_tx,
                                    const uint16_t *const tx_masks,
                                    uint8_t *const a, uint8_t *const l)
{
    const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[max_tx];
    int y, x;

    ALIGN_STK_16(uint8_t, txa, 2 /* edge */, [2 /* txsz, step */][32 /* y */][32 /* x */]);
    for (int y_off = 0, y = 0; y < h4; y += t_dim->h, y_off++)
        for (int x_off = 0, x = 0; x < w4; x += t_dim->w, x_off++)
            decomp_tx((uint8_t(*)[2][32][32]) &txa[0][0][y][x],
                      max_tx, 0, y_off, x_off, tx_masks);

    // left block edge
    unsigned mask = 1U << by4;
    for (y = 0; y < h4; y++, mask <<= 1) {
        const int sidx = mask >= 0x10000;
        const unsigned smask = mask >> (sidx << 4);
        masks[0][bx4][imin(txa[0][0][y][0], l[y])][sidx] |= smask;
    }

    // top block edge
    for (x = 0, mask = 1U << bx4; x < w4; x++, mask <<= 1) {
        const int sidx = mask >= 0x10000;
        const unsigned smask = mask >> (sidx << 4);
        masks[1][by4][imin(txa[1][0][0][x], a[x])][sidx] |= smask;
    }

    if (!skip) {
        // inner (tx) left|right edges
        for (y = 0, mask = 1U << by4; y < h4; y++, mask <<= 1) {
            const int sidx = mask >= 0x10000U;
            const unsigned smask = mask >> (sidx << 4);
            int ltx = txa[0][0][y][0];
            int step = txa[0][1][y][0];
            for (x = step; x < w4; x += step) {
                const int rtx = txa[0][0][y][x];
                masks[0][bx4 + x][imin(rtx, ltx)][sidx] |= smask;
                ltx = rtx;
                step = txa[0][1][y][x];
            }
        }

        //            top
        // inner (tx) --- edges
        //           bottom
        for (x = 0, mask = 1U << bx4; x < w4; x++, mask <<= 1) {
            const int sidx = mask >= 0x10000U;
            const unsigned smask = mask >> (sidx << 4);
            int ttx = txa[1][0][0][x];
            int step = txa[1][1][0][x];
            for (y = step; y < h4; y += step) {
                const int btx = txa[1][0][y][x];
                masks[1][by4 + y][imin(ttx, btx)][sidx] |= smask;
                ttx = btx;
                step = txa[1][1][y][x];
            }
        }
    }

    for (y = 0; y < h4; y++)
        l[y] = txa[0][0][y][w4 - 1];
    memcpy(a, txa[1][0][h4 - 1], w4);
}

/*
 * Add the loop-filter edges of one intra-coded luma block to masks.
 *
 * The block uses a single transform size tx throughout, so inner edges fall
 * on a regular grid of t_dim->w by t_dim->h and can be OR-ed in as whole bit
 * ranges.
 *
 * masks    - edge masks indexed [direction][position][size index][16-bit half].
 * by4, bx4 - block position used as bit offset / row-column index.
 * w4, h4   - block size in the same units.
 * a, l     - above / left size context; read for the block edges, then
 *            filled with this block's clamped size values.
 */
static inline void mask_edges_intra(uint16_t (*const masks)[32][3][2],
                                    const int by4, const int bx4,
                                    const int w4, const int h4,
                                    const enum RectTxfmSize tx,
                                    uint8_t *const a, uint8_t *const l)
{
    const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[tx];
    // size indices, clamped to the three entries available in masks
    const int w_idx = imin(2, t_dim->lw);
    const int h_idx = imin(2, t_dim->lh);
    const unsigned first_row_bit = 1U << by4;
    const unsigned first_col_bit = 1U << bx4;

    // Left block edge.
    unsigned bit = first_row_bit;
    for (int y = 0; y < h4; y++) {
        const int half = bit >= 0x10000;
        const unsigned bits = bit >> (half << 4);
        masks[0][bx4][imin(w_idx, l[y])][half] |= bits;
        bit <<= 1;
    }

    // Top block edge.
    bit = first_col_bit;
    for (int x = 0; x < w4; x++) {
        const int half = bit >= 0x10000;
        const unsigned bits = bit >> (half << 4);
        masks[1][by4][imin(h_idx, a[x])][half] |= bits;
        bit <<= 1;
    }

    // Inner left|right edges: h4 consecutive bits starting at by4, computed
    // in 64 bits so the shift cannot overflow, then split into halves.
    const unsigned rows =
        (unsigned) ((((uint64_t) first_row_bit) << h4) - first_row_bit);
    const unsigned rows_lo = rows & 0xffff;
    const unsigned rows_hi = rows >> 16;
    for (int x = t_dim->w; x < w4; x += t_dim->w) {
        if (rows_lo) masks[0][bx4 + x][w_idx][0] |= rows_lo;
        if (rows_hi) masks[0][bx4 + x][w_idx][1] |= rows_hi;
    }

    // Inner top/bottom edges: w4 consecutive bits starting at bx4.
    const unsigned cols =
        (unsigned) ((((uint64_t) first_col_bit) << w4) - first_col_bit);
    const unsigned cols_lo = cols & 0xffff;
    const unsigned cols_hi = cols >> 16;
    for (int y = t_dim->h; y < h4; y += t_dim->h) {
        if (cols_lo) masks[1][by4 + y][h_idx][0] |= cols_lo;
        if (cols_hi) masks[1][by4 + y][h_idx][1] |= cols_hi;
    }

    // Publish this block's size indices as context for later neighbours.
    dav1d_memset_likely_pow2(a, h_idx, w4);
    dav1d_memset_likely_pow2(l, w_idx, h4);
}

/*
 * Add the loop-filter edges of one chroma block to masks.
 *
 * masks          - chroma edge masks indexed
 *                  [direction][position][size index][half].
 * cby4, cbx4     - chroma block position used as bit offset / index.
 * cw4, ch4       - chroma block size in the same units.
 * skip_inter     - when non-zero, inner transform edges are not added.
 * tx             - chroma transform size (uniform over the block).
 * a, l           - above / left size context; read for the block edges, then
 *                  filled with this block's size indices.
 * ss_hor, ss_ver - chroma subsampling flags; they narrow each mask half from
 *                  16 bits to 8 in the subsampled direction.
 */
static void mask_edges_chroma(uint16_t (*const masks)[32][2][2],
                              const int cby4, const int cbx4,
                              const int cw4, const int ch4,
                              const int skip_inter,
                              const enum RectTxfmSize tx,
                              uint8_t *const a, uint8_t *const l,
                              const int ss_hor, const int ss_ver)
{
    const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[tx];
    // chroma only distinguishes "smallest" (0) from "larger" (1)
    const int w_idx = t_dim->lw != 0;
    const int h_idx = t_dim->lh != 0;
    const int v_log2 = 4 - ss_ver, h_log2 = 4 - ss_hor;
    const int v_bits = 16 >> ss_ver, h_bits = 16 >> ss_hor;
    const unsigned v_limit = 1 << v_bits, h_limit = 1 << h_bits;
    const unsigned first_row_bit = 1U << cby4;
    const unsigned first_col_bit = 1U << cbx4;

    // Left block edge.
    unsigned bit = first_row_bit;
    for (int y = 0; y < ch4; y++) {
        const int half = bit >= v_limit;
        const unsigned bits = bit >> (half << v_log2);
        masks[0][cbx4][imin(w_idx, l[y])][half] |= bits;
        bit <<= 1;
    }

    // Top block edge.
    bit = first_col_bit;
    for (int x = 0; x < cw4; x++) {
        const int half = bit >= h_limit;
        const unsigned bits = bit >> (half << h_log2);
        masks[1][cby4][imin(h_idx, a[x])][half] |= bits;
        bit <<= 1;
    }

    if (!skip_inter) {
        // Inner left|right edges: ch4 consecutive row bits, split into halves.
        const unsigned rows =
            (unsigned) ((((uint64_t) first_row_bit) << ch4) - first_row_bit);
        const unsigned rows_lo = rows & (v_limit - 1);
        const unsigned rows_hi = rows >> v_bits;
        for (int x = t_dim->w; x < cw4; x += t_dim->w) {
            if (rows_lo) masks[0][cbx4 + x][w_idx][0] |= rows_lo;
            if (rows_hi) masks[0][cbx4 + x][w_idx][1] |= rows_hi;
        }

        // Inner top/bottom edges: cw4 consecutive column bits.
        const unsigned cols =
            (unsigned) ((((uint64_t) first_col_bit) << cw4) - first_col_bit);
        const unsigned cols_lo = cols & (h_limit - 1);
        const unsigned cols_hi = cols >> h_bits;
        for (int y = t_dim->h; y < ch4; y += t_dim->h) {
            if (cols_lo) masks[1][cby4 + y][h_idx][0] |= cols_lo;
            if (cols_hi) masks[1][cby4 + y][h_idx][1] |= cols_hi;
        }
    }

    // Publish this block's size indices as context for later neighbours.
    dav1d_memset_likely_pow2(a, h_idx, cw4);
    dav1d_memset_likely_pow2(l, w_idx, ch4);
}

/*
 * Record filter levels and edge masks for one intra-coded block.
 *
 * lflvl        - receives the luma (filter_y) and chroma (filter_uv) masks.
 * level_cache  - per-position level store with row stride b4_stride; entries
 *                [0]/[1] are written for luma, [2]/[3] for chroma.
 * filter_level - source levels; element [p][0][0] is used for plane slot p.
 * bx, by       - block position; iw, ih - frame extent in the same units.
 * bs           - block size, looked up in dav1d_block_dimensions.
 * ytx, uvtx    - luma / chroma transform sizes.
 * layout       - pixel layout, determines chroma subsampling.
 * ay, ly       - luma above / left contexts.
 * auv, luv     - chroma above / left contexts; chroma is skipped entirely
 *                when auv is NULL.
 */
void dav1d_create_lf_mask_intra(Av1Filter *const lflvl,
                                uint8_t (*const level_cache)[4],
                                const ptrdiff_t b4_stride,
                                const uint8_t (*filter_level)[8][2],
                                const int bx, const int by,
                                const int iw, const int ih,
                                const enum BlockSize bs,
                                const enum RectTxfmSize ytx,
                                const enum RectTxfmSize uvtx,
                                const enum Dav1dPixelLayout layout,
                                uint8_t *const ay, uint8_t *const ly,
                                uint8_t *const auv, uint8_t *const luv)
{
    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
    // block size clipped to the frame edge
    const int bw4 = imin(iw - bx, b_dim[0]);
    const int bh4 = imin(ih - by, b_dim[1]);
    // position within the 32x32-unit mask area
    const int bx4 = bx & 31;
    const int by4 = by & 31;
    assert(bw4 >= 0 && bh4 >= 0);

    if (bw4 && bh4) {
        uint8_t (*row)[4] = level_cache + by * b4_stride + bx;
        for (int y = 0; y < bh4; y++, row += b4_stride) {
            for (int x = 0; x < bw4; x++) {
                row[x][0] = filter_level[0][0][0];
                row[x][1] = filter_level[1][0][0];
            }
        }

        mask_edges_intra(lflvl->filter_y, by4, bx4, bw4, bh4, ytx, ay, ly);
    }

    // No chroma context means there is no chroma work to do.
    if (!auv) return;

    const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420;
    const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444;
    // chroma block size, rounded up when subsampled and clipped to the frame
    const int cbw4 = imin(((iw + ss_hor) >> ss_hor) - (bx >> ss_hor),
                          (b_dim[0] + ss_hor) >> ss_hor);
    const int cbh4 = imin(((ih + ss_ver) >> ss_ver) - (by >> ss_ver),
                          (b_dim[1] + ss_ver) >> ss_ver);
    assert(cbw4 >= 0 && cbh4 >= 0);

    if (!cbw4 || !cbh4) return;

    uint8_t (*crow)[4] =
        level_cache + (by >> ss_ver) * b4_stride + (bx >> ss_hor);
    for (int y = 0; y < cbh4; y++, crow += b4_stride) {
        for (int x = 0; x < cbw4; x++) {
            crow[x][2] = filter_level[2][0][0];
            crow[x][3] = filter_level[3][0][0];
        }
    }

    // Intra blocks always get their inner chroma edges (skip_inter = 0).
    mask_edges_chroma(lflvl->filter_uv, by4 >> ss_ver, bx4 >> ss_hor,
                      cbw4, cbh4, 0, uvtx, auv, luv, ss_hor, ss_ver);
}

/*
 * Record filter levels and edge masks for one inter-coded block.
 *
 * lflvl        - receives the luma (filter_y) and chroma (filter_uv) masks.
 * level_cache  - per-position level store with row stride b4_stride; entries
 *                [0]/[1] are written for luma, [2]/[3] for chroma.
 * filter_level - source levels; element [p][0][0] is used for plane slot p.
 * bx, by       - block position; iw, ih - frame extent in the same units.
 * skip         - forwarded to the edge routines; when non-zero they add only
 *                the outer block edges.
 * bs           - block size, looked up in dav1d_block_dimensions.
 * max_ytx,
 * tx_masks     - largest luma transform and its split flags.
 * uvtx         - chroma transform size.
 * layout       - pixel layout, determines chroma subsampling.
 * ay, ly       - luma above / left contexts.
 * auv, luv     - chroma above / left contexts; chroma is skipped entirely
 *                when auv is NULL.
 */
void dav1d_create_lf_mask_inter(Av1Filter *const lflvl,
                                uint8_t (*const level_cache)[4],
                                const ptrdiff_t b4_stride,
                                const uint8_t (*filter_level)[8][2],
                                const int bx, const int by,
                                const int iw, const int ih,
                                const int skip, const enum BlockSize bs,
                                const enum RectTxfmSize max_ytx,
                                const uint16_t *const tx_masks,
                                const enum RectTxfmSize uvtx,
                                const enum Dav1dPixelLayout layout,
                                uint8_t *const ay, uint8_t *const ly,
                                uint8_t *const auv, uint8_t *const luv)
{
    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
    // block size clipped to the frame edge
    const int bw4 = imin(iw - bx, b_dim[0]);
    const int bh4 = imin(ih - by, b_dim[1]);
    // position within the 32x32-unit mask area
    const int bx4 = bx & 31;
    const int by4 = by & 31;
    assert(bw4 >= 0 && bh4 >= 0);

    if (bw4 && bh4) {
        uint8_t (*row)[4] = level_cache + by * b4_stride + bx;
        for (int y = 0; y < bh4; y++, row += b4_stride) {
            for (int x = 0; x < bw4; x++) {
                row[x][0] = filter_level[0][0][0];
                row[x][1] = filter_level[1][0][0];
            }
        }

        mask_edges_inter(lflvl->filter_y, by4, bx4, bw4, bh4, skip,
                         max_ytx, tx_masks, ay, ly);
    }

    // No chroma context means there is no chroma work to do.
    if (!auv) return;

    const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420;
    const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444;
    // chroma block size, rounded up when subsampled and clipped to the frame
    const int cbw4 = imin(((iw + ss_hor) >> ss_hor) - (bx >> ss_hor),
                          (b_dim[0] + ss_hor) >> ss_hor);
    const int cbh4 = imin(((ih + ss_ver) >> ss_ver) - (by >> ss_ver),
                          (b_dim[1] + ss_ver) >> ss_ver);
    assert(cbw4 >= 0 && cbh4 >= 0);

    if (!cbw4 || !cbh4) return;

    uint8_t (*crow)[4] =
        level_cache + (by >> ss_ver) * b4_stride + (bx >> ss_hor);
    for (int y = 0; y < cbh4; y++, crow += b4_stride) {
        for (int x = 0; x < cbw4; x++) {
            crow[x][2] = filter_level[2][0][0];
            crow[x][3] = filter_level[3][0][0];
        }
    }

    mask_edges_chroma(lflvl->filter_uv, by4 >> ss_ver, bx4 >> ss_hor,
                      cbw4, cbh4, skip, uvtx, auv, luv, ss_hor, ss_ver);
}

/*
 * Fill the loop-filter limit lookup tables for all 64 filter levels.
 *
 * lim_lut          - output; i[] and e[] are written per level, sharp[] holds
 *                    the shift and cap derived from the sharpness.
 * filter_sharpness - sharpness setting; values above zero shrink and cap the
 *                    per-level limit.
 */
void dav1d_calc_eih(Av1FilterLUT *const lim_lut, const int filter_sharpness) {
    const int shift = (filter_sharpness + 3) >> 2;

    for (int level = 0; level < 64; level++) {
        int limit;

        if (filter_sharpness > 0)
            limit = imin(level >> shift, 9 - filter_sharpness);
        else
            limit = level;
        // the limit is never allowed to drop below 1
        limit = imax(limit, 1);

        lim_lut->i[level] = limit;
        lim_lut->e[level] = 2 * (level + 2) + limit;
    }

    lim_lut->sharp[0] = shift;
    // 0xff acts as "no cap" when sharpness is zero
    lim_lut->sharp[1] = filter_sharpness ? 9 - filter_sharpness : 0xff;
}

/*
 * Compute the 8x2 table of filter levels (reference x mode) for one plane
 * slot of one segment.
 *
 * lflvl_values - output table of 8 rows with 2 entries each.
 * base_lvl     - frame-level filter level.
 * lf_delta     - delta added to base_lvl, result clipped to [0, 63].
 * seg_delta    - segment delta added next, result clipped again.
 * mr_delta     - optional per-mode / per-reference deltas; when NULL every
 *                entry receives the same base value.
 */
static void calc_lf_value(uint8_t (*const lflvl_values)[2],
                          const int base_lvl, const int lf_delta,
                          const int seg_delta,
                          const Dav1dLoopfilterModeRefDeltas *const mr_delta)
{
    const int with_lf = iclip(base_lvl + lf_delta, 0, 63);
    const int base = iclip(with_lf + seg_delta, 0, 63);

    if (!mr_delta) {
        memset(lflvl_values, base, sizeof(*lflvl_values) * 8);
        return;
    }

    // deltas count double once the base level reaches 32; a multiplication
    // is used because the deltas may be negative
    const int scale = 1 << (base >= 32);

    // row 0 uses the reference delta only, identical for both modes
    const int row0 = iclip(base + (mr_delta->ref_delta[0] * scale), 0, 63);
    lflvl_values[0][1] = row0;
    lflvl_values[0][0] = row0;

    for (int ref = 1; ref < 8; ref++) {
        for (int mode = 0; mode < 2; mode++) {
            const int delta =
                mr_delta->mode_delta[mode] + mr_delta->ref_delta[ref];
            lflvl_values[ref][mode] = iclip(base + (delta * scale), 0, 63);
        }
    }
}

/*
 * Chroma variant of calc_lf_value(): a zero base level yields an all-zero
 * table regardless of any deltas; otherwise the regular computation is used.
 */
static inline void calc_lf_value_chroma(uint8_t (*const lflvl_values)[2],
                                        const int base_lvl, const int lf_delta,
                                        const int seg_delta,
                                        const Dav1dLoopfilterModeRefDeltas *const mr_delta)
{
    if (base_lvl) {
        calc_lf_value(lflvl_values, base_lvl, lf_delta, seg_delta, mr_delta);
        return;
    }

    memset(lflvl_values, 0, sizeof(*lflvl_values) * 8);
}

/*
 * Compute the filter-level tables for every active segment.
 *
 * lflvl_values - output, indexed [segment][plane slot][8][2]; slots 0 and 1
 *                come from level_y[0] / level_y[1], slots 2 and 3 from
 *                level_u / level_v.
 * hdr          - frame header supplying levels, segmentation data and
 *                mode/ref deltas.
 * lf_delta     - four deltas; only entry 0 is used unless
 *                hdr->delta.lf.multi is set.
 */
void dav1d_calc_lf_values(uint8_t (*const lflvl_values)[4][8][2],
                          const Dav1dFrameHeader *const hdr,
                          const int8_t lf_delta[4])
{
    // without segmentation only the first segment's tables are produced
    const int n_seg = hdr->segmentation.enabled ? 8 : 1;

    // both luma levels zero: everything is zero
    if (!hdr->loopfilter.level_y[0] && !hdr->loopfilter.level_y[1]) {
        memset(lflvl_values, 0, sizeof(*lflvl_values) * n_seg);
        return;
    }

    const Dav1dLoopfilterModeRefDeltas *mr_deltas = NULL;
    if (hdr->loopfilter.mode_ref_delta_enabled)
        mr_deltas = &hdr->loopfilter.mode_ref_deltas;

    for (int s = 0; s < n_seg; s++) {
        // per-segment deltas default to zero when segmentation is disabled
        int seg_y_v = 0, seg_y_h = 0, seg_u = 0, seg_v = 0;
        if (hdr->segmentation.enabled) {
            const Dav1dSegmentationData *const segd =
                &hdr->segmentation.seg_data.d[s];
            seg_y_v = segd->delta_lf_y_v;
            seg_y_h = segd->delta_lf_y_h;
            seg_u = segd->delta_lf_u;
            seg_v = segd->delta_lf_v;
        }

        calc_lf_value(lflvl_values[s][0], hdr->loopfilter.level_y[0],
                      lf_delta[0], seg_y_v, mr_deltas);
        calc_lf_value(lflvl_values[s][1], hdr->loopfilter.level_y[1],
                      lf_delta[hdr->delta.lf.multi ? 1 : 0],
                      seg_y_h, mr_deltas);
        calc_lf_value_chroma(lflvl_values[s][2], hdr->loopfilter.level_u,
                             lf_delta[hdr->delta.lf.multi ? 2 : 0],
                             seg_u, mr_deltas);
        calc_lf_value_chroma(lflvl_values[s][3], hdr->loopfilter.level_v,
                             lf_delta[hdr->delta.lf.multi ? 3 : 0],
                             seg_v, mr_deltas);
    }
}

Coverage Report

Created: 2026-05-30 06:10

Line	Count	Source
1		/*
2		* Copyright © 2018, VideoLAN and dav1d authors
3		* Copyright © 2018, Two Orioles, LLC
4		* All rights reserved.
5		*
6		* Redistribution and use in source and binary forms, with or without
7		* modification, are permitted provided that the following conditions are met:
8		*
9		* 1. Redistributions of source code must retain the above copyright notice, this
10		* list of conditions and the following disclaimer.
11		*
12		* 2. Redistributions in binary form must reproduce the above copyright notice,
13		* this list of conditions and the following disclaimer in the documentation
14		* and/or other materials provided with the distribution.
15		*
16		* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17		* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18		* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19		* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20		* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21		* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22		* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23		* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24		* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25		* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26		*/
27
28		#include "config.h"
29
30		#include <string.h>
31
32		#include "common/intops.h"
33
34		#include "src/ctx.h"
35		#include "src/levels.h"
36		#include "src/lf_mask.h"
37		#include "src/tables.h"
38
39		static void decomp_tx(uint8_t (*const txa)[2 /* txsz, step */][32 /* y */][32 /* x */],
40		const enum RectTxfmSize from,
41		const int depth,
42		const int y_off, const int x_off,
43		const uint16_t *const tx_masks)
44	244k	{
45	244k	const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[from];
46	244k	const int is_split = (from == (int) TX_4X4 \|\| depth > 1) ? 0 :
47	244k	(tx_masks[depth] >> (y_off * 4 + x_off)) & 1;
48
49	244k	if (is_split) {
50	2.67k	const enum RectTxfmSize sub = t_dim->sub;
51	2.67k	const int htw4 = t_dim->w >> 1, hth4 = t_dim->h >> 1;
52
53	2.67k	decomp_tx(txa, sub, depth + 1, y_off * 2 + 0, x_off * 2 + 0, tx_masks);
54	2.67k	if (t_dim->w >= t_dim->h)
55	2.24k	decomp_tx((uint8_t(*)[2][32][32]) &txa[0][0][0][htw4],
56	2.24k	sub, depth + 1, y_off * 2 + 0, x_off * 2 + 1, tx_masks);
57	2.67k	if (t_dim->h >= t_dim->w) {
58	1.97k	decomp_tx((uint8_t(*)[2][32][32]) &txa[0][0][hth4][0],
59	1.97k	sub, depth + 1, y_off * 2 + 1, x_off * 2 + 0, tx_masks);
60	1.97k	if (t_dim->w >= t_dim->h)
61	1.54k	decomp_tx((uint8_t(*)[2][32][32]) &txa[0][0][hth4][htw4],
62	1.54k	sub, depth + 1, y_off * 2 + 1, x_off * 2 + 1, tx_masks);
63	1.97k	}
64	241k	} else {
65	241k	const int lw = imin(2, t_dim->lw), lh = imin(2, t_dim->lh);
66
67	241k	#define set_ctx(rep_macro) \
68	1.30M	for (int y = 0; y < t_dim->h; y++) { \
69	1.06M	rep_macro(txa[0][0][y], 0, lw); \
70	1.06M	rep_macro(txa[1][0][y], 0, lh); \
71	1.06M	txa[0][1][y][0] = t_dim->w; \
72	1.06M	}
73	241k	case_set_upto16(t_dim->lw);
74	241k	#undef set_ctx
75	241k	dav1d_memset_pow2[t_dim->lw](txa[1][1][0], t_dim->h);
76	241k	}
77	244k	}
78
79		static inline void mask_edges_inter(uint16_t (*const masks)[32][3][2],
80		const int by4, const int bx4,
81		const int w4, const int h4, const int skip,
82		const enum RectTxfmSize max_tx,
83		const uint16_t *const tx_masks,
84		uint8_t *const a, uint8_t *const l)
85	115k	{
86	115k	const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[max_tx];
87	115k	int y, x;
88
89	115k	ALIGN_STK_16(uint8_t, txa, 2 /* edge */, [2 /* txsz, step */][32 /* y */][32 /* x */]);
90	264k	for (int y_off = 0, y = 0; y < h4; y += t_dim->h, y_off++)
91	384k	for (int x_off = 0, x = 0; x < w4; x += t_dim->w, x_off++)
92	235k	decomp_tx((uint8_t(*)[2][32][32]) &txa[0][0][y][x],
93	235k	max_tx, 0, y_off, x_off, tx_masks);
94
95		// left block edge
96	115k	unsigned mask = 1U << by4;
97	746k	for (y = 0; y < h4; y++, mask <<= 1) {
98	630k	const int sidx = mask >= 0x10000;
99	630k	const unsigned smask = mask >> (sidx << 4);
100	630k	masks[0][bx4][imin(txa[0][0][y][0], l[y])][sidx] \|= smask;
101	630k	}
102
103		// top block edge
104	591k	for (x = 0, mask = 1U << bx4; x < w4; x++, mask <<= 1) {
105	475k	const int sidx = mask >= 0x10000;
106	475k	const unsigned smask = mask >> (sidx << 4);
107	475k	masks[1][by4][imin(txa[1][0][0][x], a[x])][sidx] \|= smask;
108	475k	}
109
110	115k	if (!skip) {
111		// inner (tx) left\|right edges
112	368k	for (y = 0, mask = 1U << by4; y < h4; y++, mask <<= 1) {
113	314k	const int sidx = mask >= 0x10000U;
114	314k	const unsigned smask = mask >> (sidx << 4);
115	314k	int ltx = txa[0][0][y][0];
116	314k	int step = txa[0][1][y][0];
117	393k	for (x = step; x < w4; x += step) {
118	78.8k	const int rtx = txa[0][0][y][x];
119	78.8k	masks[0][bx4 + x][imin(rtx, ltx)][sidx] \|= smask;
120	78.8k	ltx = rtx;
121	78.8k	step = txa[0][1][y][x];
122	78.8k	}
123	314k	}
124
125		// top
126		// inner (tx) --- edges
127		// bottom
128	195k	for (x = 0, mask = 1U << bx4; x < w4; x++, mask <<= 1) {
129	141k	const int sidx = mask >= 0x10000U;
130	141k	const unsigned smask = mask >> (sidx << 4);
131	141k	int ttx = txa[1][0][0][x];
132	141k	int step = txa[1][1][0][x];
133	236k	for (y = step; y < h4; y += step) {
134	95.3k	const int btx = txa[1][0][y][x];
135	95.3k	masks[1][by4 + y][imin(ttx, btx)][sidx] \|= smask;
136	95.3k	ttx = btx;
137	95.3k	step = txa[1][1][y][x];
138	95.3k	}
139	141k	}
140	54.1k	}
141
142	746k	for (y = 0; y < h4; y++)
143	630k	l[y] = txa[0][0][y][w4 - 1];
144	115k	memcpy(a, txa[1][0][h4 - 1], w4);
145	115k	}
146
147		static inline void mask_edges_intra(uint16_t (*const masks)[32][3][2],
148		const int by4, const int bx4,
149		const int w4, const int h4,
150		const enum RectTxfmSize tx,
151		uint8_t *const a, uint8_t *const l)
152	1.26M	{
153	1.26M	const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[tx];
154	1.26M	const int twl4 = t_dim->lw, thl4 = t_dim->lh;
155	1.26M	const int twl4c = imin(2, twl4), thl4c = imin(2, thl4);
156	1.26M	int y, x;
157
158		// left block edge
159	1.26M	unsigned mask = 1U << by4;
160	6.97M	for (y = 0; y < h4; y++, mask <<= 1) {
161	5.71M	const int sidx = mask >= 0x10000;
162	5.71M	const unsigned smask = mask >> (sidx << 4);
163	5.71M	masks[0][bx4][imin(twl4c, l[y])][sidx] \|= smask;
164	5.71M	}
165
166		// top block edge
167	6.95M	for (x = 0, mask = 1U << bx4; x < w4; x++, mask <<= 1) {
168	5.68M	const int sidx = mask >= 0x10000;
169	5.68M	const unsigned smask = mask >> (sidx << 4);
170	5.68M	masks[1][by4][imin(thl4c, a[x])][sidx] \|= smask;
171	5.68M	}
172
173		// inner (tx) left\|right edges
174	1.26M	const int hstep = t_dim->w;
175	1.26M	unsigned t = 1U << by4;
176	1.26M	unsigned inner = (unsigned) ((((uint64_t) t) << h4) - t);
177	1.26M	unsigned inner1 = inner & 0xffff, inner2 = inner >> 16;
178	1.48M	for (x = hstep; x < w4; x += hstep) {
179	224k	if (inner1) masks[0][bx4 + x][twl4c][0] \|= inner1;
180	224k	if (inner2) masks[0][bx4 + x][twl4c][1] \|= inner2;
181	224k	}
182
183		// top
184		// inner (tx) --- edges
185		// bottom
186	1.26M	const int vstep = t_dim->h;
187	1.26M	t = 1U << bx4;
188	1.26M	inner = (unsigned) ((((uint64_t) t) << w4) - t);
189	1.26M	inner1 = inner & 0xffff;
190	1.26M	inner2 = inner >> 16;
191	1.49M	for (y = vstep; y < h4; y += vstep) {
192	227k	if (inner1) masks[1][by4 + y][thl4c][0] \|= inner1;
193	227k	if (inner2) masks[1][by4 + y][thl4c][1] \|= inner2;
194	227k	}
195
196	1.26M	dav1d_memset_likely_pow2(a, thl4c, w4);
197	1.26M	dav1d_memset_likely_pow2(l, twl4c, h4);
198	1.26M	}
199
200		static void mask_edges_chroma(uint16_t (*const masks)[32][2][2],
201		const int cby4, const int cbx4,
202		const int cw4, const int ch4,
203		const int skip_inter,
204		const enum RectTxfmSize tx,
205		uint8_t *const a, uint8_t *const l,
206		const int ss_hor, const int ss_ver)
207	1.16M	{
208	1.16M	const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[tx];
209	1.16M	const int twl4 = t_dim->lw, thl4 = t_dim->lh;
210	1.16M	const int twl4c = !!twl4, thl4c = !!thl4;
211	1.16M	int y, x;
212	1.16M	const int vbits = 4 - ss_ver, hbits = 4 - ss_hor;
213	1.16M	const int vmask = 16 >> ss_ver, hmask = 16 >> ss_hor;
214	1.16M	const unsigned vmax = 1 << vmask, hmax = 1 << hmask;
215
216		// left block edge
217	1.16M	unsigned mask = 1U << cby4;
218	4.98M	for (y = 0; y < ch4; y++, mask <<= 1) {
219	3.81M	const int sidx = mask >= vmax;
220	3.81M	const unsigned smask = mask >> (sidx << vbits);
221	3.81M	masks[0][cbx4][imin(twl4c, l[y])][sidx] \|= smask;
222	3.81M	}
223
224		// top block edge
225	4.78M	for (x = 0, mask = 1U << cbx4; x < cw4; x++, mask <<= 1) {
226	3.62M	const int sidx = mask >= hmax;
227	3.62M	const unsigned smask = mask >> (sidx << hbits);
228	3.62M	masks[1][cby4][imin(thl4c, a[x])][sidx] \|= smask;
229	3.62M	}
230
231	1.16M	if (!skip_inter) {
232		// inner (tx) left\|right edges
233	1.10M	const int hstep = t_dim->w;
234	1.10M	unsigned t = 1U << cby4;
235	1.10M	unsigned inner = (unsigned) ((((uint64_t) t) << ch4) - t);
236	1.10M	unsigned inner1 = inner & ((1 << vmask) - 1), inner2 = inner >> vmask;
237	1.17M	for (x = hstep; x < cw4; x += hstep) {
238	74.1k	if (inner1) masks[0][cbx4 + x][twl4c][0] \|= inner1;
239	74.1k	if (inner2) masks[0][cbx4 + x][twl4c][1] \|= inner2;
240	74.1k	}
241
242		// top
243		// inner (tx) --- edges
244		// bottom
245	1.10M	const int vstep = t_dim->h;
246	1.10M	t = 1U << cbx4;
247	1.10M	inner = (unsigned) ((((uint64_t) t) << cw4) - t);
248	1.10M	inner1 = inner & ((1 << hmask) - 1), inner2 = inner >> hmask;
249	1.19M	for (y = vstep; y < ch4; y += vstep) {
250	94.9k	if (inner1) masks[1][cby4 + y][thl4c][0] \|= inner1;
251	94.9k	if (inner2) masks[1][cby4 + y][thl4c][1] \|= inner2;
252	94.9k	}
253	1.10M	}
254
255	1.16M	dav1d_memset_likely_pow2(a, thl4c, cw4);
256	1.16M	dav1d_memset_likely_pow2(l, twl4c, ch4);
257	1.16M	}
258
259		void dav1d_create_lf_mask_intra(Av1Filter *const lflvl,
260		uint8_t (*const level_cache)[4],
261		const ptrdiff_t b4_stride,
262		const uint8_t (*filter_level)[8][2],
263		const int bx, const int by,
264		const int iw, const int ih,
265		const enum BlockSize bs,
266		const enum RectTxfmSize ytx,
267		const enum RectTxfmSize uvtx,
268		const enum Dav1dPixelLayout layout,
269		uint8_t *const ay, uint8_t *const ly,
270		uint8_t *const auv, uint8_t *const luv)
271	1.28M	{
272	1.28M	const uint8_t *const b_dim = dav1d_block_dimensions[bs];
273	1.28M	const int bw4 = imin(iw - bx, b_dim[0]);
274	1.28M	const int bh4 = imin(ih - by, b_dim[1]);
275	1.28M	const int bx4 = bx & 31;
276	1.28M	const int by4 = by & 31;
277	1.28M	assert(bw4 >= 0 && bh4 >= 0);
278
279	1.28M	if (bw4 && bh4) {
280	1.26M	uint8_t (*level_cache_ptr)[4] = level_cache + by * b4_stride + bx;
281	6.95M	for (int y = 0; y < bh4; y++) {
282	56.9M	for (int x = 0; x < bw4; x++) {
283	51.2M	level_cache_ptr[x][0] = filter_level[0][0][0];
284	51.2M	level_cache_ptr[x][1] = filter_level[1][0][0];
285	51.2M	}
286	5.69M	level_cache_ptr += b4_stride;
287	5.69M	}
288
289	1.26M	mask_edges_intra(lflvl->filter_y, by4, bx4, bw4, bh4, ytx, ay, ly);
290	1.26M	}
291
292	1.28M	if (!auv) return;
293
294	1.06M	const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420;
295	1.06M	const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444;
296	1.06M	const int cbw4 = imin(((iw + ss_hor) >> ss_hor) - (bx >> ss_hor),
297	1.06M	(b_dim[0] + ss_hor) >> ss_hor);
298	1.06M	const int cbh4 = imin(((ih + ss_ver) >> ss_ver) - (by >> ss_ver),
299	1.06M	(b_dim[1] + ss_ver) >> ss_ver);
300	1.06M	assert(cbw4 >= 0 && cbh4 >= 0);
301
302	1.06M	if (!cbw4 \|\| !cbh4) return;
303
304	1.05M	const int cbx4 = bx4 >> ss_hor;
305	1.05M	const int cby4 = by4 >> ss_ver;
306
307	1.05M	uint8_t (*level_cache_ptr)[4] =
308	1.05M	level_cache + (by >> ss_ver) * b4_stride + (bx >> ss_hor);
309	4.35M	for (int y = 0; y < cbh4; y++) {
310	24.2M	for (int x = 0; x < cbw4; x++) {
311	20.9M	level_cache_ptr[x][2] = filter_level[2][0][0];
312	20.9M	level_cache_ptr[x][3] = filter_level[3][0][0];
313	20.9M	}
314	3.29M	level_cache_ptr += b4_stride;
315	3.29M	}
316
317	1.05M	mask_edges_chroma(lflvl->filter_uv, cby4, cbx4, cbw4, cbh4, 0, uvtx,
318	1.05M	auv, luv, ss_hor, ss_ver);
319	1.05M	}
320
321		void dav1d_create_lf_mask_inter(Av1Filter *const lflvl,
322		uint8_t (*const level_cache)[4],
323		const ptrdiff_t b4_stride,
324		const uint8_t (*filter_level)[8][2],
325		const int bx, const int by,
326		const int iw, const int ih,
327		const int skip, const enum BlockSize bs,
328		const enum RectTxfmSize max_ytx,
329		const uint16_t *const tx_masks,
330		const enum RectTxfmSize uvtx,
331		const enum Dav1dPixelLayout layout,
332		uint8_t *const ay, uint8_t *const ly,
333		uint8_t *const auv, uint8_t *const luv)
334	120k	{
335	120k	const uint8_t *const b_dim = dav1d_block_dimensions[bs];
336	120k	const int bw4 = imin(iw - bx, b_dim[0]);
337	120k	const int bh4 = imin(ih - by, b_dim[1]);
338	120k	const int bx4 = bx & 31;
339	120k	const int by4 = by & 31;
340	120k	assert(bw4 >= 0 && bh4 >= 0);
341
342	120k	if (bw4 && bh4) {
343	115k	uint8_t (*level_cache_ptr)[4] = level_cache + by * b4_stride + bx;
344	746k	for (int y = 0; y < bh4; y++) {
345	4.80M	for (int x = 0; x < bw4; x++) {
346	4.17M	level_cache_ptr[x][0] = filter_level[0][0][0];
347	4.17M	level_cache_ptr[x][1] = filter_level[1][0][0];
348	4.17M	}
349	630k	level_cache_ptr += b4_stride;
350	630k	}
351
352	115k	mask_edges_inter(lflvl->filter_y, by4, bx4, bw4, bh4, skip,
353	115k	max_ytx, tx_masks, ay, ly);
354	115k	}
355
356	120k	if (!auv) return;
357
358	109k	const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420;
359	109k	const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444;
360	109k	const int cbw4 = imin(((iw + ss_hor) >> ss_hor) - (bx >> ss_hor),
361	109k	(b_dim[0] + ss_hor) >> ss_hor);
362	109k	const int cbh4 = imin(((ih + ss_ver) >> ss_ver) - (by >> ss_ver),
363	109k	(b_dim[1] + ss_ver) >> ss_ver);
364	109k	assert(cbw4 >= 0 && cbh4 >= 0);
365
366	109k	if (!cbw4 \|\| !cbh4) return;
367
368	105k	const int cbx4 = bx4 >> ss_hor;
369	105k	const int cby4 = by4 >> ss_ver;
370
371	105k	uint8_t (*level_cache_ptr)[4] =
372	105k	level_cache + (by >> ss_ver) * b4_stride + (bx >> ss_hor);
373	626k	for (int y = 0; y < cbh4; y++) {
374	4.36M	for (int x = 0; x < cbw4; x++) {
375	3.84M	level_cache_ptr[x][2] = filter_level[2][0][0];
376	3.84M	level_cache_ptr[x][3] = filter_level[3][0][0];
377	3.84M	}
378	521k	level_cache_ptr += b4_stride;
379	521k	}
380
381	105k	mask_edges_chroma(lflvl->filter_uv, cby4, cbx4, cbw4, cbh4, skip, uvtx,
382	105k	auv, luv, ss_hor, ss_ver);
383	105k	}
384
385	25.8k	void dav1d_calc_eih(Av1FilterLUT *const lim_lut, const int filter_sharpness) {
386		// set E/I/H values from loopfilter level
387	25.8k	const int sharp = filter_sharpness;
388	1.67M	for (int level = 0; level < 64; level++) {
389	1.65M	int limit = level;
390
391	1.65M	if (sharp > 0) {
392	1.42M	limit >>= (sharp + 3) >> 2;
393	1.42M	limit = imin(limit, 9 - sharp);
394	1.42M	}
395	1.65M	limit = imax(limit, 1);
396
397	1.65M	lim_lut->i[level] = limit;
398	1.65M	lim_lut->e[level] = 2 * (level + 2) + limit;
399	1.65M	}
400	25.8k	lim_lut->sharp[0] = (sharp + 3) >> 2;
401	25.8k	lim_lut->sharp[1] = sharp ? 9 - sharp : 0xff;
402	25.8k	}
403
404		static void calc_lf_value(uint8_t (*const lflvl_values)[2],
405		const int base_lvl, const int lf_delta,
406		const int seg_delta,
407		const Dav1dLoopfilterModeRefDeltas *const mr_delta)
408	982k	{
409	982k	const int base = iclip(iclip(base_lvl + lf_delta, 0, 63) + seg_delta, 0, 63);
410
411	982k	if (!mr_delta) {
412	878k	memset(lflvl_values, base, sizeof(lflvl_values) 8);
413	878k	} else {
414	104k	const int sh = base >= 32;
415	104k	lflvl_values[0][0] = lflvl_values[0][1] =
416	104k	iclip(base + (mr_delta->ref_delta[0] * (1 << sh)), 0, 63);
417	834k	for (int r = 1; r < 8; r++) {
418	2.18M	for (int m = 0; m < 2; m++) {
419	1.45M	const int delta =
420	1.45M	mr_delta->mode_delta[m] + mr_delta->ref_delta[r];
421	1.45M	lflvl_values[r][m] = iclip(base + (delta * (1 << sh)), 0, 63);
422	1.45M	}
423	729k	}
424	104k	}
425	982k	}
426
427		static inline void calc_lf_value_chroma(uint8_t (*const lflvl_values)[2],
428		const int base_lvl, const int lf_delta,
429		const int seg_delta,
430		const Dav1dLoopfilterModeRefDeltas *const mr_delta)
431	514k	{
432	514k	if (!base_lvl)
433	46.9k	memset(lflvl_values, 0, sizeof(lflvl_values) 8);
434	467k	else
435	467k	calc_lf_value(lflvl_values, base_lvl, lf_delta, seg_delta, mr_delta);
436	514k	}
437
438		void dav1d_calc_lf_values(uint8_t (*const lflvl_values)[4][8][2],
439		const Dav1dFrameHeader *const hdr,
440		const int8_t lf_delta[4])
441	60.7k	{
442	60.7k	const int n_seg = hdr->segmentation.enabled ? 8 : 1;
443
444	60.7k	if (!hdr->loopfilter.level_y[0] && !hdr->loopfilter.level_y[1]) {
445	2.44k	memset(lflvl_values, 0, sizeof(lflvl_values) n_seg);
446	2.44k	return;
447	2.44k	}
448
449	58.3k	const Dav1dLoopfilterModeRefDeltas *const mr_deltas =
450	58.3k	hdr->loopfilter.mode_ref_delta_enabled ?
451	58.3k	&hdr->loopfilter.mode_ref_deltas : NULL;
452	315k	for (int s = 0; s < n_seg; s++) {
453	257k	const Dav1dSegmentationData *const segd =
454	257k	hdr->segmentation.enabled ? &hdr->segmentation.seg_data.d[s] : NULL;
455
456	257k	calc_lf_value(lflvl_values[s][0], hdr->loopfilter.level_y[0],
457	257k	lf_delta[0], segd ? segd->delta_lf_y_v : 0, mr_deltas);
458	257k	calc_lf_value(lflvl_values[s][1], hdr->loopfilter.level_y[1],
459	257k	lf_delta[hdr->delta.lf.multi ? 1 : 0],
460	257k	segd ? segd->delta_lf_y_h : 0, mr_deltas);
461	257k	calc_lf_value_chroma(lflvl_values[s][2], hdr->loopfilter.level_u,
462	257k	lf_delta[hdr->delta.lf.multi ? 2 : 0],
463	257k	segd ? segd->delta_lf_u : 0, mr_deltas);
464	257k	calc_lf_value_chroma(lflvl_values[s][3], hdr->loopfilter.level_v,
465	257k	lf_delta[hdr->delta.lf.multi ? 3 : 0],
466	257k	segd ? segd->delta_lf_v : 0, mr_deltas);
467	257k	}
468	58.3k	}