/work/dav1d/src/wedge.c

Source
/*
 * Copyright © 2018, VideoLAN and dav1d authors
 * Copyright © 2018, Two Orioles, LLC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"

#include <stdint.h>
#include <string.h>

#include "common/intops.h"

#include "src/wedge.h"

/*
 * Orientation of a wedge boundary. The numeric values are stored in
 * wedge_code_type.direction and are used directly as the first index into
 * the master[] template array built in dav1d_init_ii_wedge_masks().
 */
enum WedgeDirectionType {
    WEDGE_HORIZONTAL = 0, // template is the transpose of WEDGE_VERTICAL
    WEDGE_VERTICAL = 1,   // template built directly, edge centred at column 32
    WEDGE_OBLIQUE27 = 2,  // template is the transpose of WEDGE_OBLIQUE63
    WEDGE_OBLIQUE63 = 3,  // template built directly, edge shifts one column every two rows
    WEDGE_OBLIQUE117 = 4, // template is the horizontal mirror of WEDGE_OBLIQUE63
    WEDGE_OBLIQUE153 = 5, // template is the horizontal mirror of WEDGE_OBLIQUE27
    N_WEDGE_DIRECTIONS    // number of directions (6)
};

/*
 * One codebook entry: which master template to crop from, and where.
 * fill2d_16x2() starts the w x h crop at column 32 - (w * x_offset >> 3) and
 * row 32 - (h * y_offset >> 3) of the 64x64 template, so the offsets are
 * expressed in eighths of the block width/height (4 == centred).
 */
typedef struct {
    uint8_t /* enum WedgeDirectionType */ direction;
    uint8_t x_offset;
    uint8_t y_offset;
} wedge_code_type;

/*
 * 16-entry wedge codebook, each entry { direction, x_offset, y_offset }.
 * dav1d_init_ii_wedge_masks() selects this table for the block sizes whose
 * height is greater than their width (16x32, 8x32, 8x16).
 */
static const wedge_code_type wedge_codebook_16_hgtw[16] = {
    { WEDGE_OBLIQUE27,  4, 4 }, { WEDGE_OBLIQUE63,  4, 4 },
    { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
    { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 },
    { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL,   4, 4 },
    { WEDGE_OBLIQUE27,  4, 2 }, { WEDGE_OBLIQUE27,  4, 6 },
    { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
    { WEDGE_OBLIQUE63,  2, 4 }, { WEDGE_OBLIQUE63,  6, 4 },
    { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};

/*
 * 16-entry wedge codebook, each entry { direction, x_offset, y_offset }.
 * dav1d_init_ii_wedge_masks() selects this table for the block sizes whose
 * height is less than their width (32x16, 32x8, 16x8).
 */
static const wedge_code_type wedge_codebook_16_hltw[16] = {
    { WEDGE_OBLIQUE27,  4, 4 }, { WEDGE_OBLIQUE63,  4, 4 },
    { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
    { WEDGE_VERTICAL,   2, 4 }, { WEDGE_VERTICAL,   4, 4 },
    { WEDGE_VERTICAL,   6, 4 }, { WEDGE_HORIZONTAL, 4, 4 },
    { WEDGE_OBLIQUE27,  4, 2 }, { WEDGE_OBLIQUE27,  4, 6 },
    { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
    { WEDGE_OBLIQUE63,  2, 4 }, { WEDGE_OBLIQUE63,  6, 4 },
    { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};

/*
 * 16-entry wedge codebook, each entry { direction, x_offset, y_offset }.
 * dav1d_init_ii_wedge_masks() selects this table for the square block sizes
 * (32x32, 16x16, 8x8).
 */
static const wedge_code_type wedge_codebook_16_heqw[16] = {
    { WEDGE_OBLIQUE27,  4, 4 }, { WEDGE_OBLIQUE63,  4, 4 },
    { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
    { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
    { WEDGE_VERTICAL,   2, 4 }, { WEDGE_VERTICAL,   6, 4 },
    { WEDGE_OBLIQUE27,  4, 2 }, { WEDGE_OBLIQUE27,  4, 6 },
    { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
    { WEDGE_OBLIQUE63,  2, 4 }, { WEDGE_OBLIQUE63,  6, 4 },
    { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};

// Global storage for the wedge and inter-intra masks plus the offset tables
// that locate them; filled in by dav1d_init_ii_wedge_masks().
Dav1dMasks dav1d_masks;

/*
 * Write one 64-sample row of a wedge master template.
 *
 * The row is 0 to the left of the edge and 64 to the right of it, with the
 * 8-entry transition ramp src[0..7] covering columns [ctr - 4, ctr + 4).
 *
 * dst: destination row, 64 bytes.
 * src: transition ramp, 8 bytes.
 * ctr: column of the edge centre.
 *
 * The ramp window is clipped to the row, so only the visible part of src[]
 * is read and every one of the 64 destination bytes is always written,
 * whatever the value of ctr.
 */
static void insert_border(uint8_t *const dst, const uint8_t *const src,
                          const int ctr)
{
    const int ramp_start = ctr - 4; // unclipped column of src[0]
    int lo = ramp_start;            // first column covered by the ramp
    int hi = ctr + 4;               // one past the last ramp column

    // clip both ends of the ramp window to [0, 64]
    if (lo < 0) lo = 0;
    else if (lo > 64) lo = 64;
    if (hi < 0) hi = 0;
    else if (hi > 64) hi = 64;

    memset(dst, 0, lo);
    if (hi > lo) // skip the part of src[] that fell off the left side
        memcpy(dst + lo, src + (lo - ramp_start), hi - lo);
    memset(dst + hi, 64, 64 - hi);
}

/*
 * Transpose a 64x64 byte matrix: dst[col][row] = src[row][col].
 * Both buffers are 64 * 64 bytes with a stride of 64 and must not overlap.
 */
static void transpose(uint8_t *const dst, const uint8_t *const src) {
    const uint8_t *in = src;
    for (int row = 0; row < 64; row++) {
        // source row `row` becomes destination column `row`
        uint8_t *out = dst + row;
        for (int col = 0; col < 64; col++, out += 64)
            *out = *in++;
    }
}

/*
 * Mirror a 64x64 byte matrix left-to-right: dst[row][63 - col] = src[row][col].
 * Both buffers are 64 * 64 bytes with a stride of 64 and must not overlap.
 */
static void hflip(uint8_t *const dst, const uint8_t *const src) {
    for (int row = 0; row < 64; row++) {
        const uint8_t *const in = &src[row * 64];
        uint8_t *const out = &dst[row * 64];
        // walk the source left-to-right and the destination right-to-left
        for (int left = 0, right = 64 - 1; left < 64; left++, right--)
            out[right] = in[left];
    }
}

/*
 * Crop a w x h window out of a 64-stride source into a densely packed
 * (stride == w) destination.
 *
 * dst:          output, w * h bytes.
 * src:          source plane with a row stride of 64.
 * sign:         when non-zero every sample is stored as 64 - sample,
 *               otherwise the window is copied verbatim.
 * w, h:         window size.
 * x_off, y_off: top-left corner of the window inside src.
 */
static void copy2d(uint8_t *dst, const uint8_t *src, int sign,
                   const int w, const int h, const int x_off, const int y_off)
{
    const uint8_t *in = &src[y_off * 64 + x_off];

    for (int row = 0; row < h; row++, in += 64, dst += w) {
        if (!sign) {
            memcpy(dst, in, w);
            continue;
        }
        // inverted mask: complement against the full weight of 64
        for (int col = 0; col < w; col++)
            dst[col] = 64 - in[col];
    }
}

// Encode pointer x as a 16-bit offset from the start of dav1d_masks: the byte
// distance divided by 8 (>> 3) and truncated to uint16_t. The low three bits
// of the byte distance are discarded, so the result only identifies x exactly
// when x lies a multiple of 8 bytes past &dav1d_masks.
#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))

/*
 * Derive a subsampled chroma mask from a densely packed w x h luma mask.
 *
 * chroma: output; each row is w / 2 bytes, and there are h rows when
 *         ss_ver == 0 or h / 2 rows when ss_ver == 1.
 * luma:   input mask with a row stride of w.
 * sign:   subtracted from the rounding sum before the shift, so sign == 0
 *         rounds halves up and sign == 1 biases the average down by one unit
 *         before shifting.
 * ss_ver: 0 averages horizontal pairs, 1 averages 2x2 quads.
 *
 * Returns MASK_OFFSET() of the chroma pointer as it was passed in.
 */
static COLD uint16_t init_chroma(uint8_t *chroma, const uint8_t *luma,
                                 const int sign, const int w, const int h,
                                 const int ss_ver)
{
    // capture the offset before the output pointer starts moving
    const uint16_t first = MASK_OFFSET(chroma);
    const int shift = 1 + ss_ver;
    const int luma_rows = 1 + ss_ver; // luma rows consumed per chroma row

    for (int y = 0; y < h; y += luma_rows) {
        const uint8_t *const top = luma;
        const uint8_t *const bot = luma + w; // only dereferenced when ss_ver
        for (int x = 0; x < w; x += 2) {
            int acc = top[x] + top[x + 1] + 1 - sign;
            if (ss_ver)
                acc += bot[x] + bot[x + 1] + 1;
            chroma[x >> 1] = acc >> shift;
        }
        luma += w << ss_ver;
        chroma += w >> 1;
    }
    return first;
}

/*
 * Build the 16 wedge masks of one block size for the three chroma layouts and
 * record where they live in dav1d_masks.offsets[layout][bs].wedge[][].
 *
 * w, h:      luma block size.
 * bs:        first-level index into dav1d_masks.offsets[layout][].
 * master:    64x64 master templates, indexed by wedge direction.
 * cb:        16-entry codebook (direction + crop offsets) for this shape.
 * masks_444: output for the 16 full-resolution masks (w * h bytes each).
 * masks_422: output for the horizontally subsampled masks; two banks of 16
 *            masks, w * h / 2 bytes each.
 * masks_420: output for the 2x2 subsampled masks; two banks of 16 masks,
 *            w * h / 4 bytes each.
 * signs:     bit n selects whether mask n is stored inverted (64 - value).
 */
static COLD void fill2d_16x2(const int w, const int h, const enum BlockSize bs,
                             const uint8_t (*const master)[64 * 64],
                             const wedge_code_type *const cb,
                             uint8_t *masks_444, uint8_t *masks_422,
                             uint8_t *masks_420, unsigned signs)
{
    const int size_444 = w * h;
    const int size_422 = size_444 >> 1;
    const int size_420 = size_444 >> 2;
    // distance between the two banks of the subsampled buffers
    const int bank_422 = 16 * size_422;
    const int bank_420 = 16 * size_420;

    for (int n = 0; n < 16; n++, signs >>= 1) {
        const int sign = signs & 1;
        const wedge_code_type *const code = &cb[n];
        uint8_t *const m444 = masks_444 + n * size_444;
        uint8_t *const m422 = masks_422 + n * size_422;
        uint8_t *const m420 = masks_420 + n * size_420;
        const int x_off = 32 - ((w * code->x_offset) >> 3);
        const int y_off = 32 - ((h * code->y_offset) >> 3);

        copy2d(m444, master[code->direction], sign, w, h, x_off, y_off);

        // Both sign slots deliberately share one 4:4:4 mask: without chroma
        // subsampling there is no averaging, hence no rounding variant.
        dav1d_masks.offsets[0][bs].wedge[0][n] =
        dav1d_masks.offsets[0][bs].wedge[1][n] = MASK_OFFSET(m444);

        // Subsampled layouts: slot 0 uses the round-up average and is stored
        // in bank `sign`, slot 1 uses the biased average and goes in the
        // other bank.
        dav1d_masks.offsets[1][bs].wedge[0][n] =
            init_chroma(m422 + ( sign ? bank_422 : 0), m444, 0, w, h, 0);
        dav1d_masks.offsets[1][bs].wedge[1][n] =
            init_chroma(m422 + (!sign ? bank_422 : 0), m444, 1, w, h, 0);
        dav1d_masks.offsets[2][bs].wedge[0][n] =
            init_chroma(m420 + ( sign ? bank_420 : 0), m444, 0, w, h, 1);
        dav1d_masks.offsets[2][bs].wedge[1][n] =
            init_chroma(m420 + (!sign ? bank_420 : 0), m444, 1, w, h, 1);
    }
}

/*
 * Fill three consecutive w x h inter-intra weight planes starting at mask_v:
 *   plane 0: weight depends on the row only      (weights[y * step])
 *   plane 1: weight depends on the column only   (weights[x * step])
 *   plane 2: weight depends on min(row, column)  (weights[min(x, y) * step])
 *
 * mask_v: output, 3 * w * h bytes.
 * w, h:   plane size.
 * step:   stride through the 32-entry 1-D weight table; callers must keep
 *         (w - 1) * step and (h - 1) * step below 32.
 */
static COLD void build_nondc_ii_masks(uint8_t *const mask_v, const int w,
                                      const int h, const int step)
{
    static const uint8_t ii_weights_1d[32] = {
        60, 52, 45, 39, 34, 30, 26, 22, 19, 17, 15, 13, 11, 10,  8,  7,
         6,  6,  5,  4,  4,  3,  3,  2,  2,  2,  2,  1,  1,  1,  1,  1,
    };

    const int plane_size = w * h;
    uint8_t *row_v  = mask_v;
    uint8_t *row_h  = mask_v + plane_size;
    uint8_t *row_sm = mask_v + 2 * plane_size;

    for (int y = 0; y < h; y++) {
        const uint8_t weight_y = ii_weights_1d[y * step];
        for (int x = 0; x < w; x++) {
            const uint8_t weight_x = ii_weights_1d[x * step];
            row_v[x]  = weight_y;
            row_h[x]  = weight_x;
            // same as ii_weights_1d[imin(x, y) * step]
            row_sm[x] = x < y ? weight_x : weight_y;
        }
        row_v  += w;
        row_h  += w;
        row_sm += w;
    }
}

/*
 * Populate the global dav1d_masks structure:
 *   1. build six 64x64 wedge master templates (one per WedgeDirectionType),
 *   2. crop/invert/subsample them into the per-block-size wedge masks and
 *      record their offsets,
 *   3. fill the flat DC inter-intra mask and the non-DC inter-intra masks
 *      and record their offsets.
 *
 * In dav1d_masks.offsets[c][...], c == 0 refers to the 4:4:4 masks, c == 1
 * to the 4:2:2 masks and c == 2 to the 4:2:0 masks.
 */
COLD void dav1d_init_ii_wedge_masks(void) {
    // This function is guaranteed to be called only once

    enum WedgeMasterLineType {
        WEDGE_MASTER_LINE_ODD,
        WEDGE_MASTER_LINE_EVEN,
        WEDGE_MASTER_LINE_VERT,
        N_WEDGE_MASTER_LINES,
    };
    // 8-sample transition ramps that insert_border() places around the edge
    // centre of a template row.
    static const uint8_t wedge_master_border[N_WEDGE_MASTER_LINES][8] = {
        [WEDGE_MASTER_LINE_ODD]  = {  1,  2,  6, 18, 37, 53, 60, 63 },
        [WEDGE_MASTER_LINE_EVEN] = {  1,  4, 11, 27, 46, 58, 62, 63 },
        [WEDGE_MASTER_LINE_VERT] = {  0,  2,  7, 21, 43, 57, 62, 64 },
    };
    // One 64x64 template per WedgeDirectionType value (6 * 4096 = 24 KiB of
    // stack).
    uint8_t master[6][64 * 64];

    // create master templates
    // Vertical: every row has its edge centred at column 32.
    for (int y = 0, off = 0; y < 64; y++, off += 64)
        insert_border(&master[WEDGE_VERTICAL][off],
                      wedge_master_border[WEDGE_MASTER_LINE_VERT], 32);
    // Oblique63: rows are produced in pairs; the first row of a pair uses the
    // EVEN ramp at ctr, the second the ODD ramp at ctr - 1, and ctr moves one
    // column left per pair (48 down to 17).
    for (int y = 0, off = 0, ctr = 48; y < 64; y += 2, off += 128, ctr--)
    {
        insert_border(&master[WEDGE_OBLIQUE63][off],
                      wedge_master_border[WEDGE_MASTER_LINE_EVEN], ctr);
        insert_border(&master[WEDGE_OBLIQUE63][off + 64],
                      wedge_master_border[WEDGE_MASTER_LINE_ODD], ctr - 1);
    }

    // The remaining four templates are derived from the two built above.
    // Order matters: OBLIQUE27 must exist before OBLIQUE153 is mirrored
    // from it.
    transpose(master[WEDGE_OBLIQUE27], master[WEDGE_OBLIQUE63]);
    transpose(master[WEDGE_HORIZONTAL], master[WEDGE_VERTICAL]);
    hflip(master[WEDGE_OBLIQUE117], master[WEDGE_OBLIQUE63]);
    hflip(master[WEDGE_OBLIQUE153], master[WEDGE_OBLIQUE27]);

    // fill(w, h, sz_422, sz_420, hvsw, signs): generate the 16 wedge masks for
    // a w x h block. sz_422 / sz_420 name the dav1d_masks buffers that receive
    // the subsampled masks, hvsw picks the codebook (hgtw / hltw / heqw) and
    // signs is a bitmask whose bit n tells fill2d_16x2() to store mask n
    // inverted.
#define fill(w, h, sz_422, sz_420, hvsw, signs) \
    fill2d_16x2(w, h, BS_##w##x##h - BS_32x32, \
                master, wedge_codebook_16_##hvsw, \
                dav1d_masks.wedge_444_##w##x##h, \
                dav1d_masks.wedge_422_##sz_422, \
                dav1d_masks.wedge_420_##sz_420, signs)

    fill(32, 32, 16x32, 16x16, heqw, 0x7bfb);
    fill(32, 16, 16x16, 16x8,  hltw, 0x7beb);
    fill(32,  8, 16x8,  16x4,  hltw, 0x6beb);
    fill(16, 32,  8x32,  8x16, hgtw, 0x7beb);
    fill(16, 16,  8x16,  8x8,  heqw, 0x7bfb);
    fill(16,  8,  8x8,   8x4,  hltw, 0x7beb);
    fill( 8, 32,  4x32,  4x16, hgtw, 0x7aeb);
    fill( 8, 16,  4x16,  4x8,  hgtw, 0x7beb);
    fill( 8,  8,  4x8,   4x4,  heqw, 0x7bfb);
#undef fill

    // DC inter-intra mask: a constant weight of 32. A single 32x32 buffer is
    // shared by every listed block size and by all three chroma layouts.
    memset(dav1d_masks.ii_dc, 32, 32 * 32);
    for (int c = 0; c < 3; c++) {
        dav1d_masks.offsets[c][BS_32x32-BS_32x32].ii[II_DC_PRED] =
        dav1d_masks.offsets[c][BS_32x16-BS_32x32].ii[II_DC_PRED] =
        dav1d_masks.offsets[c][BS_16x32-BS_32x32].ii[II_DC_PRED] =
        dav1d_masks.offsets[c][BS_16x16-BS_32x32].ii[II_DC_PRED] =
        dav1d_masks.offsets[c][BS_16x8 -BS_32x32].ii[II_DC_PRED] =
        dav1d_masks.offsets[c][BS_8x16 -BS_32x32].ii[II_DC_PRED] =
        dav1d_masks.offsets[c][BS_8x8  -BS_32x32].ii[II_DC_PRED] =
            MASK_OFFSET(dav1d_masks.ii_dc);
    }

    // Fills the three weight planes (row-based, column-based, min-based) of
    // the dav1d_masks.ii_nondc_<w>x<h> buffer.
#define BUILD_NONDC_II_MASKS(w, h, step) \
    build_nondc_ii_masks(dav1d_masks.ii_nondc_##w##x##h, w, h, step)

    // For block size bs, stores the offset of plane p of the chosen 4:4:4,
    // 4:2:2 and 4:2:0 buffers into ii[p + 1]. Relies on a variable named p
    // being in scope at the expansion site.
#define ASSIGN_NONDC_II_OFFSET(bs, w444, h444, w422, h422, w420, h420) \
    dav1d_masks.offsets[0][bs-BS_32x32].ii[p + 1] = \
        MASK_OFFSET(&dav1d_masks.ii_nondc_##w444##x##h444[p*w444*h444]); \
    dav1d_masks.offsets[1][bs-BS_32x32].ii[p + 1] = \
        MASK_OFFSET(&dav1d_masks.ii_nondc_##w422##x##h422[p*w422*h422]); \
    dav1d_masks.offsets[2][bs-BS_32x32].ii[p + 1] = \
        MASK_OFFSET(&dav1d_masks.ii_nondc_##w420##x##h420[p*w420*h420])

    // step is chosen so that the larger dimension spans the whole 32-entry
    // weight table (32 -> 1, 16 -> 2, 8 -> 4, 4 -> 8).
    BUILD_NONDC_II_MASKS(32, 32, 1);
    BUILD_NONDC_II_MASKS(16, 32, 1);
    BUILD_NONDC_II_MASKS(16, 16, 2);
    BUILD_NONDC_II_MASKS( 8, 32, 1);
    BUILD_NONDC_II_MASKS( 8, 16, 2);
    BUILD_NONDC_II_MASKS( 8,  8, 4);
    BUILD_NONDC_II_MASKS( 4, 16, 2);
    BUILD_NONDC_II_MASKS( 4,  8, 4);
    BUILD_NONDC_II_MASKS( 4,  4, 8);
    // Non-square wide blocks reuse the mask buffer of the next larger size
    // that exists (e.g. 32x16 points at the 32x32 planes for 4:4:4).
    for (int p = 0; p < 3; p++) {
        ASSIGN_NONDC_II_OFFSET(BS_32x32, 32, 32, 16, 32, 16, 16);
        ASSIGN_NONDC_II_OFFSET(BS_32x16, 32, 32, 16, 16, 16, 16);
        ASSIGN_NONDC_II_OFFSET(BS_16x32, 16, 32,  8, 32,  8, 16);
        ASSIGN_NONDC_II_OFFSET(BS_16x16, 16, 16,  8, 16,  8,  8);
        ASSIGN_NONDC_II_OFFSET(BS_16x8,  16, 16,  8,  8,  8,  8);
        ASSIGN_NONDC_II_OFFSET(BS_8x16,   8, 16,  4, 16,  4,  8);
        ASSIGN_NONDC_II_OFFSET(BS_8x8,    8,  8,  4,  8,  4,  4);
    }
}

Coverage Report

Created: 2026-05-16 06:41

Line	Count	Source
1		/*
2		* Copyright © 2018, VideoLAN and dav1d authors
3		* Copyright © 2018, Two Orioles, LLC
4		* All rights reserved.
5		*
6		* Redistribution and use in source and binary forms, with or without
7		* modification, are permitted provided that the following conditions are met:
8		*
9		* 1. Redistributions of source code must retain the above copyright notice, this
10		* list of conditions and the following disclaimer.
11		*
12		* 2. Redistributions in binary form must reproduce the above copyright notice,
13		* this list of conditions and the following disclaimer in the documentation
14		* and/or other materials provided with the distribution.
15		*
16		* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17		* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18		* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19		* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20		* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21		* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22		* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23		* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24		* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25		* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26		*/
27
28		#include "config.h"
29
30		#include <stdint.h>
31		#include <string.h>
32
33		#include "common/intops.h"
34
35		#include "src/wedge.h"
36
37		enum WedgeDirectionType {
38		WEDGE_HORIZONTAL = 0,
39		WEDGE_VERTICAL = 1,
40		WEDGE_OBLIQUE27 = 2,
41		WEDGE_OBLIQUE63 = 3,
42		WEDGE_OBLIQUE117 = 4,
43		WEDGE_OBLIQUE153 = 5,
44		N_WEDGE_DIRECTIONS
45		};
46
47		typedef struct {
48		uint8_t /* enum WedgeDirectionType */ direction;
49		uint8_t x_offset;
50		uint8_t y_offset;
51		} wedge_code_type;
52
53		static const wedge_code_type wedge_codebook_16_hgtw[16] = {
54		{ WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 },
55		{ WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
56		{ WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 },
57		{ WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 4, 4 },
58		{ WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 6 },
59		{ WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
60		{ WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 6, 4 },
61		{ WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
62		};
63
64		static const wedge_code_type wedge_codebook_16_hltw[16] = {
65		{ WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 },
66		{ WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
67		{ WEDGE_VERTICAL, 2, 4 }, { WEDGE_VERTICAL, 4, 4 },
68		{ WEDGE_VERTICAL, 6, 4 }, { WEDGE_HORIZONTAL, 4, 4 },
69		{ WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 6 },
70		{ WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
71		{ WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 6, 4 },
72		{ WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
73		};
74
75		static const wedge_code_type wedge_codebook_16_heqw[16] = {
76		{ WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 },
77		{ WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
78		{ WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
79		{ WEDGE_VERTICAL, 2, 4 }, { WEDGE_VERTICAL, 6, 4 },
80		{ WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 6 },
81		{ WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
82		{ WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 6, 4 },
83		{ WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
84		};
85
86		Dav1dMasks dav1d_masks;
87
88		static void insert_border(uint8_t *const dst, const uint8_t *const src,
89		const int ctr)
90	128	{
91	128	if (ctr > 4) memset(dst, 0, ctr - 4);
92	128	memcpy(dst + imax(ctr, 4) - 4, src + imax(4 - ctr, 0), imin(64 - ctr, 8));
93	128	if (ctr < 64 - 4)
94	128	memset(dst + ctr + 4, 64, 64 - 4 - ctr);
95	128	}
96
97	2	static void transpose(uint8_t *const dst, const uint8_t *const src) {
98	130	for (int y = 0, y_off = 0; y < 64; y++, y_off += 64)
99	8.32k	for (int x = 0, x_off = 0; x < 64; x++, x_off += 64)
100	8.19k	dst[x_off + y] = src[y_off + x];
101	2	}
102
103	2	static void hflip(uint8_t *const dst, const uint8_t *const src) {
104	130	for (int y = 0, y_off = 0; y < 64; y++, y_off += 64)
105	8.32k	for (int x = 0; x < 64; x++)
106	8.19k	dst[y_off + 64 - 1 - x] = src[y_off + x];
107	2	}
108
109		static void copy2d(uint8_t *dst, const uint8_t *src, int sign,
110		const int w, const int h, const int x_off, const int y_off)
111	144	{
112	144	src += y_off * 64 + x_off;
113	144	if (sign) {
114	2.14k	for (int y = 0; y < h; y++) {
115	40.4k	for (int x = 0; x < w; x++)
116	38.4k	dst[x] = 64 - src[x];
117	2.03k	src += 64;
118	2.03k	dst += w;
119	2.03k	}
120	109	} else {
121	691	for (int y = 0; y < h; y++) {
122	656	memcpy(dst, src, w);
123	656	src += 64;
124	656	dst += w;
125	656	}
126	35	}
127	144	}
128
129	786	#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
130
131		static COLD uint16_t init_chroma(uint8_t *chroma, const uint8_t *luma,
132		const int sign, const int w, const int h,
133		const int ss_ver)
134	576	{
135	576	const uint16_t offset = MASK_OFFSET(chroma);
136	8.64k	for (int y = 0; y < h; y += 1 + ss_ver) {
137	83.3k	for (int x = 0; x < w; x += 2) {
138	75.2k	int sum = luma[x] + luma[x + 1] + 1;
139	75.2k	if (ss_ver) sum += luma[w + x] + luma[w + x + 1] + 1;
140	75.2k	chroma[x >> 1] = (sum - sign) >> (1 + ss_ver);
141	75.2k	}
142	8.06k	luma += w << ss_ver;
143	8.06k	chroma += w >> 1;
144	8.06k	}
145	576	return offset;
146	576	}
147
148		static COLD void fill2d_16x2(const int w, const int h, const enum BlockSize bs,
149		const uint8_t (*const master)[64 * 64],
150		const wedge_code_type *const cb,
151		uint8_t *masks_444, uint8_t *masks_422,
152		uint8_t *masks_420, unsigned signs)
153	9	{
154	9	const int n_stride_444 = (w * h);
155	9	const int n_stride_422 = n_stride_444 >> 1;
156	9	const int n_stride_420 = n_stride_444 >> 2;
157	9	const int sign_stride_422 = 16 * n_stride_422;
158	9	const int sign_stride_420 = 16 * n_stride_420;
159
160		// assign pointer offsets in lookup table
161	153	for (int n = 0; n < 16; n++) {
162	144	const int sign = signs & 1;
163
164	144	copy2d(masks_444, master[cb[n].direction], sign, w, h,
165	144	32 - (w * cb[n].x_offset >> 3), 32 - (h * cb[n].y_offset >> 3));
166
167		// not using !sign is intentional here, since 444 does not require
168		// any rounding since no chroma subsampling is applied.
169	144	dav1d_masks.offsets[0][bs].wedge[0][n] =
170	144	dav1d_masks.offsets[0][bs].wedge[1][n] = MASK_OFFSET(masks_444);
171
172	144	dav1d_masks.offsets[1][bs].wedge[0][n] =
173	144	init_chroma(&masks_422[ sign * sign_stride_422], masks_444, 0, w, h, 0);
174	144	dav1d_masks.offsets[1][bs].wedge[1][n] =
175	144	init_chroma(&masks_422[!sign * sign_stride_422], masks_444, 1, w, h, 0);
176	144	dav1d_masks.offsets[2][bs].wedge[0][n] =
177	144	init_chroma(&masks_420[ sign * sign_stride_420], masks_444, 0, w, h, 1);
178	144	dav1d_masks.offsets[2][bs].wedge[1][n] =
179	144	init_chroma(&masks_420[!sign * sign_stride_420], masks_444, 1, w, h, 1);
180
181	144	signs >>= 1;
182	144	masks_444 += n_stride_444;
183	144	masks_422 += n_stride_422;
184	144	masks_420 += n_stride_420;
185	144	}
186	9	}
187
188		static COLD void build_nondc_ii_masks(uint8_t *const mask_v, const int w,
189		const int h, const int step)
190	9	{
191	9	static const uint8_t ii_weights_1d[32] = {
192	9	60, 52, 45, 39, 34, 30, 26, 22, 19, 17, 15, 13, 11, 10, 8, 7,
193	9	6, 6, 5, 4, 4, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 1,
194	9	};
195
196	9	uint8_t *const mask_h = &mask_v[w * h];
197	9	uint8_t *const mask_sm = &mask_h[w * h];
198	173	for (int y = 0, off = 0; y < h; y++, off += w) {
199	164	memset(&mask_v[off], ii_weights_1d[y * step], w);
200	2.51k	for (int x = 0; x < w; x++) {
201	2.35k	mask_sm[off + x] = ii_weights_1d[imin(x, y) * step];
202	2.35k	mask_h[off + x] = ii_weights_1d[x * step];
203	2.35k	}
204	164	}
205	9	}
206
207	1	COLD void dav1d_init_ii_wedge_masks(void) {
208		// This function is guaranteed to be called only once
209
210	1	enum WedgeMasterLineType {
211	1	WEDGE_MASTER_LINE_ODD,
212	1	WEDGE_MASTER_LINE_EVEN,
213	1	WEDGE_MASTER_LINE_VERT,
214	1	N_WEDGE_MASTER_LINES,
215	1	};
216	1	static const uint8_t wedge_master_border[N_WEDGE_MASTER_LINES][8] = {
217	1	[WEDGE_MASTER_LINE_ODD] = { 1, 2, 6, 18, 37, 53, 60, 63 },
218	1	[WEDGE_MASTER_LINE_EVEN] = { 1, 4, 11, 27, 46, 58, 62, 63 },
219	1	[WEDGE_MASTER_LINE_VERT] = { 0, 2, 7, 21, 43, 57, 62, 64 },
220	1	};
221	1	uint8_t master[6][64 * 64];
222
223		// create master templates
224	65	for (int y = 0, off = 0; y < 64; y++, off += 64)
225	64	insert_border(&master[WEDGE_VERTICAL][off],
226	64	wedge_master_border[WEDGE_MASTER_LINE_VERT], 32);
227	33	for (int y = 0, off = 0, ctr = 48; y < 64; y += 2, off += 128, ctr--)
228	32	{
229	32	insert_border(&master[WEDGE_OBLIQUE63][off],
230	32	wedge_master_border[WEDGE_MASTER_LINE_EVEN], ctr);
231	32	insert_border(&master[WEDGE_OBLIQUE63][off + 64],
232	32	wedge_master_border[WEDGE_MASTER_LINE_ODD], ctr - 1);
233	32	}
234
235	1	transpose(master[WEDGE_OBLIQUE27], master[WEDGE_OBLIQUE63]);
236	1	transpose(master[WEDGE_HORIZONTAL], master[WEDGE_VERTICAL]);
237	1	hflip(master[WEDGE_OBLIQUE117], master[WEDGE_OBLIQUE63]);
238	1	hflip(master[WEDGE_OBLIQUE153], master[WEDGE_OBLIQUE27]);
239
240	1	#define fill(w, h, sz_422, sz_420, hvsw, signs) \
241	9	fill2d_16x2(w, h, BS_##w##x##h - BS_32x32, \
242	9	master, wedge_codebook_16_##hvsw, \
243	9	dav1d_masks.wedge_444_##w##x##h, \
244	9	dav1d_masks.wedge_422_##sz_422, \
245	9	dav1d_masks.wedge_420_##sz_420, signs)
246
247	1	fill(32, 32, 16x32, 16x16, heqw, 0x7bfb);
248	1	fill(32, 16, 16x16, 16x8, hltw, 0x7beb);
249	1	fill(32, 8, 16x8, 16x4, hltw, 0x6beb);
250	1	fill(16, 32, 8x32, 8x16, hgtw, 0x7beb);
251	1	fill(16, 16, 8x16, 8x8, heqw, 0x7bfb);
252	1	fill(16, 8, 8x8, 8x4, hltw, 0x7beb);
253	1	fill( 8, 32, 4x32, 4x16, hgtw, 0x7aeb);
254	1	fill( 8, 16, 4x16, 4x8, hgtw, 0x7beb);
255	1	fill( 8, 8, 4x8, 4x4, heqw, 0x7bfb);
256	1	#undef fill
257
258	1	memset(dav1d_masks.ii_dc, 32, 32 * 32);
259	4	for (int c = 0; c < 3; c++) {
260	3	dav1d_masks.offsets[c][BS_32x32-BS_32x32].ii[II_DC_PRED] =
261	3	dav1d_masks.offsets[c][BS_32x16-BS_32x32].ii[II_DC_PRED] =
262	3	dav1d_masks.offsets[c][BS_16x32-BS_32x32].ii[II_DC_PRED] =
263	3	dav1d_masks.offsets[c][BS_16x16-BS_32x32].ii[II_DC_PRED] =
264	3	dav1d_masks.offsets[c][BS_16x8 -BS_32x32].ii[II_DC_PRED] =
265	3	dav1d_masks.offsets[c][BS_8x16 -BS_32x32].ii[II_DC_PRED] =
266	3	dav1d_masks.offsets[c][BS_8x8 -BS_32x32].ii[II_DC_PRED] =
267	3	MASK_OFFSET(dav1d_masks.ii_dc);
268	3	}
269
270	1	#define BUILD_NONDC_II_MASKS(w, h, step) \
271	9	build_nondc_ii_masks(dav1d_masks.ii_nondc_##w##x##h, w, h, step)
272
273	1	#define ASSIGN_NONDC_II_OFFSET(bs, w444, h444, w422, h422, w420, h420) \
274	21	dav1d_masks.offsets[0][bs-BS_32x32].ii[p + 1] = \
275	21	MASK_OFFSET(&dav1d_masks.ii_nondc_##w444##x##h444[p*w444*h444]); \
276	21	dav1d_masks.offsets[1][bs-BS_32x32].ii[p + 1] = \
277	21	MASK_OFFSET(&dav1d_masks.ii_nondc_##w422##x##h422[p*w422*h422]); \
278	21	dav1d_masks.offsets[2][bs-BS_32x32].ii[p + 1] = \
279	21	MASK_OFFSET(&dav1d_masks.ii_nondc_##w420##x##h420[p*w420*h420])
280
281	1	BUILD_NONDC_II_MASKS(32, 32, 1);
282	1	BUILD_NONDC_II_MASKS(16, 32, 1);
283	1	BUILD_NONDC_II_MASKS(16, 16, 2);
284	1	BUILD_NONDC_II_MASKS( 8, 32, 1);
285	1	BUILD_NONDC_II_MASKS( 8, 16, 2);
286	1	BUILD_NONDC_II_MASKS( 8, 8, 4);
287	1	BUILD_NONDC_II_MASKS( 4, 16, 2);
288	1	BUILD_NONDC_II_MASKS( 4, 8, 4);
289	1	BUILD_NONDC_II_MASKS( 4, 4, 8);
290	4	for (int p = 0; p < 3; p++) {
291	3	ASSIGN_NONDC_II_OFFSET(BS_32x32, 32, 32, 16, 32, 16, 16);
292	3	ASSIGN_NONDC_II_OFFSET(BS_32x16, 32, 32, 16, 16, 16, 16);
293	3	ASSIGN_NONDC_II_OFFSET(BS_16x32, 16, 32, 8, 32, 8, 16);
294	3	ASSIGN_NONDC_II_OFFSET(BS_16x16, 16, 16, 8, 16, 8, 8);
295	3	ASSIGN_NONDC_II_OFFSET(BS_16x8, 16, 16, 8, 8, 8, 8);
296	3	ASSIGN_NONDC_II_OFFSET(BS_8x16, 8, 16, 4, 16, 4, 8);
297	3	ASSIGN_NONDC_II_OFFSET(BS_8x8, 8, 8, 4, 8, 4, 4);
298	3	}
299	1	}