Line | Count | Source |
1 | | /* |
2 | | * Copyright © 2018, VideoLAN and dav1d authors |
3 | | * Copyright © 2018, Two Orioles, LLC |
4 | | * All rights reserved. |
5 | | * |
6 | | * Redistribution and use in source and binary forms, with or without |
7 | | * modification, are permitted provided that the following conditions are met: |
8 | | * |
9 | | * 1. Redistributions of source code must retain the above copyright notice, this |
10 | | * list of conditions and the following disclaimer. |
11 | | * |
12 | | * 2. Redistributions in binary form must reproduce the above copyright notice, |
13 | | * this list of conditions and the following disclaimer in the documentation |
14 | | * and/or other materials provided with the distribution. |
15 | | * |
16 | | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
17 | | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
18 | | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
19 | | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR |
20 | | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
21 | | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
22 | | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
23 | | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
24 | | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
25 | | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
26 | | */ |
27 | | |
28 | | #include "config.h" |
29 | | |
30 | | #include <stdint.h> |
31 | | #include <string.h> |
32 | | |
33 | | #include "common/intops.h" |
34 | | |
35 | | #include "src/wedge.h" |
36 | | |
/*
 * Orientation of the boundary of a wedge mask. The numeric value is used as
 * the first index into the array of 64x64 master templates built in
 * dav1d_init_ii_wedge_masks() and is stored in wedge_code_type.direction,
 * so the values must stay dense and start at zero.
 */
enum WedgeDirectionType {
    WEDGE_HORIZONTAL = 0, /* 0 */
    WEDGE_VERTICAL,       /* 1 */
    WEDGE_OBLIQUE27,      /* 2 */
    WEDGE_OBLIQUE63,      /* 3 */
    WEDGE_OBLIQUE117,     /* 4 */
    WEDGE_OBLIQUE153,     /* 5 */
    N_WEDGE_DIRECTIONS    /* 6: number of directions, not a direction */
};
46 | | |
/*
 * One entry of a wedge codebook: which master template to sample and where
 * to place the sampling window inside it.
 */
typedef struct {
    // selects the master template (stored as uint8_t to keep entries small)
    uint8_t /* enum WedgeDirectionType */ direction;
    // horizontal position in eighths of the block width; fill2d_16x2()
    // starts copying at column 32 - (w * x_offset >> 3) of the template
    uint8_t x_offset;
    // vertical position in eighths of the block height; fill2d_16x2()
    // starts copying at row 32 - (h * y_offset >> 3) of the template
    uint8_t y_offset;
} wedge_code_type;
52 | | |
53 | | static const wedge_code_type wedge_codebook_16_hgtw[16] = { |
54 | | { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 }, |
55 | | { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 }, |
56 | | { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 }, |
57 | | { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 4, 4 }, |
58 | | { WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 6 }, |
59 | | { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 }, |
60 | | { WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 6, 4 }, |
61 | | { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 }, |
62 | | }; |
63 | | |
64 | | static const wedge_code_type wedge_codebook_16_hltw[16] = { |
65 | | { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 }, |
66 | | { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 }, |
67 | | { WEDGE_VERTICAL, 2, 4 }, { WEDGE_VERTICAL, 4, 4 }, |
68 | | { WEDGE_VERTICAL, 6, 4 }, { WEDGE_HORIZONTAL, 4, 4 }, |
69 | | { WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 6 }, |
70 | | { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 }, |
71 | | { WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 6, 4 }, |
72 | | { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 }, |
73 | | }; |
74 | | |
75 | | static const wedge_code_type wedge_codebook_16_heqw[16] = { |
76 | | { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 }, |
77 | | { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 }, |
78 | | { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 }, |
79 | | { WEDGE_VERTICAL, 2, 4 }, { WEDGE_VERTICAL, 6, 4 }, |
80 | | { WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 6 }, |
81 | | { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 }, |
82 | | { WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 6, 4 }, |
83 | | { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 }, |
84 | | }; |
85 | | |
// Global storage for every wedge and inter-intra mask plus the table of
// offsets into it. It is filled in by dav1d_init_ii_wedge_masks(), and
// MASK_OFFSET() expresses mask addresses relative to its start.
Dav1dMasks dav1d_masks;
87 | | |
/*
 * Writes one 64-byte row of a wedge master template.
 *
 * The row is 0 to the left of the boundary, 64 to the right of it, and the
 * 8-entry ramp `src` is laid over columns [ctr - 4, ctr + 4).
 *
 * dst: row to fill, exactly 64 bytes are written.
 * src: 8-entry transition ramp.
 * ctr: column on which the ramp is centred.
 *
 * The ramp is clipped against the row, so a `ctr` close to (or beyond) either
 * edge never reads outside `src` nor leaves part of `dst` unwritten. For
 * ctr in [4, 60] the result is identical to the unclipped formulation.
 */
static void insert_border(uint8_t *const dst, const uint8_t *const src,
                          const int ctr)
{
    // unclipped extent of the ramp within the row
    const int ramp_begin = ctr - 4;
    const int ramp_end   = ctr + 4;

    // same extent clamped to [0, 64]
    const int lo = ramp_begin < 0 ? 0 : ramp_begin > 64 ? 64 : ramp_begin;
    const int hi = ramp_end   < 0 ? 0 : ramp_end   > 64 ? 64 : ramp_end;

    if (lo > 0)
        memset(dst, 0, lo);
    // lo - ramp_begin is the number of ramp entries cut off on the left
    if (hi > lo)
        memcpy(dst + lo, src + (lo - ramp_begin), hi - lo);
    if (hi < 64)
        memset(dst + hi, 64, 64 - hi);
}
96 | | |
/*
 * Transposes a 64x64 byte plane: dst[x][y] = src[y][x].
 * Both planes are stored row-major with a stride of 64.
 */
static void transpose(uint8_t *const dst, const uint8_t *const src) {
    // walk the source linearly and derive the (row, col) pair from the index
    for (int i = 0; i < 64 * 64; i++) {
        const int row = i >> 6;
        const int col = i & 63;
        dst[col * 64 + row] = src[i];
    }
}
102 | | |
/*
 * Mirrors a 64x64 byte plane left-to-right: every row of src is written to
 * the same row of dst in reverse column order. Stride is 64 for both.
 */
static void hflip(uint8_t *const dst, const uint8_t *const src) {
    for (int row = 0; row < 64; row++) {
        const uint8_t *const in_row = src + row * 64;
        uint8_t *const out_row = dst + row * 64;
        for (int col = 0; col < 64; col++)
            out_row[63 - col] = in_row[col];
    }
}
108 | | |
/*
 * Copies a w x h window out of a 64-stride plane into a tightly packed
 * (stride == w) destination.
 *
 * dst:          packed output, w * h bytes.
 * src:          64-stride source plane.
 * sign:         when non-zero every value v is stored as 64 - v instead.
 * w, h:         window size.
 * x_off, y_off: top-left corner of the window inside src.
 */
static void copy2d(uint8_t *dst, const uint8_t *src, int sign,
                   const int w, const int h, const int x_off, const int y_off)
{
    const uint8_t *in_row = &src[y_off * 64 + x_off];
    uint8_t *out_row = dst;

    for (int row = 0; row < h; row++, in_row += 64, out_row += w) {
        if (!sign) {
            // plain copy of one window row
            memcpy(out_row, in_row, w);
            continue;
        }
        // complemented copy of one window row
        for (int col = 0; col < w; col++)
            out_row[col] = 64 - in_row[col];
    }
}
128 | | |
// Encodes the address `x` as a 16-bit offset from the start of dav1d_masks,
// in units of 8 bytes (the byte distance is shifted right by 3 and then
// truncated to uint16_t).
// NOTE(review): this is lossless only if every mask starts at a multiple of
// 8 bytes from &dav1d_masks and lies within 512 KiB of it; confirm against
// the Dav1dMasks layout.
#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
130 | | |
131 | | static COLD uint16_t init_chroma(uint8_t *chroma, const uint8_t *luma, |
132 | | const int sign, const int w, const int h, |
133 | | const int ss_ver) |
134 | 0 | { |
135 | 0 | const uint16_t offset = MASK_OFFSET(chroma); |
136 | 0 | for (int y = 0; y < h; y += 1 + ss_ver) { |
137 | 0 | for (int x = 0; x < w; x += 2) { |
138 | 0 | int sum = luma[x] + luma[x + 1] + 1; |
139 | 0 | if (ss_ver) sum += luma[w + x] + luma[w + x + 1] + 1; |
140 | 0 | chroma[x >> 1] = (sum - sign) >> (1 + ss_ver); |
141 | 0 | } |
142 | 0 | luma += w << ss_ver; |
143 | 0 | chroma += w >> 1; |
144 | 0 | } |
145 | 0 | return offset; |
146 | 0 | } |
147 | | |
148 | | static COLD void fill2d_16x2(const int w, const int h, const enum BlockSize bs, |
149 | | const uint8_t (*const master)[64 * 64], |
150 | | const wedge_code_type *const cb, |
151 | | uint8_t *masks_444, uint8_t *masks_422, |
152 | | uint8_t *masks_420, unsigned signs) |
153 | 0 | { |
154 | 0 | const int n_stride_444 = (w * h); |
155 | 0 | const int n_stride_422 = n_stride_444 >> 1; |
156 | 0 | const int n_stride_420 = n_stride_444 >> 2; |
157 | 0 | const int sign_stride_422 = 16 * n_stride_422; |
158 | 0 | const int sign_stride_420 = 16 * n_stride_420; |
159 | | |
160 | | // assign pointer offsets in lookup table |
161 | 0 | for (int n = 0; n < 16; n++) { |
162 | 0 | const int sign = signs & 1; |
163 | |
|
164 | 0 | copy2d(masks_444, master[cb[n].direction], sign, w, h, |
165 | 0 | 32 - (w * cb[n].x_offset >> 3), 32 - (h * cb[n].y_offset >> 3)); |
166 | | |
167 | | // not using !sign is intentional here, since 444 does not require |
168 | | // any rounding since no chroma subsampling is applied. |
169 | 0 | dav1d_masks.offsets[0][bs].wedge[0][n] = |
170 | 0 | dav1d_masks.offsets[0][bs].wedge[1][n] = MASK_OFFSET(masks_444); |
171 | |
|
172 | 0 | dav1d_masks.offsets[1][bs].wedge[0][n] = |
173 | 0 | init_chroma(&masks_422[ sign * sign_stride_422], masks_444, 0, w, h, 0); |
174 | 0 | dav1d_masks.offsets[1][bs].wedge[1][n] = |
175 | 0 | init_chroma(&masks_422[!sign * sign_stride_422], masks_444, 1, w, h, 0); |
176 | 0 | dav1d_masks.offsets[2][bs].wedge[0][n] = |
177 | 0 | init_chroma(&masks_420[ sign * sign_stride_420], masks_444, 0, w, h, 1); |
178 | 0 | dav1d_masks.offsets[2][bs].wedge[1][n] = |
179 | 0 | init_chroma(&masks_420[!sign * sign_stride_420], masks_444, 1, w, h, 1); |
180 | |
|
181 | 0 | signs >>= 1; |
182 | 0 | masks_444 += n_stride_444; |
183 | 0 | masks_422 += n_stride_422; |
184 | 0 | masks_420 += n_stride_420; |
185 | 0 | } |
186 | 0 | } |
187 | | |
188 | | static COLD void build_nondc_ii_masks(uint8_t *const mask_v, const int w, |
189 | | const int h, const int step) |
190 | 0 | { |
191 | 0 | static const uint8_t ii_weights_1d[32] = { |
192 | 0 | 60, 52, 45, 39, 34, 30, 26, 22, 19, 17, 15, 13, 11, 10, 8, 7, |
193 | 0 | 6, 6, 5, 4, 4, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 1, |
194 | 0 | }; |
195 | |
|
196 | 0 | uint8_t *const mask_h = &mask_v[w * h]; |
197 | 0 | uint8_t *const mask_sm = &mask_h[w * h]; |
198 | 0 | for (int y = 0, off = 0; y < h; y++, off += w) { |
199 | 0 | memset(&mask_v[off], ii_weights_1d[y * step], w); |
200 | 0 | for (int x = 0; x < w; x++) { |
201 | 0 | mask_sm[off + x] = ii_weights_1d[imin(x, y) * step]; |
202 | 0 | mask_h[off + x] = ii_weights_1d[x * step]; |
203 | 0 | } |
204 | 0 | } |
205 | 0 | } |
206 | | |
/*
 * One-time initialisation of the global dav1d_masks structure.
 *
 * 1. Builds six 64x64 master wedge templates (one per WedgeDirectionType).
 * 2. Cuts the 16 wedge masks of each of the nine supported block sizes out of
 *    those templates, in 4:4:4, 4:2:2 and 4:2:0 resolution.
 * 3. Builds the inter-intra masks (one flat DC mask plus three weighted masks
 *    per size) and records the offsets of everything in dav1d_masks.offsets.
 */
COLD void dav1d_init_ii_wedge_masks(void) {
    // This function is guaranteed to be called only once

    // The three 8-entry transition ramps that can be laid over a row.
    enum WedgeMasterLineType {
        WEDGE_MASTER_LINE_ODD,
        WEDGE_MASTER_LINE_EVEN,
        WEDGE_MASTER_LINE_VERT,
        N_WEDGE_MASTER_LINES,
    };
    static const uint8_t wedge_master_border[N_WEDGE_MASTER_LINES][8] = {
        [WEDGE_MASTER_LINE_ODD]  = {  1,  2,  6, 18, 37, 53, 60, 63 },
        [WEDGE_MASTER_LINE_EVEN] = {  1,  4, 11, 27, 46, 58, 62, 63 },
        [WEDGE_MASTER_LINE_VERT] = {  0,  2,  7, 21, 43, 57, 62, 64 },
    };
    // scratch templates, indexed by enum WedgeDirectionType (24 KiB of stack)
    uint8_t master[6][64 * 64];

    // create master templates
    // vertical: every row has its transition centred on column 32
    for (int y = 0, off = 0; y < 64; y++, off += 64)
        insert_border(&master[WEDGE_VERTICAL][off],
                      wedge_master_border[WEDGE_MASTER_LINE_VERT], 32);
    // oblique 63: rows are handled in pairs; the centre starts at column 48
    // and moves one column to the left per pair (48 down to 17), with the
    // second row of each pair shifted by one more column and using the ODD
    // ramp instead of the EVEN one
    for (int y = 0, off = 0, ctr = 48; y < 64; y += 2, off += 128, ctr--)
    {
        insert_border(&master[WEDGE_OBLIQUE63][off],
                      wedge_master_border[WEDGE_MASTER_LINE_EVEN], ctr);
        insert_border(&master[WEDGE_OBLIQUE63][off + 64],
                      wedge_master_border[WEDGE_MASTER_LINE_ODD], ctr - 1);
    }

    // the remaining four directions are derived from the two built above;
    // the order matters since OBLIQUE153 is derived from OBLIQUE27
    transpose(master[WEDGE_OBLIQUE27], master[WEDGE_OBLIQUE63]);
    transpose(master[WEDGE_HORIZONTAL], master[WEDGE_VERTICAL]);
    hflip(master[WEDGE_OBLIQUE117], master[WEDGE_OBLIQUE63]);
    hflip(master[WEDGE_OBLIQUE153], master[WEDGE_OBLIQUE27]);

    // fill(w, h, sz_422, sz_420, hvsw, signs):
    //   w, h           luma block size; also selects BS_<w>x<h> and the
    //                  dav1d_masks.wedge_444_<w>x<h> output array
    //   sz_422/sz_420  suffixes of the subsampled output arrays
    //   hvsw           codebook suffix (hgtw / hltw / heqw)
    //   signs          16-bit set, bit n is the sign of wedge n
#define fill(w, h, sz_422, sz_420, hvsw, signs) \
    fill2d_16x2(w, h, BS_##w##x##h - BS_32x32, \
                master, wedge_codebook_16_##hvsw, \
                dav1d_masks.wedge_444_##w##x##h, \
                dav1d_masks.wedge_422_##sz_422, \
                dav1d_masks.wedge_420_##sz_420, signs)

    fill(32, 32, 16x32, 16x16, heqw, 0x7bfb);
    fill(32, 16, 16x16, 16x8,  hltw, 0x7beb);
    fill(32,  8, 16x8,  16x4,  hltw, 0x6beb);
    fill(16, 32,  8x32,  8x16, hgtw, 0x7beb);
    fill(16, 16,  8x16,  8x8,  heqw, 0x7bfb);
    fill(16,  8,  8x8,   8x4,  hltw, 0x7beb);
    fill( 8, 32,  4x32,  4x16, hgtw, 0x7aeb);
    fill( 8, 16,  4x16,  4x8,  hgtw, 0x7beb);
    fill( 8,  8,  4x8,   4x4,  heqw, 0x7bfb);
#undef fill

    // The DC inter-intra mask is a constant 32 everywhere, so a single
    // 32x32 buffer is shared by all block sizes and all three layouts.
    memset(dav1d_masks.ii_dc, 32, 32 * 32);
    for (int c = 0; c < 3; c++) {
        dav1d_masks.offsets[c][BS_32x32-BS_32x32].ii[II_DC_PRED] =
        dav1d_masks.offsets[c][BS_32x16-BS_32x32].ii[II_DC_PRED] =
        dav1d_masks.offsets[c][BS_16x32-BS_32x32].ii[II_DC_PRED] =
        dav1d_masks.offsets[c][BS_16x16-BS_32x32].ii[II_DC_PRED] =
        dav1d_masks.offsets[c][BS_16x8 -BS_32x32].ii[II_DC_PRED] =
        dav1d_masks.offsets[c][BS_8x16 -BS_32x32].ii[II_DC_PRED] =
        dav1d_masks.offsets[c][BS_8x8  -BS_32x32].ii[II_DC_PRED] =
            MASK_OFFSET(dav1d_masks.ii_dc);
    }

    // Fills dav1d_masks.ii_nondc_<w>x<h> with three consecutive w x h masks.
    // In every use below step equals 32 / h.
#define BUILD_NONDC_II_MASKS(w, h, step) \
    build_nondc_ii_masks(dav1d_masks.ii_nondc_##w##x##h, w, h, step)

    // Records, for block size `bs`, the offset of the p-th mask (p is the
    // enclosing loop variable) of the given table in ii[p + 1], for the
    // 444 (offsets[0]), 422 (offsets[1]) and 420 (offsets[2]) layouts.
    // NOTE(review): ii[p + 1] presumably addresses the non-DC prediction
    // modes that follow II_DC_PRED; confirm against the enum definition.
#define ASSIGN_NONDC_II_OFFSET(bs, w444, h444, w422, h422, w420, h420) \
    dav1d_masks.offsets[0][bs-BS_32x32].ii[p + 1] = \
        MASK_OFFSET(&dav1d_masks.ii_nondc_##w444##x##h444[p*w444*h444]); \
    dav1d_masks.offsets[1][bs-BS_32x32].ii[p + 1] = \
        MASK_OFFSET(&dav1d_masks.ii_nondc_##w422##x##h422[p*w422*h422]); \
    dav1d_masks.offsets[2][bs-BS_32x32].ii[p + 1] = \
        MASK_OFFSET(&dav1d_masks.ii_nondc_##w420##x##h420[p*w420*h420])

    BUILD_NONDC_II_MASKS(32, 32, 1);
    BUILD_NONDC_II_MASKS(16, 32, 1);
    BUILD_NONDC_II_MASKS(16, 16, 2);
    BUILD_NONDC_II_MASKS( 8, 32, 1);
    BUILD_NONDC_II_MASKS( 8, 16, 2);
    BUILD_NONDC_II_MASKS( 8,  8, 4);
    BUILD_NONDC_II_MASKS( 4, 16, 2);
    BUILD_NONDC_II_MASKS( 4,  8, 4);
    BUILD_NONDC_II_MASKS( 4,  4, 8);
    // Note that the wider-than-tall sizes have no table of their own:
    // BS_32x16 points at the 32x32 table and BS_16x8 at the 16x16 table
    // for 444 (and analogously for the subsampled layouts).
    for (int p = 0; p < 3; p++) {
        ASSIGN_NONDC_II_OFFSET(BS_32x32, 32, 32, 16, 32, 16, 16);
        ASSIGN_NONDC_II_OFFSET(BS_32x16, 32, 32, 16, 16, 16, 16);
        ASSIGN_NONDC_II_OFFSET(BS_16x32, 16, 32,  8, 32,  8, 16);
        ASSIGN_NONDC_II_OFFSET(BS_16x16, 16, 16,  8, 16,  8,  8);
        ASSIGN_NONDC_II_OFFSET(BS_16x8,  16, 16,  8,  8,  8,  8);
        ASSIGN_NONDC_II_OFFSET(BS_8x16,   8, 16,  4, 16,  4,  8);
        ASSIGN_NONDC_II_OFFSET(BS_8x8,    8,  8,  4,  8,  4,  4);
    }
}