/work/svt-av1/Source/Lib/Codec/inter_prediction.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright(c) 2019 Intel Corporation |
3 | | * Copyright (c) 2016, Alliance for Open Media. All rights reserved |
4 | | * |
5 | | * This source code is subject to the terms of the BSD 3-Clause Clear License and |
6 | | * the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear License |
7 | | * was not distributed with this source code in the LICENSE file, you can |
8 | | * obtain it at https://www.aomedia.org/license. If the Alliance for Open |
9 | | * Media Patent License 1.0 was not distributed with this source code in the |
10 | | * PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license. |
11 | | */ |
12 | | |
13 | | #include <stdlib.h> |
14 | | |
15 | | #include "inter_prediction.h" |
16 | | #include "convolve.h" |
17 | | #include "common_dsp_rtcd.h" |
18 | | #include "utility.h" |
19 | | #include "pic_operators.h" |
20 | | |
// Number of fractional bits carried by positions on the scaled-reference path.
#define SCALE_SUBPEL_BITS 10
// Count of distinct fractional positions at that precision; also the step value that means "no scaling".
#define SCALE_SUBPEL_SHIFTS (1 << SCALE_SUBPEL_BITS)
// Mask isolating the fractional part of a SCALE_SUBPEL_BITS position.
#define SCALE_SUBPEL_MASK (SCALE_SUBPEL_SHIFTS - 1)
// Fractional bits carried on top of the SUBPEL_BITS precision.
#define SCALE_EXTRA_BITS (SCALE_SUBPEL_BITS - SUBPEL_BITS)
25 | | |
// Thin wrapper: hands every argument, unchanged and in the same order, to svt_aom_pack2d_src().
// NOTE(review): the parameter names suggest an 8-bit plane and an n-bit plane being merged into a
// 16-bit output of width x height samples -- confirm against svt_aom_pack2d_src().
void svt_aom_pack_block(uint8_t* in8_bit_buffer, uint32_t in8_stride, uint8_t* inn_bit_buffer, uint32_t inn_stride,
                        uint16_t* out16_bit_buffer, uint32_t out_stride, uint32_t width, uint32_t height) {
    svt_aom_pack2d_src(in8_bit_buffer,
                       in8_stride,
                       inn_bit_buffer,
                       inn_stride,
                       out16_bit_buffer,
                       out_stride,
                       width,
                       height);
}
31 | | |
32 | | static WedgeMasksType wedge_masks[BLOCK_SIZES_ALL][2]; |
33 | | |
34 | 0 | int svt_aom_is_masked_compound_type(COMPOUND_TYPE type) { |
35 | 0 | return (type == COMPOUND_WEDGE || type == COMPOUND_DIFFWTD); |
36 | 0 | } |
37 | | |
// Computes the sample-wise residual diff = src - pred for a rows x cols block of 16-bit samples.
//
//   diff / diff_stride   : output residual and its stride, in int16_t elements
//   src8 / src_stride    : source samples, read as uint16_t; stride in uint16_t elements
//   pred8 / pred_stride  : prediction samples, read as uint16_t; stride in uint16_t elements
//   bd                   : unused here
//
// The uint8_t pointers are reinterpreted directly as uint16_t pointers, so callers must pass buffers that
// really hold 16-bit samples. The inputs are only read, so the casts keep the const qualifier.
void svt_aom_highbd_subtract_block_c(int rows, int cols, int16_t* diff, ptrdiff_t diff_stride, const uint8_t* src8,
                                     ptrdiff_t src_stride, const uint8_t* pred8, ptrdiff_t pred_stride, int bd) {
    const uint16_t* src  = (const uint16_t*)src8;
    const uint16_t* pred = (const uint16_t*)pred8;
    (void)bd;

    for (int r = 0; r < rows; r++) {
        for (int c = 0; c < cols; c++) {
            // The operands promote to int, so the difference is exact before it is narrowed to int16_t.
            diff[c] = (int16_t)(src[c] - pred[c]);
        }

        diff += diff_stride;
        pred += pred_stride;
        src += src_stride;
    }
}
54 | | |
// Computes the sample-wise residual diff = src - pred for a rows x cols block of 8-bit samples.
// Each buffer has its own stride, given in elements of that buffer's type.
void svt_aom_subtract_block_c(int rows, int cols, int16_t* diff, ptrdiff_t diff_stride, const uint8_t* src,
                              ptrdiff_t src_stride, const uint8_t* pred, ptrdiff_t pred_stride) {
    for (int row = 0; row < rows; ++row) {
        int16_t*       diff_row = diff + row * diff_stride;
        const uint8_t* src_row  = src + row * src_stride;
        const uint8_t* pred_row = pred + row * pred_stride;

        for (int col = 0; col < cols; ++col) {
            diff_row[col] = (int16_t)(src_row[col] - pred_row[col]);
        }
    }
}
67 | | |
68 | | static void diffwtd_mask(uint8_t* mask, int which_inverse, int mask_base, const uint8_t* src0, int src0_stride, |
69 | 0 | const uint8_t* src1, int src1_stride, int h, int w) { |
70 | 0 | for (int i = 0; i < h; ++i) { |
71 | 0 | for (int j = 0; j < w; ++j) { |
72 | 0 | int diff = abs((int)src0[i * src0_stride + j] - (int)src1[i * src1_stride + j]); |
73 | 0 | int m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA); |
74 | 0 | mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m; |
75 | 0 | } |
76 | 0 | } |
77 | 0 | } |
78 | | |
79 | | static AOM_FORCE_INLINE void diffwtd_mask_highbd(uint8_t* mask, int which_inverse, int mask_base, const uint16_t* src0, |
80 | | int src0_stride, const uint16_t* src1, int src1_stride, int h, int w, |
81 | 0 | const unsigned int bd) { |
82 | 0 | assert(bd >= 8); |
83 | 0 | if (bd == 8) { |
84 | 0 | if (which_inverse) { |
85 | 0 | for (int i = 0; i < h; ++i) { |
86 | 0 | for (int j = 0; j < w; ++j) { |
87 | 0 | int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR; |
88 | 0 | unsigned int m = negative_to_zero(mask_base + diff); |
89 | 0 | m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA); |
90 | 0 | mask[j] = AOM_BLEND_A64_MAX_ALPHA - m; |
91 | 0 | } |
92 | 0 | src0 += src0_stride; |
93 | 0 | src1 += src1_stride; |
94 | 0 | mask += w; |
95 | 0 | } |
96 | 0 | } else { |
97 | 0 | for (int i = 0; i < h; ++i) { |
98 | 0 | for (int j = 0; j < w; ++j) { |
99 | 0 | int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR; |
100 | 0 | unsigned int m = negative_to_zero(mask_base + diff); |
101 | 0 | m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA); |
102 | 0 | mask[j] = m; |
103 | 0 | } |
104 | 0 | src0 += src0_stride; |
105 | 0 | src1 += src1_stride; |
106 | 0 | mask += w; |
107 | 0 | } |
108 | 0 | } |
109 | 0 | } else { |
110 | 0 | const unsigned int bd_shift = bd - 8; |
111 | 0 | if (which_inverse) { |
112 | 0 | for (int i = 0; i < h; ++i) { |
113 | 0 | for (int j = 0; j < w; ++j) { |
114 | 0 | int diff = (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR; |
115 | 0 | unsigned int m = negative_to_zero(mask_base + diff); |
116 | 0 | m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA); |
117 | 0 | mask[j] = AOM_BLEND_A64_MAX_ALPHA - m; |
118 | 0 | } |
119 | 0 | src0 += src0_stride; |
120 | 0 | src1 += src1_stride; |
121 | 0 | mask += w; |
122 | 0 | } |
123 | 0 | } else { |
124 | 0 | for (int i = 0; i < h; ++i) { |
125 | 0 | for (int j = 0; j < w; ++j) { |
126 | 0 | int diff = (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR; |
127 | 0 | unsigned int m = negative_to_zero(mask_base + diff); |
128 | 0 | m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA); |
129 | 0 | mask[j] = m; |
130 | 0 | } |
131 | 0 | src0 += src0_stride; |
132 | 0 | src1 += src1_stride; |
133 | 0 | mask += w; |
134 | 0 | } |
135 | 0 | } |
136 | 0 | } |
137 | 0 | } |
138 | | |
139 | | void svt_av1_build_compound_diffwtd_mask_highbd_c(uint8_t* mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t* src0, |
140 | | int src0_stride, const uint8_t* src1, int src1_stride, int h, int w, |
141 | 0 | int bd) { |
142 | 0 | switch (mask_type) { |
143 | 0 | case DIFFWTD_38: |
144 | 0 | diffwtd_mask_highbd(mask, 0, 38, (uint16_t*)src0, src0_stride, (uint16_t*)src1, src1_stride, h, w, bd); |
145 | 0 | break; |
146 | 0 | case DIFFWTD_38_INV: |
147 | 0 | diffwtd_mask_highbd(mask, 1, 38, (uint16_t*)src0, src0_stride, (uint16_t*)src1, src1_stride, h, w, bd); |
148 | 0 | break; |
149 | 0 | default: |
150 | 0 | assert(0); |
151 | 0 | } |
152 | 0 | } |
153 | | |
154 | | void svt_av1_build_compound_diffwtd_mask_c(uint8_t* mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t* src0, |
155 | 0 | int src0_stride, const uint8_t* src1, int src1_stride, int h, int w) { |
156 | 0 | switch (mask_type) { |
157 | 0 | case DIFFWTD_38: |
158 | 0 | diffwtd_mask(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w); |
159 | 0 | break; |
160 | 0 | case DIFFWTD_38_INV: |
161 | 0 | diffwtd_mask(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w); |
162 | 0 | break; |
163 | 0 | default: |
164 | 0 | assert(0); |
165 | 0 | } |
166 | 0 | } |
167 | | |
168 | | // Note: Expect val to be in q4 precision |
169 | 0 | static INLINE int32_t scaled_x(int32_t val, const ScaleFactors* sf) { |
170 | 0 | const int off = (sf->x_scale_fp - (1 << REF_SCALE_SHIFT)) * (1 << (SUBPEL_BITS - 1)); |
171 | 0 | const int64_t tval = (int64_t)val * sf->x_scale_fp + off; |
172 | 0 | return (int)ROUND_POWER_OF_TWO_SIGNED_64(tval, REF_SCALE_SHIFT - SCALE_EXTRA_BITS); |
173 | 0 | } |
174 | | |
175 | | // Note: Expect val to be in q4 precision |
176 | 0 | static INLINE int32_t scaled_y(int32_t val, const ScaleFactors* sf) { |
177 | 0 | const int32_t off = (sf->y_scale_fp - (1 << REF_SCALE_SHIFT)) * (1 << (SUBPEL_BITS - 1)); |
178 | 0 | const int64_t tval = (int64_t)val * sf->y_scale_fp + off; |
179 | 0 | return (int32_t)ROUND_POWER_OF_TWO_SIGNED_64(tval, REF_SCALE_SHIFT - SCALE_EXTRA_BITS); |
180 | 0 | } |
181 | | |
182 | | // Note: Expect val to be in q4 precision |
183 | 0 | static int32_t unscaled_value(int32_t val, const ScaleFactors* sf) { |
184 | 0 | (void)sf; |
185 | 0 | return val << SCALE_EXTRA_BITS; |
186 | 0 | } |
187 | | |
188 | 948 | static int32_t get_fixed_point_scale_factor(int32_t other_size, int32_t this_size) { |
189 | | // Calculate scaling factor once for each reference frame |
190 | | // and use fixed point scaling factors in decoding and encoding routines. |
191 | | // Hardware implementations can calculate scale factor in device driver |
192 | | // and use multiplication and shifting on hardware instead of division. |
193 | 948 | return ((other_size << REF_SCALE_SHIFT) + this_size / 2) / this_size; |
194 | 948 | } |
195 | | |
196 | | // Given the fixed point scale, calculate coarse point scale. |
197 | 948 | static int32_t fixed_point_scale_to_coarse_point_scale(int32_t scale_fp) { |
198 | 948 | return ROUND_POWER_OF_TWO(scale_fp, REF_SCALE_SHIFT - SCALE_SUBPEL_BITS); |
199 | 948 | } |
200 | | |
201 | 474 | void svt_av1_setup_scale_factors_for_frame(ScaleFactors* sf, int other_w, int other_h, int this_w, int this_h) { |
202 | 474 | if (!valid_ref_frame_size(other_w, other_h, this_w, this_h)) { |
203 | 0 | sf->x_scale_fp = REF_INVALID_SCALE; |
204 | 0 | sf->y_scale_fp = REF_INVALID_SCALE; |
205 | 0 | return; |
206 | 0 | } |
207 | | |
208 | 474 | sf->x_scale_fp = get_fixed_point_scale_factor(other_w, this_w); |
209 | 474 | sf->y_scale_fp = get_fixed_point_scale_factor(other_h, this_h); |
210 | | |
211 | 474 | sf->x_step_q4 = fixed_point_scale_to_coarse_point_scale(sf->x_scale_fp); |
212 | 474 | sf->y_step_q4 = fixed_point_scale_to_coarse_point_scale(sf->y_scale_fp); |
213 | | |
214 | 474 | if (av1_is_scaled(sf)) { |
215 | 0 | sf->scale_value_x = scaled_x; |
216 | 0 | sf->scale_value_y = scaled_y; |
217 | 474 | } else { |
218 | 474 | sf->scale_value_x = unscaled_value; |
219 | 474 | sf->scale_value_y = unscaled_value; |
220 | 474 | } |
221 | 474 | } |
222 | | |
223 | 0 | static INLINE int32_t has_scale(int32_t xs, int32_t ys) { |
224 | 0 | return xs != SCALE_SUBPEL_SHIFTS || ys != SCALE_SUBPEL_SHIFTS; |
225 | 0 | } |
226 | | |
227 | 0 | static INLINE void revert_scale_extra_bits(SubpelParams* sp) { |
228 | 0 | sp->subpel_x >>= SCALE_EXTRA_BITS; |
229 | 0 | sp->subpel_y >>= SCALE_EXTRA_BITS; |
230 | 0 | sp->xs >>= SCALE_EXTRA_BITS; |
231 | 0 | sp->ys >>= SCALE_EXTRA_BITS; |
232 | 0 | assert(sp->subpel_x < SUBPEL_SHIFTS); |
233 | 0 | assert(sp->subpel_y < SUBPEL_SHIFTS); |
234 | 0 | assert(sp->xs <= SUBPEL_SHIFTS); |
235 | 0 | assert(sp->ys <= SUBPEL_SHIFTS); |
236 | 0 | } |
237 | | |
238 | | DECLARE_ALIGNED(256, const InterpKernel, sub_pel_filters_8[SUBPEL_SHIFTS]) = {{0, 0, 0, 128, 0, 0, 0, 0}, |
239 | | {0, 2, -6, 126, 8, -2, 0, 0}, |
240 | | {0, 2, -10, 122, 18, -4, 0, 0}, |
241 | | {0, 2, -12, 116, 28, -8, 2, 0}, |
242 | | {0, 2, -14, 110, 38, -10, 2, 0}, |
243 | | {0, 2, -14, 102, 48, -12, 2, 0}, |
244 | | {0, 2, -16, 94, 58, -12, 2, 0}, |
245 | | {0, 2, -14, 84, 66, -12, 2, 0}, |
246 | | {0, 2, -14, 76, 76, -14, 2, 0}, |
247 | | {0, 2, -12, 66, 84, -14, 2, 0}, |
248 | | {0, 2, -12, 58, 94, -16, 2, 0}, |
249 | | {0, 2, -12, 48, 102, -14, 2, 0}, |
250 | | {0, 2, -10, 38, 110, -14, 2, 0}, |
251 | | {0, 2, -8, 28, 116, -12, 2, 0}, |
252 | | {0, 0, -4, 18, 122, -10, 2, 0}, |
253 | | {0, 0, -2, 8, 126, -6, 2, 0}}; |
254 | | DECLARE_ALIGNED(256, const InterpKernel, sub_pel_filters_4[SUBPEL_SHIFTS]) = {{0, 0, 0, 128, 0, 0, 0, 0}, |
255 | | {0, 0, -4, 126, 8, -2, 0, 0}, |
256 | | {0, 0, -8, 122, 18, -4, 0, 0}, |
257 | | {0, 0, -10, 116, 28, -6, 0, 0}, |
258 | | {0, 0, -12, 110, 38, -8, 0, 0}, |
259 | | {0, 0, -12, 102, 48, -10, 0, 0}, |
260 | | {0, 0, -14, 94, 58, -10, 0, 0}, |
261 | | {0, 0, -12, 84, 66, -10, 0, 0}, |
262 | | {0, 0, -12, 76, 76, -12, 0, 0}, |
263 | | {0, 0, -10, 66, 84, -12, 0, 0}, |
264 | | {0, 0, -10, 58, 94, -14, 0, 0}, |
265 | | {0, 0, -10, 48, 102, -12, 0, 0}, |
266 | | {0, 0, -8, 38, 110, -12, 0, 0}, |
267 | | {0, 0, -6, 28, 116, -10, 0, 0}, |
268 | | {0, 0, -4, 18, 122, -8, 0, 0}, |
269 | | {0, 0, -2, 8, 126, -4, 0, 0}}; |
270 | | |
// Largest interpolation filter length; used to size the intermediate buffers of the 2-D convolutions.
#define MAX_FILTER_TAP 8
272 | | |
273 | 0 | int svt_aom_get_relative_dist_enc(SeqHeader* seq_header, int ref_hint, int order_hint) { |
274 | 0 | int diff, m; |
275 | 0 | if (!seq_header->order_hint_info.enable_order_hint) { |
276 | 0 | return 0; |
277 | 0 | } |
278 | 0 | diff = ref_hint - order_hint; |
279 | 0 | m = 1 << (seq_header->order_hint_info.order_hint_bits - 1); |
280 | 0 | diff = (diff & (m - 1)) - (diff & m); |
281 | 0 | return diff; |
282 | 0 | } |
283 | | |
284 | | static const int quant_dist_weight[4][2] = {{2, 3}, {2, 5}, {2, 7}, {1, MAX_FRAME_DISTANCE}}; |
285 | | static const int quant_dist_lookup_table[2][4][2] = { |
286 | | {{9, 7}, {11, 5}, {12, 4}, {13, 3}}, |
287 | | {{7, 9}, {5, 11}, {4, 12}, {3, 13}}, |
288 | | }; |
289 | | |
290 | | void svt_av1_dist_wtd_comp_weight_assign(SeqHeader* seq_header, int cur_frame_index, int bck_frame_index, |
291 | | int fwd_frame_index, int compound_idx, int order_idx, int* fwd_offset, |
292 | 0 | int* bck_offset, int* use_dist_wtd_comp_avg, int is_compound) { |
293 | 0 | assert(fwd_offset != NULL && bck_offset != NULL); |
294 | 0 | if (!is_compound || compound_idx) { |
295 | 0 | *use_dist_wtd_comp_avg = 0; |
296 | 0 | return; |
297 | 0 | } |
298 | | |
299 | 0 | *use_dist_wtd_comp_avg = 1; |
300 | |
|
301 | 0 | int d0 = clamp( |
302 | 0 | abs(svt_aom_get_relative_dist_enc(seq_header, fwd_frame_index, cur_frame_index)), 0, MAX_FRAME_DISTANCE); |
303 | 0 | int d1 = clamp( |
304 | 0 | abs(svt_aom_get_relative_dist_enc(seq_header, cur_frame_index, bck_frame_index)), 0, MAX_FRAME_DISTANCE); |
305 | |
|
306 | 0 | const int order = d0 <= d1; |
307 | |
|
308 | 0 | if (d0 == 0 || d1 == 0) { |
309 | 0 | *fwd_offset = quant_dist_lookup_table[order_idx][3][order]; |
310 | 0 | *bck_offset = quant_dist_lookup_table[order_idx][3][1 - order]; |
311 | 0 | return; |
312 | 0 | } |
313 | | |
314 | 0 | int i; |
315 | 0 | for (i = 0; i < 3; ++i) { |
316 | 0 | int c0 = quant_dist_weight[i][order]; |
317 | 0 | int c1 = quant_dist_weight[i][!order]; |
318 | 0 | int d0_c0 = d0 * c0; |
319 | 0 | int d1_c1 = d1 * c1; |
320 | 0 | if ((d0 > d1 && d0_c0 < d1_c1) || (d0 <= d1 && d0_c0 > d1_c1)) { |
321 | 0 | break; |
322 | 0 | } |
323 | 0 | } |
324 | |
|
325 | 0 | *fwd_offset = quant_dist_lookup_table[order_idx][i][order]; |
326 | 0 | *bck_offset = quant_dist_lookup_table[order_idx][i][1 - order]; |
327 | 0 | } |
328 | | |
329 | | void svt_av1_convolve_2d_sr_c(const uint8_t* src, int32_t src_stride, uint8_t* dst, int32_t dst_stride, int32_t w, |
330 | | int32_t h, const InterpFilterParams* filter_params_x, |
331 | | const InterpFilterParams* filter_params_y, const int32_t subpel_x_q4, |
332 | 0 | const int32_t subpel_y_q4, ConvolveParams* conv_params) { |
333 | 0 | int16_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE]; |
334 | 0 | int32_t im_h = h + filter_params_y->taps - 1; |
335 | 0 | int32_t im_stride = w; |
336 | 0 | const int32_t fo_vert = filter_params_y->taps / 2 - 1; |
337 | 0 | const int32_t fo_horiz = filter_params_x->taps / 2 - 1; |
338 | 0 | const int32_t bd = 8; |
339 | 0 | const int32_t bits = FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1; |
340 | | |
341 | | // horizontal filter |
342 | 0 | const uint8_t* src_horiz = src - fo_vert * src_stride; |
343 | 0 | const int16_t* x_filter = av1_get_interp_filter_subpel_kernel(*filter_params_x, subpel_x_q4 & SUBPEL_MASK); |
344 | 0 | for (int32_t y = 0; y < im_h; ++y) { |
345 | 0 | for (int32_t x = 0; x < w; ++x) { |
346 | 0 | int32_t sum = (1 << (bd + FILTER_BITS - 1)); |
347 | 0 | for (int32_t k = 0; k < filter_params_x->taps; ++k) { |
348 | 0 | sum += x_filter[k] * src_horiz[y * src_stride + x - fo_horiz + k]; |
349 | 0 | } |
350 | 0 | assert(0 <= sum && sum < (1 << (bd + FILTER_BITS + 1))); |
351 | 0 | im_block[y * im_stride + x] = (int16_t)ROUND_POWER_OF_TWO(sum, conv_params->round_0); |
352 | 0 | } |
353 | 0 | } |
354 | | |
355 | | // vertical filter |
356 | 0 | int16_t* src_vert = im_block + fo_vert * im_stride; |
357 | 0 | const int16_t* y_filter = av1_get_interp_filter_subpel_kernel(*filter_params_y, subpel_y_q4 & SUBPEL_MASK); |
358 | 0 | const int32_t offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0; |
359 | 0 | for (int32_t y = 0; y < h; ++y) { |
360 | 0 | for (int32_t x = 0; x < w; ++x) { |
361 | 0 | int32_t sum = 1 << offset_bits; |
362 | 0 | for (int32_t k = 0; k < filter_params_y->taps; ++k) { |
363 | 0 | sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x]; |
364 | 0 | } |
365 | 0 | assert(0 <= sum && sum < (1 << (offset_bits + 2))); |
366 | 0 | int16_t res = (ConvBufType)(ROUND_POWER_OF_TWO(sum, conv_params->round_1) - |
367 | 0 | ((1 << (offset_bits - conv_params->round_1)) + |
368 | 0 | (1 << (offset_bits - conv_params->round_1 - 1)))); |
369 | 0 | dst[y * dst_stride + x] = (uint8_t)clip_pixel_highbd(ROUND_POWER_OF_TWO(res, bits), 8); |
370 | 0 | } |
371 | 0 | } |
372 | 0 | } |
373 | | |
374 | | void svt_av1_convolve_y_sr_c(const uint8_t* src, int32_t src_stride, uint8_t* dst, int32_t dst_stride, int32_t w, |
375 | | int32_t h, const InterpFilterParams* filter_params_x, |
376 | | const InterpFilterParams* filter_params_y, const int32_t subpel_x_q4, |
377 | 0 | const int32_t subpel_y_q4, ConvolveParams* conv_params) { |
378 | 0 | assert(filter_params_y != NULL); |
379 | 0 | const int32_t fo_vert = filter_params_y->taps / 2 - 1; |
380 | 0 | (void)filter_params_x; |
381 | 0 | (void)subpel_x_q4; |
382 | 0 | (void)conv_params; |
383 | |
|
384 | 0 | assert(conv_params->round_0 <= FILTER_BITS); |
385 | 0 | assert(((conv_params->round_0 + conv_params->round_1) <= (FILTER_BITS + 1)) || |
386 | 0 | ((conv_params->round_0 + conv_params->round_1) == (2 * FILTER_BITS))); |
387 | | |
388 | | // vertical filter |
389 | 0 | const int16_t* y_filter = av1_get_interp_filter_subpel_kernel(*filter_params_y, subpel_y_q4 & SUBPEL_MASK); |
390 | |
|
391 | 0 | for (int32_t y = 0; y < h; ++y) { |
392 | 0 | for (int32_t x = 0; x < w; ++x) { |
393 | 0 | int32_t res = 0; |
394 | 0 | for (int32_t k = 0; k < filter_params_y->taps; ++k) { |
395 | 0 | res += y_filter[k] * src[(y - fo_vert + k) * src_stride + x]; |
396 | 0 | } |
397 | 0 | dst[y * dst_stride + x] = (uint8_t)clip_pixel_highbd(ROUND_POWER_OF_TWO(res, FILTER_BITS), 8); |
398 | 0 | } |
399 | 0 | } |
400 | 0 | } |
401 | | |
402 | | void svt_av1_convolve_x_sr_c(const uint8_t* src, int32_t src_stride, uint8_t* dst, int32_t dst_stride, int32_t w, |
403 | | int32_t h, const InterpFilterParams* filter_params_x, |
404 | | const InterpFilterParams* filter_params_y, const int32_t subpel_x_q4, |
405 | 0 | const int32_t subpel_y_q4, ConvolveParams* conv_params) { |
406 | 0 | const int32_t fo_horiz = filter_params_x->taps / 2 - 1; |
407 | 0 | const int32_t bits = FILTER_BITS - conv_params->round_0; |
408 | 0 | (void)filter_params_y; |
409 | 0 | (void)subpel_y_q4; |
410 | 0 | (void)conv_params; |
411 | |
|
412 | 0 | assert(bits >= 0); |
413 | 0 | assert((FILTER_BITS - conv_params->round_1) >= 0 || |
414 | 0 | ((conv_params->round_0 + conv_params->round_1) == 2 * FILTER_BITS)); |
415 | | |
416 | | // horizontal filter |
417 | 0 | const int16_t* x_filter = av1_get_interp_filter_subpel_kernel(*filter_params_x, subpel_x_q4 & SUBPEL_MASK); |
418 | |
|
419 | 0 | for (int32_t y = 0; y < h; ++y) { |
420 | 0 | for (int32_t x = 0; x < w; ++x) { |
421 | 0 | int32_t res = 0; |
422 | 0 | for (int32_t k = 0; k < filter_params_x->taps; ++k) { |
423 | 0 | res += x_filter[k] * src[y * src_stride + x - fo_horiz + k]; |
424 | 0 | } |
425 | 0 | res = ROUND_POWER_OF_TWO(res, conv_params->round_0); |
426 | 0 | dst[y * dst_stride + x] = (uint8_t)clip_pixel_highbd(ROUND_POWER_OF_TWO(res, bits), 8); |
427 | 0 | } |
428 | 0 | } |
429 | 0 | } |
430 | | |
431 | | void svt_av1_convolve_2d_copy_sr_c(const uint8_t* src, int32_t src_stride, uint8_t* dst, int32_t dst_stride, int32_t w, |
432 | | int32_t h, const InterpFilterParams* filter_params_x, |
433 | | const InterpFilterParams* filter_params_y, const int32_t subpel_x_q4, |
434 | 0 | const int32_t subpel_y_q4, ConvolveParams* conv_params) { |
435 | 0 | (void)filter_params_x; |
436 | 0 | (void)filter_params_y; |
437 | 0 | (void)subpel_x_q4; |
438 | 0 | (void)subpel_y_q4; |
439 | 0 | (void)conv_params; |
440 | |
|
441 | 0 | for (int32_t y = 0; y < h; ++y) { |
442 | 0 | for (int32_t x = 0; x < w; ++x) { |
443 | 0 | dst[y * dst_stride + x] = src[y * src_stride + x]; |
444 | 0 | } |
445 | 0 | } |
446 | 0 | } |
447 | | |
448 | | void svt_av1_convolve_2d_scale_c(const uint8_t* src, int src_stride, uint8_t* dst8, int dst8_stride, int w, int h, |
449 | | const InterpFilterParams* filter_params_x, const InterpFilterParams* filter_params_y, |
450 | | const int subpel_x_qn, const int x_step_qn, const int subpel_y_qn, const int y_step_qn, |
451 | 0 | ConvolveParams* conv_params) { |
452 | 0 | int16_t im_block[(2 * MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE]; |
453 | 0 | int im_h = (((h - 1) * y_step_qn + subpel_y_qn) >> SCALE_SUBPEL_BITS) + filter_params_y->taps; |
454 | 0 | CONV_BUF_TYPE* dst16 = conv_params->dst; |
455 | 0 | const int dst16_stride = conv_params->dst_stride; |
456 | 0 | const int bits = FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1; |
457 | 0 | assert(bits >= 0); |
458 | 0 | int im_stride = w; |
459 | 0 | const int fo_vert = filter_params_y->taps / 2 - 1; |
460 | 0 | const int fo_horiz = filter_params_x->taps / 2 - 1; |
461 | 0 | const int bd = 8; |
462 | | |
463 | | // horizontal filter |
464 | 0 | const uint8_t* src_horiz = src - fo_vert * src_stride; |
465 | 0 | for (int y = 0; y < im_h; ++y) { |
466 | 0 | int x_qn = subpel_x_qn; |
467 | 0 | for (int x = 0; x < w; ++x, x_qn += x_step_qn) { |
468 | 0 | const uint8_t* const src_x = &src_horiz[(x_qn >> SCALE_SUBPEL_BITS)]; |
469 | 0 | const int x_filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS; |
470 | 0 | assert(x_filter_idx < SUBPEL_SHIFTS); |
471 | 0 | const int16_t* x_filter = av1_get_interp_filter_subpel_kernel(*filter_params_x, x_filter_idx); |
472 | 0 | int32_t sum = (1 << (bd + FILTER_BITS - 1)); |
473 | 0 | for (int k = 0; k < filter_params_x->taps; ++k) { |
474 | 0 | sum += x_filter[k] * src_x[k - fo_horiz]; |
475 | 0 | } |
476 | 0 | assert(0 <= sum && sum < (1 << (bd + FILTER_BITS + 1))); |
477 | 0 | im_block[y * im_stride + x] = (int16_t)ROUND_POWER_OF_TWO(sum, conv_params->round_0); |
478 | 0 | } |
479 | 0 | src_horiz += src_stride; |
480 | 0 | } |
481 | | |
482 | | // vertical filter |
483 | 0 | int16_t* src_vert = im_block + fo_vert * im_stride; |
484 | 0 | const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0; |
485 | 0 | for (int x = 0; x < w; ++x) { |
486 | 0 | int y_qn = subpel_y_qn; |
487 | 0 | for (int y = 0; y < h; ++y, y_qn += y_step_qn) { |
488 | 0 | const int16_t* src_y = &src_vert[(y_qn >> SCALE_SUBPEL_BITS) * im_stride]; |
489 | 0 | const int y_filter_idx = (y_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS; |
490 | 0 | assert(y_filter_idx < SUBPEL_SHIFTS); |
491 | 0 | const int16_t* y_filter = av1_get_interp_filter_subpel_kernel(*filter_params_y, y_filter_idx); |
492 | 0 | int32_t sum = 1 << offset_bits; |
493 | 0 | for (int k = 0; k < filter_params_y->taps; ++k) { |
494 | 0 | sum += y_filter[k] * src_y[(k - fo_vert) * im_stride]; |
495 | 0 | } |
496 | 0 | assert(0 <= sum && sum < (1 << (offset_bits + 2))); |
497 | 0 | CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1); |
498 | 0 | if (conv_params->is_compound) { |
499 | 0 | if (conv_params->do_average) { |
500 | 0 | int32_t tmp = dst16[y * dst16_stride + x]; |
501 | 0 | if (conv_params->use_dist_wtd_comp_avg) { |
502 | 0 | tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset; |
503 | 0 | tmp = tmp >> DIST_PRECISION_BITS; |
504 | 0 | } else { |
505 | 0 | tmp += res; |
506 | 0 | tmp = tmp >> 1; |
507 | 0 | } |
508 | | /* Subtract round offset and convolve round */ |
509 | 0 | tmp = tmp - |
510 | 0 | ((1 << (offset_bits - conv_params->round_1)) + (1 << (offset_bits - conv_params->round_1 - 1))); |
511 | 0 | dst8[y * dst8_stride + x] = clip_pixel(ROUND_POWER_OF_TWO(tmp, bits)); |
512 | 0 | } else { |
513 | 0 | dst16[y * dst16_stride + x] = res; |
514 | 0 | } |
515 | 0 | } else { |
516 | | /* Subtract round offset and convolve round */ |
517 | 0 | int32_t tmp = res - |
518 | 0 | ((1 << (offset_bits - conv_params->round_1)) + (1 << (offset_bits - conv_params->round_1 - 1))); |
519 | 0 | dst8[y * dst8_stride + x] = clip_pixel(ROUND_POWER_OF_TWO(tmp, bits)); |
520 | 0 | } |
521 | 0 | } |
522 | 0 | src_vert++; |
523 | 0 | } |
524 | 0 | } |
525 | | |
526 | | void svt_av1_jnt_convolve_2d_c(const uint8_t* src, int32_t src_stride, uint8_t* dst8, int32_t dst8_stride, int32_t w, |
527 | | int32_t h, const InterpFilterParams* filter_params_x, |
528 | | const InterpFilterParams* filter_params_y, const int32_t subpel_x_q4, |
529 | 0 | const int32_t subpel_y_q4, ConvolveParams* conv_params) { |
530 | 0 | ConvBufType* dst = conv_params->dst; |
531 | 0 | int32_t dst_stride = conv_params->dst_stride; |
532 | 0 | int16_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE]; |
533 | 0 | int32_t im_h = h + filter_params_y->taps - 1; |
534 | 0 | int32_t im_stride = w; |
535 | 0 | const int32_t fo_vert = filter_params_y->taps / 2 - 1; |
536 | 0 | const int32_t fo_horiz = filter_params_x->taps / 2 - 1; |
537 | 0 | const int32_t bd = 8; |
538 | 0 | const int32_t round_bits = 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; |
539 | | |
540 | | // horizontal filter |
541 | 0 | const uint8_t* src_horiz = src - fo_vert * src_stride; |
542 | 0 | const int16_t* x_filter = av1_get_interp_filter_subpel_kernel(*filter_params_x, subpel_x_q4 & SUBPEL_MASK); |
543 | 0 | for (int32_t y = 0; y < im_h; ++y) { |
544 | 0 | for (int32_t x = 0; x < w; ++x) { |
545 | 0 | int32_t sum = (1 << (bd + FILTER_BITS - 1)); |
546 | 0 | for (int32_t k = 0; k < filter_params_x->taps; ++k) { |
547 | 0 | sum += x_filter[k] * src_horiz[y * src_stride + x - fo_horiz + k]; |
548 | 0 | } |
549 | 0 | assert(0 <= sum && sum < (1 << (bd + FILTER_BITS + 1))); |
550 | 0 | im_block[y * im_stride + x] = (int16_t)ROUND_POWER_OF_TWO(sum, conv_params->round_0); |
551 | 0 | } |
552 | 0 | } |
553 | | |
554 | | // vertical filter |
555 | 0 | int16_t* src_vert = im_block + fo_vert * im_stride; |
556 | 0 | const int16_t* y_filter = av1_get_interp_filter_subpel_kernel(*filter_params_y, subpel_y_q4 & SUBPEL_MASK); |
557 | 0 | const int32_t offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0; |
558 | 0 | for (int32_t y = 0; y < h; ++y) { |
559 | 0 | for (int32_t x = 0; x < w; ++x) { |
560 | 0 | int32_t sum = 1 << offset_bits; |
561 | 0 | for (int32_t k = 0; k < filter_params_y->taps; ++k) { |
562 | 0 | sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x]; |
563 | 0 | } |
564 | 0 | assert(0 <= sum && sum < (1 << (offset_bits + 2))); |
565 | 0 | ConvBufType res = (ConvBufType)ROUND_POWER_OF_TWO(sum, conv_params->round_1); |
566 | 0 | if (conv_params->do_average) { |
567 | 0 | int32_t tmp = dst[y * dst_stride + x]; |
568 | 0 | if (conv_params->use_jnt_comp_avg) { |
569 | 0 | tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset; |
570 | 0 | tmp = tmp >> DIST_PRECISION_BITS; |
571 | 0 | } else { |
572 | 0 | tmp += res; |
573 | 0 | tmp = tmp >> 1; |
574 | 0 | } |
575 | 0 | tmp -= (1 << (offset_bits - conv_params->round_1)) + (1 << (offset_bits - conv_params->round_1 - 1)); |
576 | 0 | dst8[y * dst8_stride + x] = (uint8_t)clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp, round_bits), 8); |
577 | 0 | } else { |
578 | 0 | dst[y * dst_stride + x] = res; |
579 | 0 | } |
580 | 0 | } |
581 | 0 | } |
582 | 0 | } |
583 | | |
584 | | void svt_av1_jnt_convolve_y_c(const uint8_t* src, int32_t src_stride, uint8_t* dst8, int32_t dst8_stride, int32_t w, |
585 | | int32_t h, const InterpFilterParams* filter_params_x, |
586 | | const InterpFilterParams* filter_params_y, const int32_t subpel_x_q4, |
587 | 0 | const int32_t subpel_y_q4, ConvolveParams* conv_params) { |
588 | 0 | ConvBufType* dst = conv_params->dst; |
589 | 0 | int32_t dst_stride = conv_params->dst_stride; |
590 | 0 | const int32_t fo_vert = filter_params_y->taps / 2 - 1; |
591 | 0 | const int32_t bits = FILTER_BITS - conv_params->round_0; |
592 | 0 | const int32_t bd = 8; |
593 | 0 | const int32_t offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0; |
594 | 0 | const int32_t round_offset = (1 << (offset_bits - conv_params->round_1)) + |
595 | 0 | (1 << (offset_bits - conv_params->round_1 - 1)); |
596 | 0 | const int32_t round_bits = 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; |
597 | 0 | (void)filter_params_x; |
598 | 0 | (void)subpel_x_q4; |
599 | | |
600 | | // vertical filter |
601 | 0 | const int16_t* y_filter = av1_get_interp_filter_subpel_kernel(*filter_params_y, subpel_y_q4 & SUBPEL_MASK); |
602 | 0 | for (int32_t y = 0; y < h; ++y) { |
603 | 0 | for (int32_t x = 0; x < w; ++x) { |
604 | 0 | int32_t res = 0; |
605 | 0 | for (int32_t k = 0; k < filter_params_y->taps; ++k) { |
606 | 0 | res += y_filter[k] * src[(y - fo_vert + k) * src_stride + x]; |
607 | 0 | } |
608 | 0 | res *= (1 << bits); |
609 | 0 | res = ROUND_POWER_OF_TWO(res, conv_params->round_1) + round_offset; |
610 | |
|
611 | 0 | if (conv_params->do_average) { |
612 | 0 | int32_t tmp = dst[y * dst_stride + x]; |
613 | 0 | if (conv_params->use_jnt_comp_avg) { |
614 | 0 | tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset; |
615 | 0 | tmp = tmp >> DIST_PRECISION_BITS; |
616 | 0 | } else { |
617 | 0 | tmp += res; |
618 | 0 | tmp = tmp >> 1; |
619 | 0 | } |
620 | 0 | tmp -= round_offset; |
621 | 0 | dst8[y * dst8_stride + x] = (uint8_t)clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp, round_bits), 8); |
622 | 0 | } else { |
623 | 0 | dst[y * dst_stride + x] = (ConvBufType)res; |
624 | 0 | } |
625 | 0 | } |
626 | 0 | } |
627 | 0 | } |
628 | | |
629 | | void svt_av1_jnt_convolve_x_c(const uint8_t* src, int32_t src_stride, uint8_t* dst8, int32_t dst8_stride, int32_t w, |
630 | | int32_t h, const InterpFilterParams* filter_params_x, |
631 | | const InterpFilterParams* filter_params_y, const int32_t subpel_x_q4, |
632 | 0 | const int32_t subpel_y_q4, ConvolveParams* conv_params) { |
633 | 0 | ConvBufType* dst = conv_params->dst; |
634 | 0 | int32_t dst_stride = conv_params->dst_stride; |
635 | 0 | const int32_t fo_horiz = filter_params_x->taps / 2 - 1; |
636 | 0 | const int32_t bits = FILTER_BITS - conv_params->round_1; |
637 | 0 | const int32_t bd = 8; |
638 | 0 | const int32_t offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0; |
639 | 0 | const int32_t round_offset = (1 << (offset_bits - conv_params->round_1)) + |
640 | 0 | (1 << (offset_bits - conv_params->round_1 - 1)); |
641 | 0 | const int32_t round_bits = 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; |
642 | 0 | (void)filter_params_y; |
643 | 0 | (void)subpel_y_q4; |
644 | | |
645 | | // horizontal filter |
646 | 0 | const int16_t* x_filter = av1_get_interp_filter_subpel_kernel(*filter_params_x, subpel_x_q4 & SUBPEL_MASK); |
647 | 0 | for (int32_t y = 0; y < h; ++y) { |
648 | 0 | for (int32_t x = 0; x < w; ++x) { |
649 | 0 | int32_t res = 0; |
650 | 0 | for (int32_t k = 0; k < filter_params_x->taps; ++k) { |
651 | 0 | res += x_filter[k] * src[y * src_stride + x - fo_horiz + k]; |
652 | 0 | } |
653 | 0 | res = (1 << bits) * ROUND_POWER_OF_TWO(res, conv_params->round_0); |
654 | 0 | res += round_offset; |
655 | |
|
656 | 0 | if (conv_params->do_average) { |
657 | 0 | int32_t tmp = dst[y * dst_stride + x]; |
658 | 0 | if (conv_params->use_jnt_comp_avg) { |
659 | 0 | tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset; |
660 | 0 | tmp = tmp >> DIST_PRECISION_BITS; |
661 | 0 | } else { |
662 | 0 | tmp += res; |
663 | 0 | tmp = tmp >> 1; |
664 | 0 | } |
665 | 0 | tmp -= round_offset; |
666 | 0 | dst8[y * dst8_stride + x] = (uint8_t)clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp, round_bits), 8); |
667 | 0 | } else { |
668 | 0 | dst[y * dst_stride + x] = (ConvBufType)res; |
669 | 0 | } |
670 | 0 | } |
671 | 0 | } |
672 | 0 | } |
673 | | |
674 | | void svt_av1_jnt_convolve_2d_copy_c(const uint8_t* src, int32_t src_stride, uint8_t* dst8, int32_t dst8_stride, |
675 | | int32_t w, int32_t h, const InterpFilterParams* filter_params_x, |
676 | | const InterpFilterParams* filter_params_y, const int32_t subpel_x_q4, |
677 | 0 | const int32_t subpel_y_q4, ConvolveParams* conv_params) { |
678 | 0 | ConvBufType* dst = conv_params->dst; |
679 | 0 | int32_t dst_stride = conv_params->dst_stride; |
680 | 0 | const int32_t bits = FILTER_BITS * 2 - conv_params->round_1 - conv_params->round_0; |
681 | 0 | const int32_t bd = 8; |
682 | 0 | const int32_t offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0; |
683 | 0 | const int32_t round_offset = (1 << (offset_bits - conv_params->round_1)) + |
684 | 0 | (1 << (offset_bits - conv_params->round_1 - 1)); |
685 | 0 | (void)filter_params_x; |
686 | 0 | (void)filter_params_y; |
687 | 0 | (void)subpel_x_q4; |
688 | 0 | (void)subpel_y_q4; |
689 | |
|
690 | 0 | for (int32_t y = 0; y < h; ++y) { |
691 | 0 | for (int32_t x = 0; x < w; ++x) { |
692 | 0 | ConvBufType res = src[y * src_stride + x] << bits; |
693 | 0 | res += (ConvBufType)round_offset; |
694 | |
|
695 | 0 | if (conv_params->do_average) { |
696 | 0 | int32_t tmp = dst[y * dst_stride + x]; |
697 | 0 | if (conv_params->use_jnt_comp_avg) { |
698 | 0 | tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset; |
699 | 0 | tmp = tmp >> DIST_PRECISION_BITS; |
700 | 0 | } else { |
701 | 0 | tmp += res; |
702 | 0 | tmp = tmp >> 1; |
703 | 0 | } |
704 | 0 | tmp -= round_offset; |
705 | 0 | dst8[y * dst8_stride + x] = (uint8_t)clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp, bits), 8); |
706 | 0 | } else { |
707 | 0 | dst[y * dst_stride + x] = res; |
708 | 0 | } |
709 | 0 | } |
710 | 0 | } |
711 | 0 | } |
712 | | |
713 | | void svt_av1_highbd_convolve_2d_copy_sr_c(const uint16_t* src, int32_t src_stride, uint16_t* dst, int32_t dst_stride, |
714 | | int32_t w, int32_t h, const InterpFilterParams* filter_params_x, |
715 | | const InterpFilterParams* filter_params_y, const int32_t subpel_x_q4, |
716 | 0 | const int32_t subpel_y_q4, ConvolveParams* conv_params, int32_t bd) { |
717 | 0 | (void)filter_params_x; |
718 | 0 | (void)filter_params_y; |
719 | 0 | (void)subpel_x_q4; |
720 | 0 | (void)subpel_y_q4; |
721 | 0 | (void)conv_params; |
722 | 0 | (void)bd; |
723 | |
|
724 | 0 | for (int32_t y = 0; y < h; ++y) { |
725 | 0 | for (int32_t x = 0; x < w; ++x) { |
726 | 0 | dst[y * dst_stride + x] = src[y * src_stride + x]; |
727 | 0 | } |
728 | 0 | } |
729 | 0 | } |
730 | | |
731 | | void svt_av1_highbd_convolve_x_sr_c(const uint16_t* src, int32_t src_stride, uint16_t* dst, int32_t dst_stride, |
732 | | int32_t w, int32_t h, const InterpFilterParams* filter_params_x, |
733 | | const InterpFilterParams* filter_params_y, const int32_t subpel_x_q4, |
734 | 0 | const int32_t subpel_y_q4, ConvolveParams* conv_params, int32_t bd) { |
735 | 0 | const int32_t fo_horiz = filter_params_x->taps / 2 - 1; |
736 | 0 | const int32_t bits = FILTER_BITS - conv_params->round_0; |
737 | 0 | (void)filter_params_y; |
738 | 0 | (void)subpel_y_q4; |
739 | |
|
740 | 0 | assert(bits >= 0); |
741 | 0 | assert((FILTER_BITS - conv_params->round_1) >= 0 || |
742 | 0 | ((conv_params->round_0 + conv_params->round_1) == 2 * FILTER_BITS)); |
743 | | |
744 | | // horizontal filter |
745 | 0 | const int16_t* x_filter = av1_get_interp_filter_subpel_kernel(*filter_params_x, subpel_x_q4 & SUBPEL_MASK); |
746 | 0 | for (int32_t y = 0; y < h; ++y) { |
747 | 0 | for (int32_t x = 0; x < w; ++x) { |
748 | 0 | int32_t res = 0; |
749 | 0 | for (int32_t k = 0; k < filter_params_x->taps; ++k) { |
750 | 0 | res += x_filter[k] * src[y * src_stride + x - fo_horiz + k]; |
751 | 0 | } |
752 | 0 | res = ROUND_POWER_OF_TWO(res, conv_params->round_0); |
753 | 0 | dst[y * dst_stride + x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(res, bits), bd); |
754 | 0 | } |
755 | 0 | } |
756 | 0 | } |
757 | | |
758 | | void svt_av1_highbd_convolve_y_sr_c(const uint16_t* src, int32_t src_stride, uint16_t* dst, int32_t dst_stride, |
759 | | int32_t w, int32_t h, const InterpFilterParams* filter_params_x, |
760 | | const InterpFilterParams* filter_params_y, const int32_t subpel_x_q4, |
761 | 0 | const int32_t subpel_y_q4, ConvolveParams* conv_params, int32_t bd) { |
762 | 0 | assert(filter_params_y != NULL); |
763 | 0 | const int32_t fo_vert = filter_params_y->taps / 2 - 1; |
764 | 0 | (void)filter_params_x; |
765 | 0 | (void)subpel_x_q4; |
766 | 0 | (void)conv_params; |
767 | |
|
768 | 0 | assert(conv_params->round_0 <= FILTER_BITS); |
769 | 0 | assert(((conv_params->round_0 + conv_params->round_1) <= (FILTER_BITS + 1)) || |
770 | 0 | ((conv_params->round_0 + conv_params->round_1) == (2 * FILTER_BITS))); |
771 | | // vertical filter |
772 | 0 | const int16_t* y_filter = av1_get_interp_filter_subpel_kernel(*filter_params_y, subpel_y_q4 & SUBPEL_MASK); |
773 | 0 | for (int32_t y = 0; y < h; ++y) { |
774 | 0 | for (int32_t x = 0; x < w; ++x) { |
775 | 0 | int32_t res = 0; |
776 | 0 | for (int32_t k = 0; k < filter_params_y->taps; ++k) { |
777 | 0 | res += y_filter[k] * src[(y - fo_vert + k) * src_stride + x]; |
778 | 0 | } |
779 | 0 | dst[y * dst_stride + x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(res, FILTER_BITS), bd); |
780 | 0 | } |
781 | 0 | } |
782 | 0 | } |
783 | | |
784 | | void svt_av1_highbd_convolve_2d_sr_c(const uint16_t* src, int32_t src_stride, uint16_t* dst, int32_t dst_stride, |
785 | | int32_t w, int32_t h, const InterpFilterParams* filter_params_x, |
786 | | const InterpFilterParams* filter_params_y, const int32_t subpel_x_q4, |
787 | 0 | const int32_t subpel_y_q4, ConvolveParams* conv_params, int32_t bd) { |
788 | 0 | int16_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE]; |
789 | 0 | int32_t im_h = h + filter_params_y->taps - 1; |
790 | 0 | int32_t im_stride = w; |
791 | 0 | const int32_t fo_vert = filter_params_y->taps / 2 - 1; |
792 | 0 | const int32_t fo_horiz = filter_params_x->taps / 2 - 1; |
793 | 0 | const int32_t bits = FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1; |
794 | 0 | assert(bits >= 0); |
795 | | |
796 | | // horizontal filter |
797 | 0 | const uint16_t* src_horiz = src - fo_vert * src_stride; |
798 | 0 | const int16_t* x_filter = av1_get_interp_filter_subpel_kernel(*filter_params_x, subpel_x_q4 & SUBPEL_MASK); |
799 | 0 | for (int32_t y = 0; y < im_h; ++y) { |
800 | 0 | for (int32_t x = 0; x < w; ++x) { |
801 | 0 | int32_t sum = (1 << (bd + FILTER_BITS - 1)); |
802 | 0 | for (int32_t k = 0; k < filter_params_x->taps; ++k) { |
803 | 0 | sum += x_filter[k] * src_horiz[y * src_stride + x - fo_horiz + k]; |
804 | 0 | } |
805 | 0 | assert(0 <= sum && sum < (1 << (bd + FILTER_BITS + 1))); |
806 | 0 | im_block[y * im_stride + x] = (ConvBufType)ROUND_POWER_OF_TWO(sum, conv_params->round_0); |
807 | 0 | } |
808 | 0 | } |
809 | | |
810 | | // vertical filter |
811 | 0 | int16_t* src_vert = im_block + fo_vert * im_stride; |
812 | 0 | const int16_t* y_filter = av1_get_interp_filter_subpel_kernel(*filter_params_y, subpel_y_q4 & SUBPEL_MASK); |
813 | 0 | const int32_t offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0; |
814 | 0 | for (int32_t y = 0; y < h; ++y) { |
815 | 0 | for (int32_t x = 0; x < w; ++x) { |
816 | 0 | int32_t sum = 1 << offset_bits; |
817 | 0 | for (int32_t k = 0; k < filter_params_y->taps; ++k) { |
818 | 0 | sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x]; |
819 | 0 | } |
820 | 0 | assert(0 <= sum && sum < (1 << (offset_bits + 2))); |
821 | 0 | int32_t res = ROUND_POWER_OF_TWO(sum, conv_params->round_1) - |
822 | 0 | ((1 << (offset_bits - conv_params->round_1)) + (1 << (offset_bits - conv_params->round_1 - 1))); |
823 | 0 | dst[y * dst_stride + x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(res, bits), bd); |
824 | 0 | } |
825 | 0 | } |
826 | 0 | } |
827 | | |
828 | | void svt_av1_highbd_convolve_2d_scale_c(const uint16_t* src, int src_stride, uint16_t* dst, int dst_stride, int w, |
829 | | int h, const InterpFilterParams* filter_params_x, |
830 | | const InterpFilterParams* filter_params_y, const int subpel_x_qn, |
831 | | const int x_step_qn, const int subpel_y_qn, const int y_step_qn, |
832 | 0 | ConvolveParams* conv_params, int bd) { |
833 | 0 | int16_t im_block[(2 * MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE]; |
834 | 0 | int im_h = (((h - 1) * y_step_qn + subpel_y_qn) >> SCALE_SUBPEL_BITS) + filter_params_y->taps; |
835 | 0 | int im_stride = w; |
836 | 0 | const int fo_vert = filter_params_y->taps / 2 - 1; |
837 | 0 | const int fo_horiz = filter_params_x->taps / 2 - 1; |
838 | 0 | CONV_BUF_TYPE* dst16 = conv_params->dst; |
839 | 0 | const int dst16_stride = conv_params->dst_stride; |
840 | 0 | const int bits = FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1; |
841 | 0 | assert(bits >= 0); |
842 | | // horizontal filter |
843 | 0 | const uint16_t* src_horiz = src - fo_vert * src_stride; |
844 | 0 | for (int y = 0; y < im_h; ++y) { |
845 | 0 | int x_qn = subpel_x_qn; |
846 | 0 | for (int x = 0; x < w; ++x, x_qn += x_step_qn) { |
847 | 0 | const uint16_t* const src_x = &src_horiz[(x_qn >> SCALE_SUBPEL_BITS)]; |
848 | 0 | const int x_filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS; |
849 | 0 | assert(x_filter_idx < SUBPEL_SHIFTS); |
850 | 0 | const int16_t* x_filter = av1_get_interp_filter_subpel_kernel(*filter_params_x, x_filter_idx); |
851 | 0 | int32_t sum = (1 << (bd + FILTER_BITS - 1)); |
852 | 0 | for (int k = 0; k < filter_params_x->taps; ++k) { |
853 | 0 | sum += x_filter[k] * src_x[k - fo_horiz]; |
854 | 0 | } |
855 | 0 | assert(0 <= sum && sum < (1 << (bd + FILTER_BITS + 1))); |
856 | 0 | im_block[y * im_stride + x] = (int16_t)ROUND_POWER_OF_TWO(sum, conv_params->round_0); |
857 | 0 | } |
858 | 0 | src_horiz += src_stride; |
859 | 0 | } |
860 | | |
861 | | // vertical filter |
862 | 0 | int16_t* src_vert = im_block + fo_vert * im_stride; |
863 | 0 | const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0; |
864 | 0 | for (int x = 0; x < w; ++x) { |
865 | 0 | int y_qn = subpel_y_qn; |
866 | 0 | for (int y = 0; y < h; ++y, y_qn += y_step_qn) { |
867 | 0 | const int16_t* src_y = &src_vert[(y_qn >> SCALE_SUBPEL_BITS) * im_stride]; |
868 | 0 | const int y_filter_idx = (y_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS; |
869 | 0 | assert(y_filter_idx < SUBPEL_SHIFTS); |
870 | 0 | const int16_t* y_filter = av1_get_interp_filter_subpel_kernel(*filter_params_y, y_filter_idx); |
871 | 0 | int32_t sum = 1 << offset_bits; |
872 | 0 | for (int k = 0; k < filter_params_y->taps; ++k) { |
873 | 0 | sum += y_filter[k] * src_y[(k - fo_vert) * im_stride]; |
874 | 0 | } |
875 | 0 | assert(0 <= sum && sum < (1 << (offset_bits + 2))); |
876 | 0 | CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1); |
877 | 0 | if (conv_params->is_compound) { |
878 | 0 | if (conv_params->do_average) { |
879 | 0 | int32_t tmp = dst16[y * dst16_stride + x]; |
880 | 0 | if (conv_params->use_dist_wtd_comp_avg) { |
881 | 0 | tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset; |
882 | 0 | tmp = tmp >> DIST_PRECISION_BITS; |
883 | 0 | } else { |
884 | 0 | tmp += res; |
885 | 0 | tmp = tmp >> 1; |
886 | 0 | } |
887 | | /* Subtract round offset and convolve round */ |
888 | 0 | tmp = tmp - |
889 | 0 | ((1 << (offset_bits - conv_params->round_1)) + (1 << (offset_bits - conv_params->round_1 - 1))); |
890 | 0 | dst[y * dst_stride + x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp, bits), bd); |
891 | 0 | } else { |
892 | 0 | dst16[y * dst16_stride + x] = res; |
893 | 0 | } |
894 | 0 | } else { |
895 | | /* Subtract round offset and convolve round */ |
896 | 0 | int32_t tmp = res - |
897 | 0 | ((1 << (offset_bits - conv_params->round_1)) + (1 << (offset_bits - conv_params->round_1 - 1))); |
898 | 0 | dst[y * dst_stride + x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp, bits), bd); |
899 | 0 | } |
900 | 0 | } |
901 | 0 | src_vert++; |
902 | 0 | } |
903 | 0 | } |
904 | | |
905 | | void svt_av1_highbd_jnt_convolve_x_c(const uint16_t* src, int32_t src_stride, uint16_t* dst16, int32_t dst16_stride, |
906 | | int32_t w, int32_t h, const InterpFilterParams* filter_params_x, |
907 | | const InterpFilterParams* filter_params_y, const int32_t subpel_x_q4, |
908 | 0 | const int32_t subpel_y_q4, ConvolveParams* conv_params, int32_t bd) { |
909 | 0 | ConvBufType* dst = conv_params->dst; |
910 | 0 | int32_t dst_stride = conv_params->dst_stride; |
911 | 0 | const int32_t fo_horiz = filter_params_x->taps / 2 - 1; |
912 | 0 | const int32_t bits = FILTER_BITS - conv_params->round_1; |
913 | 0 | const int32_t offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0; |
914 | 0 | const int32_t round_offset = (1 << (offset_bits - conv_params->round_1)) + |
915 | 0 | (1 << (offset_bits - conv_params->round_1 - 1)); |
916 | 0 | const int32_t round_bits = 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; |
917 | 0 | assert(round_bits >= 0); |
918 | 0 | (void)filter_params_y; |
919 | 0 | (void)subpel_y_q4; |
920 | 0 | assert(bits >= 0); |
921 | | // horizontal filter |
922 | 0 | const int16_t* x_filter = av1_get_interp_filter_subpel_kernel(*filter_params_x, subpel_x_q4 & SUBPEL_MASK); |
923 | 0 | for (int32_t y = 0; y < h; ++y) { |
924 | 0 | for (int32_t x = 0; x < w; ++x) { |
925 | 0 | int32_t res = 0; |
926 | 0 | for (int32_t k = 0; k < filter_params_x->taps; ++k) { |
927 | 0 | res += x_filter[k] * src[y * src_stride + x - fo_horiz + k]; |
928 | 0 | } |
929 | 0 | res = (1 << bits) * ROUND_POWER_OF_TWO(res, conv_params->round_0); |
930 | 0 | res += round_offset; |
931 | |
|
932 | 0 | if (conv_params->do_average) { |
933 | 0 | int32_t tmp = dst[y * dst_stride + x]; |
934 | 0 | if (conv_params->use_jnt_comp_avg) { |
935 | 0 | tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset; |
936 | 0 | tmp = tmp >> DIST_PRECISION_BITS; |
937 | 0 | } else { |
938 | 0 | tmp += res; |
939 | 0 | tmp = tmp >> 1; |
940 | 0 | } |
941 | 0 | tmp -= round_offset; |
942 | 0 | dst16[y * dst16_stride + x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp, round_bits), bd); |
943 | 0 | } else { |
944 | 0 | dst[y * dst_stride + x] = (ConvBufType)res; |
945 | 0 | } |
946 | 0 | } |
947 | 0 | } |
948 | 0 | } |
949 | | |
950 | | void svt_av1_highbd_jnt_convolve_y_c(const uint16_t* src, int32_t src_stride, uint16_t* dst16, int32_t dst16_stride, |
951 | | int32_t w, int32_t h, const InterpFilterParams* filter_params_x, |
952 | | const InterpFilterParams* filter_params_y, const int32_t subpel_x_q4, |
953 | 0 | const int32_t subpel_y_q4, ConvolveParams* conv_params, int32_t bd) { |
954 | 0 | ConvBufType* dst = conv_params->dst; |
955 | 0 | int32_t dst_stride = conv_params->dst_stride; |
956 | 0 | const int32_t fo_vert = filter_params_y->taps / 2 - 1; |
957 | 0 | const int32_t bits = FILTER_BITS - conv_params->round_0; |
958 | 0 | const int32_t offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0; |
959 | 0 | const int32_t round_offset = (1 << (offset_bits - conv_params->round_1)) + |
960 | 0 | (1 << (offset_bits - conv_params->round_1 - 1)); |
961 | 0 | const int32_t round_bits = 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; |
962 | 0 | assert(round_bits >= 0); |
963 | 0 | (void)filter_params_x; |
964 | 0 | (void)subpel_x_q4; |
965 | 0 | assert(bits >= 0); |
966 | | // vertical filter |
967 | 0 | const int16_t* y_filter = av1_get_interp_filter_subpel_kernel(*filter_params_y, subpel_y_q4 & SUBPEL_MASK); |
968 | 0 | for (int32_t y = 0; y < h; ++y) { |
969 | 0 | for (int32_t x = 0; x < w; ++x) { |
970 | 0 | int32_t res = 0; |
971 | 0 | for (int32_t k = 0; k < filter_params_y->taps; ++k) { |
972 | 0 | res += y_filter[k] * src[(y - fo_vert + k) * src_stride + x]; |
973 | 0 | } |
974 | 0 | res *= (1 << bits); |
975 | 0 | res = ROUND_POWER_OF_TWO(res, conv_params->round_1) + round_offset; |
976 | |
|
977 | 0 | if (conv_params->do_average) { |
978 | 0 | int32_t tmp = dst[y * dst_stride + x]; |
979 | 0 | if (conv_params->use_jnt_comp_avg) { |
980 | 0 | tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset; |
981 | 0 | tmp = tmp >> DIST_PRECISION_BITS; |
982 | 0 | } else { |
983 | 0 | tmp += res; |
984 | 0 | tmp = tmp >> 1; |
985 | 0 | } |
986 | 0 | tmp -= round_offset; |
987 | 0 | dst16[y * dst16_stride + x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp, round_bits), bd); |
988 | 0 | } else { |
989 | 0 | dst[y * dst_stride + x] = (ConvBufType)res; |
990 | 0 | } |
991 | 0 | } |
992 | 0 | } |
993 | 0 | } |
994 | | |
995 | | void svt_av1_highbd_jnt_convolve_2d_copy_c(const uint16_t* src, int32_t src_stride, uint16_t* dst16, |
996 | | int32_t dst16_stride, int32_t w, int32_t h, |
997 | | const InterpFilterParams* filter_params_x, |
998 | | const InterpFilterParams* filter_params_y, const int32_t subpel_x_q4, |
999 | 0 | const int32_t subpel_y_q4, ConvolveParams* conv_params, int32_t bd) { |
1000 | 0 | ConvBufType* dst = conv_params->dst; |
1001 | 0 | int32_t dst_stride = conv_params->dst_stride; |
1002 | 0 | const int32_t bits = FILTER_BITS * 2 - conv_params->round_1 - conv_params->round_0; |
1003 | 0 | const int32_t offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0; |
1004 | 0 | const int32_t round_offset = (1 << (offset_bits - conv_params->round_1)) + |
1005 | 0 | (1 << (offset_bits - conv_params->round_1 - 1)); |
1006 | 0 | assert(bits >= 0); |
1007 | 0 | (void)filter_params_x; |
1008 | 0 | (void)filter_params_y; |
1009 | 0 | (void)subpel_x_q4; |
1010 | 0 | (void)subpel_y_q4; |
1011 | |
|
1012 | 0 | for (int32_t y = 0; y < h; ++y) { |
1013 | 0 | for (int32_t x = 0; x < w; ++x) { |
1014 | 0 | ConvBufType res = src[y * src_stride + x] << bits; |
1015 | 0 | res += (ConvBufType)round_offset; |
1016 | 0 | if (conv_params->do_average) { |
1017 | 0 | int32_t tmp = dst[y * dst_stride + x]; |
1018 | 0 | if (conv_params->use_jnt_comp_avg) { |
1019 | 0 | tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset; |
1020 | 0 | tmp = tmp >> DIST_PRECISION_BITS; |
1021 | 0 | } else { |
1022 | 0 | tmp += res; |
1023 | 0 | tmp = tmp >> 1; |
1024 | 0 | } |
1025 | 0 | tmp -= round_offset; |
1026 | 0 | dst16[y * dst16_stride + x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp, bits), bd); |
1027 | 0 | } else { |
1028 | 0 | dst[y * dst_stride + x] = res; |
1029 | 0 | } |
1030 | 0 | } |
1031 | 0 | } |
1032 | 0 | } |
1033 | | |
1034 | | void svt_av1_highbd_jnt_convolve_2d_c(const uint16_t* src, int32_t src_stride, uint16_t* dst16, int32_t dst16_stride, |
1035 | | int32_t w, int32_t h, const InterpFilterParams* filter_params_x, |
1036 | | const InterpFilterParams* filter_params_y, const int32_t subpel_x_q4, |
1037 | | const int32_t subpel_y_q4, ConvolveParams* conv_params, int32_t bd) |
1038 | | |
1039 | 0 | { |
1040 | 0 | int16_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE]; |
1041 | 0 | ConvBufType* dst = conv_params->dst; |
1042 | 0 | int32_t dst_stride = conv_params->dst_stride; |
1043 | 0 | int32_t im_h = h + filter_params_y->taps - 1; |
1044 | 0 | int32_t im_stride = w; |
1045 | 0 | const int32_t fo_vert = filter_params_y->taps / 2 - 1; |
1046 | 0 | const int32_t fo_horiz = filter_params_x->taps / 2 - 1; |
1047 | |
|
1048 | 0 | const int32_t round_bits = 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; |
1049 | 0 | assert(round_bits >= 0); |
1050 | | |
1051 | | // horizontal filter |
1052 | 0 | const uint16_t* src_horiz = src - fo_vert * src_stride; |
1053 | 0 | const int16_t* x_filter = av1_get_interp_filter_subpel_kernel(*filter_params_x, subpel_x_q4 & SUBPEL_MASK); |
1054 | 0 | for (int y = 0; y < im_h; ++y) { |
1055 | 0 | for (int x = 0; x < w; ++x) { |
1056 | 0 | int32_t sum = (1 << (bd + FILTER_BITS - 1)); |
1057 | 0 | for (int k = 0; k < filter_params_x->taps; ++k) { |
1058 | 0 | sum += x_filter[k] * src_horiz[y * src_stride + x - fo_horiz + k]; |
1059 | 0 | } |
1060 | 0 | assert(0 <= sum && sum < (1 << (bd + FILTER_BITS + 1))); |
1061 | 0 | (void)bd; |
1062 | 0 | im_block[y * im_stride + x] = (int16_t)ROUND_POWER_OF_TWO(sum, conv_params->round_0); |
1063 | 0 | } |
1064 | 0 | } |
1065 | | |
1066 | | // vertical filter |
1067 | 0 | int16_t* src_vert = im_block + fo_vert * im_stride; |
1068 | 0 | const int32_t offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0; |
1069 | 0 | const int16_t* y_filter = av1_get_interp_filter_subpel_kernel(*filter_params_y, subpel_y_q4 & SUBPEL_MASK); |
1070 | 0 | for (int y = 0; y < h; ++y) { |
1071 | 0 | for (int x = 0; x < w; ++x) { |
1072 | 0 | int32_t sum = 1 << offset_bits; |
1073 | 0 | for (int k = 0; k < filter_params_y->taps; ++k) { |
1074 | 0 | sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x]; |
1075 | 0 | } |
1076 | 0 | assert(0 <= sum && sum < (1 << (offset_bits + 2))); |
1077 | 0 | ConvBufType res = (ConvBufType)ROUND_POWER_OF_TWO(sum, conv_params->round_1); |
1078 | 0 | if (conv_params->do_average) { |
1079 | 0 | int32_t tmp = dst[y * dst_stride + x]; |
1080 | 0 | if (conv_params->use_jnt_comp_avg) { |
1081 | 0 | tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset; |
1082 | 0 | tmp = tmp >> DIST_PRECISION_BITS; |
1083 | 0 | } else { |
1084 | 0 | tmp += res; |
1085 | 0 | tmp = tmp >> 1; |
1086 | 0 | } |
1087 | 0 | tmp -= (1 << (offset_bits - conv_params->round_1)) + (1 << (offset_bits - conv_params->round_1 - 1)); |
1088 | 0 | dst16[y * dst16_stride + x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp, round_bits), bd); |
1089 | 0 | } else { |
1090 | 0 | dst[y * dst_stride + x] = res; |
1091 | 0 | } |
1092 | 0 | } |
1093 | 0 | } |
1094 | 0 | } |
1095 | | |
1096 | | aom_highbd_convolve_fn_t svt_aom_convolveHbd[/*subX*/ 2][/*subY*/ 2][/*bi*/ 2]; |
1097 | | |
1098 | 1 | void svt_aom_asm_set_convolve_hbd_asm_table(void) { |
1099 | 1 | svt_aom_convolveHbd[0][0][0] = svt_av1_highbd_convolve_2d_copy_sr; |
1100 | 1 | svt_aom_convolveHbd[0][0][1] = svt_av1_highbd_jnt_convolve_2d_copy; |
1101 | | |
1102 | 1 | svt_aom_convolveHbd[0][1][0] = svt_av1_highbd_convolve_y_sr; |
1103 | 1 | svt_aom_convolveHbd[0][1][1] = svt_av1_highbd_jnt_convolve_y; |
1104 | | |
1105 | 1 | svt_aom_convolveHbd[1][0][0] = svt_av1_highbd_convolve_x_sr; |
1106 | 1 | svt_aom_convolveHbd[1][0][1] = svt_av1_highbd_jnt_convolve_x; |
1107 | | |
1108 | 1 | svt_aom_convolveHbd[1][1][0] = svt_av1_highbd_convolve_2d_sr; |
1109 | 1 | svt_aom_convolveHbd[1][1][1] = svt_av1_highbd_jnt_convolve_2d; |
1110 | 1 | } |
1111 | | |
1112 | | AomConvolveFn svt_aom_convolve[/*subX*/ 2][/*subY*/ 2][/*bi*/ 2]; |
1113 | | |
1114 | 1 | void svt_aom_asm_set_convolve_asm_table(void) { |
1115 | 1 | svt_aom_convolve[0][0][0] = svt_av1_convolve_2d_copy_sr; |
1116 | 1 | svt_aom_convolve[0][0][1] = svt_av1_jnt_convolve_2d_copy; |
1117 | | |
1118 | 1 | svt_aom_convolve[0][1][0] = svt_av1_convolve_y_sr; |
1119 | 1 | svt_aom_convolve[0][1][1] = svt_av1_jnt_convolve_y; |
1120 | | |
1121 | 1 | svt_aom_convolve[1][0][0] = svt_av1_convolve_x_sr; |
1122 | 1 | svt_aom_convolve[1][0][1] = svt_av1_jnt_convolve_x; |
1123 | | |
1124 | 1 | svt_aom_convolve[1][1][0] = svt_av1_convolve_2d_sr; |
1125 | 1 | svt_aom_convolve[1][1][1] = svt_av1_jnt_convolve_2d; |
1126 | 1 | } |
1127 | | |
1128 | | DECLARE_ALIGNED(256, const InterpKernel, sub_pel_filters_8sharp[SUBPEL_SHIFTS]) = {{0, 0, 0, 128, 0, 0, 0, 0}, |
1129 | | {-2, 2, -6, 126, 8, -2, 2, 0}, |
1130 | | {-2, 6, -12, 124, 16, -6, 4, -2}, |
1131 | | {-2, 8, -18, 120, 26, -10, 6, -2}, |
1132 | | {-4, 10, -22, 116, 38, -14, 6, -2}, |
1133 | | {-4, 10, -22, 108, 48, -18, 8, -2}, |
1134 | | {-4, 10, -24, 100, 60, -20, 8, -2}, |
1135 | | {-4, 10, -24, 90, 70, -22, 10, -2}, |
1136 | | {-4, 12, -24, 80, 80, -24, 12, -4}, |
1137 | | {-2, 10, -22, 70, 90, -24, 10, -4}, |
1138 | | {-2, 8, -20, 60, 100, -24, 10, -4}, |
1139 | | {-2, 8, -18, 48, 108, -22, 10, -4}, |
1140 | | {-2, 6, -14, 38, 116, -22, 10, -4}, |
1141 | | {-2, 6, -10, 26, 120, -18, 8, -2}, |
1142 | | {-2, 4, -6, 16, 124, -12, 6, -2}, |
1143 | | {0, 2, -2, 8, 126, -6, 2, -2}}; |
1144 | | |
1145 | | DECLARE_ALIGNED(256, const InterpKernel, sub_pel_filters_8smooth[SUBPEL_SHIFTS]) = {{0, 0, 0, 128, 0, 0, 0, 0}, |
1146 | | {0, 2, 28, 62, 34, 2, 0, 0}, |
1147 | | {0, 0, 26, 62, 36, 4, 0, 0}, |
1148 | | {0, 0, 22, 62, 40, 4, 0, 0}, |
1149 | | {0, 0, 20, 60, 42, 6, 0, 0}, |
1150 | | {0, 0, 18, 58, 44, 8, 0, 0}, |
1151 | | {0, 0, 16, 56, 46, 10, 0, 0}, |
1152 | | {0, -2, 16, 54, 48, 12, 0, 0}, |
1153 | | {0, -2, 14, 52, 52, 14, -2, 0}, |
1154 | | {0, 0, 12, 48, 54, 16, -2, 0}, |
1155 | | {0, 0, 10, 46, 56, 16, 0, 0}, |
1156 | | {0, 0, 8, 44, 58, 18, 0, 0}, |
1157 | | {0, 0, 6, 42, 60, 20, 0, 0}, |
1158 | | {0, 0, 4, 40, 62, 22, 0, 0}, |
1159 | | {0, 0, 4, 36, 62, 26, 0, 0}, |
1160 | | {0, 0, 2, 34, 62, 28, 2, 0}}; |
1161 | | DECLARE_ALIGNED(256, const InterpKernel, bilinear_filters[SUBPEL_SHIFTS]) = {{0, 0, 0, 128, 0, 0, 0, 0}, |
1162 | | {0, 0, 0, 120, 8, 0, 0, 0}, |
1163 | | {0, 0, 0, 112, 16, 0, 0, 0}, |
1164 | | {0, 0, 0, 104, 24, 0, 0, 0}, |
1165 | | {0, 0, 0, 96, 32, 0, 0, 0}, |
1166 | | {0, 0, 0, 88, 40, 0, 0, 0}, |
1167 | | {0, 0, 0, 80, 48, 0, 0, 0}, |
1168 | | {0, 0, 0, 72, 56, 0, 0, 0}, |
1169 | | {0, 0, 0, 64, 64, 0, 0, 0}, |
1170 | | {0, 0, 0, 56, 72, 0, 0, 0}, |
1171 | | {0, 0, 0, 48, 80, 0, 0, 0}, |
1172 | | {0, 0, 0, 40, 88, 0, 0, 0}, |
1173 | | {0, 0, 0, 32, 96, 0, 0, 0}, |
1174 | | {0, 0, 0, 24, 104, 0, 0, 0}, |
1175 | | {0, 0, 0, 16, 112, 0, 0, 0}, |
1176 | | {0, 0, 0, 8, 120, 0, 0, 0}}; |
1177 | | DECLARE_ALIGNED(256, const InterpKernel, sub_pel_filters_4smooth[SUBPEL_SHIFTS]) = {{0, 0, 0, 128, 0, 0, 0, 0}, |
1178 | | {0, 0, 30, 62, 34, 2, 0, 0}, |
1179 | | {0, 0, 26, 62, 36, 4, 0, 0}, |
1180 | | {0, 0, 22, 62, 40, 4, 0, 0}, |
1181 | | {0, 0, 20, 60, 42, 6, 0, 0}, |
1182 | | {0, 0, 18, 58, 44, 8, 0, 0}, |
1183 | | {0, 0, 16, 56, 46, 10, 0, 0}, |
1184 | | {0, 0, 14, 54, 48, 12, 0, 0}, |
1185 | | {0, 0, 12, 52, 52, 12, 0, 0}, |
1186 | | {0, 0, 12, 48, 54, 14, 0, 0}, |
1187 | | {0, 0, 10, 46, 56, 16, 0, 0}, |
1188 | | {0, 0, 8, 44, 58, 18, 0, 0}, |
1189 | | {0, 0, 6, 42, 60, 20, 0, 0}, |
1190 | | {0, 0, 4, 40, 62, 22, 0, 0}, |
1191 | | {0, 0, 4, 36, 62, 26, 0, 0}, |
1192 | | {0, 0, 2, 34, 62, 30, 0, 0}}; |
1193 | | BlockSize svt_aom_scale_chroma_bsize(BlockSize bsize, int32_t subsampling_x, int32_t subsampling_y); |
1194 | | |
1195 | | void convolve_2d_for_intrabc(const uint8_t* src, int src_stride, uint8_t* dst, int dst_stride, int w, int h, |
1196 | 0 | int subpel_x_q4, int subpel_y_q4, ConvolveParams* conv_params) { |
1197 | 0 | const InterpFilterParams* filter_params_x = subpel_x_q4 ? &av1_interp_filter_params_list[BILINEAR] : NULL; |
1198 | 0 | const InterpFilterParams* filter_params_y = subpel_y_q4 ? &av1_interp_filter_params_list[BILINEAR] : NULL; |
1199 | 0 | if (subpel_x_q4 != 0 && subpel_y_q4 != 0) { |
1200 | 0 | svt_av1_convolve_2d_sr(src, |
1201 | 0 | src_stride, |
1202 | 0 | dst, |
1203 | 0 | dst_stride, |
1204 | 0 | w, |
1205 | 0 | h, |
1206 | 0 | (InterpFilterParams*)filter_params_x, |
1207 | 0 | (InterpFilterParams*)filter_params_y, |
1208 | 0 | 8, |
1209 | 0 | 8, |
1210 | 0 | conv_params); |
1211 | 0 | } else if (subpel_x_q4 != 0) { |
1212 | 0 | svt_av1_convolve_x_sr(src, |
1213 | 0 | src_stride, |
1214 | 0 | dst, |
1215 | 0 | dst_stride, |
1216 | 0 | w, |
1217 | 0 | h, |
1218 | 0 | (InterpFilterParams*)filter_params_x, |
1219 | 0 | (InterpFilterParams*)filter_params_y, |
1220 | 0 | 8, |
1221 | 0 | 0, |
1222 | 0 | conv_params); |
1223 | 0 | } else { |
1224 | 0 | svt_av1_convolve_y_sr(src, |
1225 | 0 | src_stride, |
1226 | 0 | dst, |
1227 | 0 | dst_stride, |
1228 | 0 | w, |
1229 | 0 | h, |
1230 | 0 | (InterpFilterParams*)filter_params_x, |
1231 | 0 | (InterpFilterParams*)filter_params_y, |
1232 | 0 | 0, |
1233 | 0 | 8, |
1234 | 0 | conv_params); |
1235 | 0 | } |
1236 | 0 | } |
1237 | | |
1238 | | void highbd_convolve_2d_for_intrabc(const uint16_t* src, int src_stride, uint16_t* dst, int dst_stride, int w, int h, |
1239 | 0 | int subpel_x_q4, int subpel_y_q4, ConvolveParams* conv_params, int bd) { |
1240 | 0 | const InterpFilterParams* filter_params_x = subpel_x_q4 ? &av1_interp_filter_params_list[BILINEAR] : NULL; |
1241 | 0 | const InterpFilterParams* filter_params_y = subpel_y_q4 ? &av1_interp_filter_params_list[BILINEAR] : NULL; |
1242 | 0 | if (subpel_x_q4 != 0 && subpel_y_q4 != 0) { |
1243 | 0 | svt_av1_highbd_convolve_2d_sr( |
1244 | 0 | src, src_stride, dst, dst_stride, w, h, filter_params_x, filter_params_y, 8, 8, conv_params, bd); |
1245 | 0 | } else if (subpel_x_q4 != 0) { |
1246 | 0 | svt_av1_highbd_convolve_x_sr( |
1247 | 0 | src, src_stride, dst, dst_stride, w, h, filter_params_x, filter_params_y, 8, 0, conv_params, bd); |
1248 | 0 | } else { |
1249 | 0 | svt_av1_highbd_convolve_y_sr( |
1250 | 0 | src, src_stride, dst, dst_stride, w, h, filter_params_x, filter_params_y, 0, 8, conv_params, bd); |
1251 | 0 | } |
1252 | 0 | } |
1253 | | |
1254 | | /* |
1255 | | */ |
1256 | | void svt_inter_predictor_light_pd0(const uint8_t* src, int32_t src_stride, uint8_t* dst, int32_t dst_stride, int32_t w, |
1257 | 0 | int32_t h, SubpelParams* subpel_params, ConvolveParams* conv_params) { |
1258 | 0 | const int32_t is_scaled = has_scale(subpel_params->xs, subpel_params->ys); |
1259 | 0 | if (is_scaled) { |
1260 | 0 | InterpFilterParams filter_params_x, filter_params_y; |
1261 | 0 | av1_get_convolve_filter_params( |
1262 | 0 | av1_make_interp_filters(EIGHTTAP_REGULAR, EIGHTTAP_REGULAR), &filter_params_x, &filter_params_y, w, h); |
1263 | 0 | svt_av1_convolve_2d_scale(src, |
1264 | 0 | src_stride, |
1265 | 0 | dst, |
1266 | 0 | dst_stride, |
1267 | 0 | w, |
1268 | 0 | h, |
1269 | 0 | &filter_params_x, |
1270 | 0 | &filter_params_y, |
1271 | 0 | subpel_params->subpel_x, |
1272 | 0 | subpel_params->xs, |
1273 | 0 | subpel_params->subpel_y, |
1274 | 0 | subpel_params->ys, |
1275 | 0 | conv_params); |
1276 | 0 | } else { |
1277 | 0 | UNUSED(subpel_params); |
1278 | 0 | svt_aom_convolve[0][0][conv_params->is_compound]( |
1279 | 0 | src, src_stride, dst, dst_stride, w, h, 0, 0, 0, 0, conv_params); |
1280 | 0 | } |
1281 | 0 | } |
1282 | | |
1283 | | void svt_inter_predictor_light_pd1(uint8_t* src, uint8_t* src_2b, int32_t src_stride, uint8_t* dst, int32_t dst_stride, |
1284 | | int32_t w, int32_t h, InterpFilters interp_filters, SubpelParams* subpel_params, |
1285 | 0 | ConvolveParams* conv_params, int32_t bd) { |
1286 | 0 | InterpFilterParams filter_params_x, filter_params_y; |
1287 | 0 | av1_get_convolve_filter_params(interp_filters, &filter_params_x, &filter_params_y, w, h); |
1288 | 0 | const int32_t is_scaled = has_scale(subpel_params->xs, subpel_params->ys); |
1289 | |
|
1290 | 0 | if (bd > EB_EIGHT_BIT) { |
1291 | | // for super-res, the reference frame block might be 2x than predictor in maximum |
1292 | | // for reference scaling, it might be 4x since both width and height is scaled 2x |
1293 | | // should pack enough buffer for scaled reference |
1294 | 0 | DECLARE_ALIGNED(16, uint16_t, src16[PACKED_BUFFER_SIZE * 4]); |
1295 | 0 | int32_t src_stride16; |
1296 | | // pack the reference into temp 16bit buffer |
1297 | 0 | uint8_t offset = INTERPOLATION_OFFSET; |
1298 | 0 | uint32_t width_scale = 1; |
1299 | 0 | uint32_t height_scale = 1; |
1300 | 0 | if (is_scaled) { |
1301 | 0 | width_scale = subpel_params->xs != SCALE_SUBPEL_SHIFTS ? 2 : 1; |
1302 | 0 | height_scale = subpel_params->ys != SCALE_SUBPEL_SHIFTS ? 2 : 1; |
1303 | 0 | } |
1304 | | // optimize stride from MAX_SB_SIZE to bwidth to minimum the block buffer size |
1305 | 0 | src_stride16 = w * width_scale + (offset << 1); |
1306 | | // 16-byte align of src16 |
1307 | 0 | if (src_stride16 % 8) { |
1308 | 0 | src_stride16 = ALIGN_POWER_OF_TWO(src_stride16, 3); |
1309 | 0 | } |
1310 | |
|
1311 | 0 | svt_aom_pack_block(src - offset - (offset * src_stride), |
1312 | 0 | src_stride, |
1313 | 0 | src_2b - offset - (offset * src_stride), |
1314 | 0 | src_stride, |
1315 | 0 | src16, |
1316 | 0 | src_stride16, |
1317 | 0 | w * width_scale + (offset << 1), |
1318 | 0 | h * height_scale + (offset << 1)); |
1319 | 0 | uint16_t* src_10b = src16 + offset + (offset * src_stride16); |
1320 | 0 | uint16_t* dst16 = (uint16_t*)dst; |
1321 | |
|
1322 | 0 | if (is_scaled) { |
1323 | 0 | svt_av1_highbd_convolve_2d_scale(src_10b, |
1324 | 0 | src_stride16, |
1325 | 0 | dst16, |
1326 | 0 | dst_stride, |
1327 | 0 | w, |
1328 | 0 | h, |
1329 | 0 | &filter_params_x, |
1330 | 0 | &filter_params_y, |
1331 | 0 | subpel_params->subpel_x, |
1332 | 0 | subpel_params->xs, |
1333 | 0 | subpel_params->subpel_y, |
1334 | 0 | subpel_params->ys, |
1335 | 0 | conv_params, |
1336 | 0 | bd); |
1337 | 0 | } else { |
1338 | 0 | SubpelParams sp = *subpel_params; |
1339 | 0 | revert_scale_extra_bits(&sp); |
1340 | 0 | svt_aom_convolveHbd[sp.subpel_x != 0][sp.subpel_y != 0][conv_params->is_compound](src_10b, |
1341 | 0 | src_stride16, |
1342 | 0 | dst16, |
1343 | 0 | dst_stride, |
1344 | 0 | w, |
1345 | 0 | h, |
1346 | 0 | &filter_params_x, |
1347 | 0 | &filter_params_y, |
1348 | 0 | sp.subpel_x, |
1349 | 0 | sp.subpel_y, |
1350 | 0 | conv_params, |
1351 | 0 | bd); |
1352 | 0 | } |
1353 | 0 | } else { |
1354 | 0 | if (is_scaled) { |
1355 | 0 | svt_av1_convolve_2d_scale(src, |
1356 | 0 | src_stride, |
1357 | 0 | dst, |
1358 | 0 | dst_stride, |
1359 | 0 | w, |
1360 | 0 | h, |
1361 | 0 | &filter_params_x, |
1362 | 0 | &filter_params_y, |
1363 | 0 | subpel_params->subpel_x, |
1364 | 0 | subpel_params->xs, |
1365 | 0 | subpel_params->subpel_y, |
1366 | 0 | subpel_params->ys, |
1367 | 0 | conv_params); |
1368 | 0 | } else { |
1369 | 0 | SubpelParams sp = *subpel_params; |
1370 | 0 | revert_scale_extra_bits(&sp); |
1371 | 0 | svt_aom_convolve[sp.subpel_x != 0][sp.subpel_y != 0][conv_params->is_compound](src, |
1372 | 0 | src_stride, |
1373 | 0 | dst, |
1374 | 0 | dst_stride, |
1375 | 0 | w, |
1376 | 0 | h, |
1377 | 0 | &filter_params_x, |
1378 | 0 | &filter_params_y, |
1379 | 0 | sp.subpel_x, |
1380 | 0 | sp.subpel_y, |
1381 | 0 | conv_params); |
1382 | 0 | } |
1383 | 0 | } |
1384 | 0 | } |
1385 | | |
1386 | | void svt_inter_predictor(const uint8_t* src, int32_t src_stride, uint8_t* dst, int32_t dst_stride, |
1387 | | const SubpelParams* subpel_params, const ScaleFactors* sf, int32_t w, int32_t h, |
1388 | 0 | ConvolveParams* conv_params, InterpFilters interp_filters, int32_t is_intrabc) { |
1389 | 0 | InterpFilterParams filter_params_x, filter_params_y; |
1390 | 0 | const int32_t is_scaled = has_scale(subpel_params->xs, subpel_params->ys); |
1391 | |
|
1392 | 0 | av1_get_convolve_filter_params(interp_filters, &filter_params_x, &filter_params_y, w, h); |
1393 | |
|
1394 | 0 | assert(conv_params->do_average == 0 || conv_params->do_average == 1); |
1395 | 0 | assert(sf); |
1396 | 0 | UNUSED(sf); |
1397 | 0 | assert(IMPLIES(is_intrabc, !is_scaled)); |
1398 | |
|
1399 | 0 | if (is_scaled) { |
1400 | 0 | if (is_intrabc && (subpel_params->subpel_x != 0 || subpel_params->subpel_y != 0)) { |
1401 | 0 | convolve_2d_for_intrabc( |
1402 | 0 | src, src_stride, dst, dst_stride, w, h, subpel_params->subpel_x, subpel_params->subpel_y, conv_params); |
1403 | 0 | return; |
1404 | 0 | } |
1405 | 0 | if (conv_params->is_compound) { |
1406 | 0 | assert(conv_params->dst != NULL); |
1407 | 0 | } |
1408 | 0 | svt_av1_convolve_2d_scale(src, |
1409 | 0 | src_stride, |
1410 | 0 | dst, |
1411 | 0 | dst_stride, |
1412 | 0 | w, |
1413 | 0 | h, |
1414 | 0 | &filter_params_x, |
1415 | 0 | &filter_params_y, |
1416 | 0 | subpel_params->subpel_x, |
1417 | 0 | subpel_params->xs, |
1418 | 0 | subpel_params->subpel_y, |
1419 | 0 | subpel_params->ys, |
1420 | 0 | conv_params); |
1421 | 0 | } else { |
1422 | 0 | SubpelParams sp = *subpel_params; |
1423 | 0 | revert_scale_extra_bits(&sp); |
1424 | |
|
1425 | 0 | if (is_intrabc && (sp.subpel_x != 0 || sp.subpel_y != 0)) { |
1426 | 0 | convolve_2d_for_intrabc(src, src_stride, dst, dst_stride, w, h, sp.subpel_x, sp.subpel_y, conv_params); |
1427 | 0 | return; |
1428 | 0 | } |
1429 | | |
1430 | 0 | svt_aom_convolve[sp.subpel_x != 0][sp.subpel_y != 0][conv_params->is_compound](src, |
1431 | 0 | src_stride, |
1432 | 0 | dst, |
1433 | 0 | dst_stride, |
1434 | 0 | w, |
1435 | 0 | h, |
1436 | 0 | &filter_params_x, |
1437 | 0 | &filter_params_y, |
1438 | 0 | sp.subpel_x, |
1439 | 0 | sp.subpel_y, |
1440 | 0 | conv_params); |
1441 | 0 | } |
1442 | 0 | } |
1443 | | |
1444 | | void svt_highbd_inter_predictor(const uint16_t* src, int32_t src_stride, uint16_t* dst, int32_t dst_stride, |
1445 | | const SubpelParams* subpel_params, const ScaleFactors* sf, int32_t w, int32_t h, |
1446 | | ConvolveParams* conv_params, InterpFilters interp_filters, int32_t is_intrabc, |
1447 | 0 | int32_t bd) { |
1448 | 0 | InterpFilterParams filter_params_x, filter_params_y; |
1449 | 0 | const int32_t is_scaled = has_scale(subpel_params->xs, subpel_params->ys); |
1450 | |
|
1451 | 0 | av1_get_convolve_filter_params(interp_filters, &filter_params_x, &filter_params_y, w, h); |
1452 | |
|
1453 | 0 | assert(conv_params->do_average == 0 || conv_params->do_average == 1); |
1454 | 0 | assert(sf); |
1455 | 0 | UNUSED(sf); |
1456 | 0 | assert(IMPLIES(is_intrabc, !is_scaled)); |
1457 | |
|
1458 | 0 | if (is_scaled) { |
1459 | 0 | if (is_intrabc && (subpel_params->subpel_x != 0 || subpel_params->subpel_y != 0)) { |
1460 | 0 | highbd_convolve_2d_for_intrabc(src, |
1461 | 0 | src_stride, |
1462 | 0 | dst, |
1463 | 0 | dst_stride, |
1464 | 0 | w, |
1465 | 0 | h, |
1466 | 0 | subpel_params->subpel_x, |
1467 | 0 | subpel_params->subpel_y, |
1468 | 0 | conv_params, |
1469 | 0 | bd); |
1470 | 0 | return; |
1471 | 0 | } |
1472 | 0 | if (conv_params->is_compound) { |
1473 | 0 | assert(conv_params->dst != NULL); |
1474 | 0 | } |
1475 | 0 | svt_av1_highbd_convolve_2d_scale(src, |
1476 | 0 | src_stride, |
1477 | 0 | dst, |
1478 | 0 | dst_stride, |
1479 | 0 | w, |
1480 | 0 | h, |
1481 | 0 | &filter_params_x, |
1482 | 0 | &filter_params_y, |
1483 | 0 | subpel_params->subpel_x, |
1484 | 0 | subpel_params->xs, |
1485 | 0 | subpel_params->subpel_y, |
1486 | 0 | subpel_params->ys, |
1487 | 0 | conv_params, |
1488 | 0 | bd); |
1489 | 0 | } else { |
1490 | 0 | SubpelParams sp = *subpel_params; |
1491 | 0 | revert_scale_extra_bits(&sp); |
1492 | |
|
1493 | 0 | if (is_intrabc && (sp.subpel_x != 0 || sp.subpel_y != 0)) { |
1494 | 0 | highbd_convolve_2d_for_intrabc( |
1495 | 0 | src, src_stride, dst, dst_stride, w, h, sp.subpel_x, sp.subpel_y, conv_params, bd); |
1496 | 0 | return; |
1497 | 0 | } |
1498 | | |
1499 | 0 | svt_aom_convolveHbd[sp.subpel_x != 0][sp.subpel_y != 0][conv_params->is_compound](src, |
1500 | 0 | src_stride, |
1501 | 0 | dst, |
1502 | 0 | dst_stride, |
1503 | 0 | w, |
1504 | 0 | h, |
1505 | 0 | &filter_params_x, |
1506 | 0 | &filter_params_y, |
1507 | 0 | sp.subpel_x, |
1508 | 0 | sp.subpel_y, |
1509 | 0 | conv_params, |
1510 | 0 | bd); |
1511 | 0 | } |
1512 | 0 | } |
1513 | | |
1514 | | #define USE_PRECOMPUTED_WEDGE_SIGN 1 |
1515 | | #define USE_PRECOMPUTED_WEDGE_MASK 1 |
1516 | | |
1517 | | #if USE_PRECOMPUTED_WEDGE_MASK |
1518 | | static const uint8_t wedge_primary_oblique_odd[MASK_PRIMARY_SIZE] = { |
1519 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
1520 | | 0, 0, 0, 0, 0, 0, 1, 2, 6, 18, 37, 53, 60, 63, 64, 64, 64, 64, 64, 64, 64, 64, |
1521 | | 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, |
1522 | | }; |
1523 | | static const uint8_t wedge_primary_oblique_even[MASK_PRIMARY_SIZE] = { |
1524 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
1525 | | 0, 0, 0, 0, 0, 0, 1, 4, 11, 27, 46, 58, 62, 63, 64, 64, 64, 64, 64, 64, 64, 64, |
1526 | | 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, |
1527 | | }; |
1528 | | static const uint8_t wedge_primary_vertical[MASK_PRIMARY_SIZE] = { |
1529 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
1530 | | 0, 0, 0, 0, 0, 0, 0, 2, 7, 21, 43, 57, 62, 64, 64, 64, 64, 64, 64, 64, 64, 64, |
1531 | | 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, |
1532 | | }; |
1533 | | |
1534 | | DECLARE_ALIGNED(16, static uint8_t, wedge_signflip_lookup[BLOCK_SIZES_ALL][MAX_WEDGE_TYPES]) = { |
1535 | | { |
1536 | | 0, |
1537 | | 0, |
1538 | | 0, |
1539 | | 0, |
1540 | | 0, |
1541 | | 0, |
1542 | | 0, |
1543 | | 0, |
1544 | | 0, |
1545 | | 0, |
1546 | | 0, |
1547 | | 0, |
1548 | | 0, |
1549 | | 0, |
1550 | | 0, |
1551 | | 0, |
1552 | | }, // not used |
1553 | | { |
1554 | | 0, |
1555 | | 0, |
1556 | | 0, |
1557 | | 0, |
1558 | | 0, |
1559 | | 0, |
1560 | | 0, |
1561 | | 0, |
1562 | | 0, |
1563 | | 0, |
1564 | | 0, |
1565 | | 0, |
1566 | | 0, |
1567 | | 0, |
1568 | | 0, |
1569 | | 0, |
1570 | | }, // not used |
1571 | | { |
1572 | | 0, |
1573 | | 0, |
1574 | | 0, |
1575 | | 0, |
1576 | | 0, |
1577 | | 0, |
1578 | | 0, |
1579 | | 0, |
1580 | | 0, |
1581 | | 0, |
1582 | | 0, |
1583 | | 0, |
1584 | | 0, |
1585 | | 0, |
1586 | | 0, |
1587 | | 0, |
1588 | | }, // not used |
1589 | | { |
1590 | | 1, |
1591 | | 1, |
1592 | | 1, |
1593 | | 1, |
1594 | | 1, |
1595 | | 1, |
1596 | | 1, |
1597 | | 1, |
1598 | | 1, |
1599 | | 1, |
1600 | | 0, |
1601 | | 1, |
1602 | | 1, |
1603 | | 1, |
1604 | | 0, |
1605 | | 1, |
1606 | | }, |
1607 | | { |
1608 | | 1, |
1609 | | 1, |
1610 | | 1, |
1611 | | 1, |
1612 | | 0, |
1613 | | 1, |
1614 | | 1, |
1615 | | 1, |
1616 | | 1, |
1617 | | 1, |
1618 | | 0, |
1619 | | 1, |
1620 | | 1, |
1621 | | 1, |
1622 | | 0, |
1623 | | 1, |
1624 | | }, |
1625 | | { |
1626 | | 1, |
1627 | | 1, |
1628 | | 1, |
1629 | | 1, |
1630 | | 0, |
1631 | | 1, |
1632 | | 1, |
1633 | | 1, |
1634 | | 1, |
1635 | | 1, |
1636 | | 0, |
1637 | | 1, |
1638 | | 1, |
1639 | | 1, |
1640 | | 0, |
1641 | | 1, |
1642 | | }, |
1643 | | { |
1644 | | 1, |
1645 | | 1, |
1646 | | 1, |
1647 | | 1, |
1648 | | 1, |
1649 | | 1, |
1650 | | 1, |
1651 | | 1, |
1652 | | 1, |
1653 | | 1, |
1654 | | 0, |
1655 | | 1, |
1656 | | 1, |
1657 | | 1, |
1658 | | 0, |
1659 | | 1, |
1660 | | }, |
1661 | | { |
1662 | | 1, |
1663 | | 1, |
1664 | | 1, |
1665 | | 1, |
1666 | | 0, |
1667 | | 1, |
1668 | | 1, |
1669 | | 1, |
1670 | | 1, |
1671 | | 1, |
1672 | | 0, |
1673 | | 1, |
1674 | | 1, |
1675 | | 1, |
1676 | | 0, |
1677 | | 1, |
1678 | | }, |
1679 | | { |
1680 | | 1, |
1681 | | 1, |
1682 | | 1, |
1683 | | 1, |
1684 | | 0, |
1685 | | 1, |
1686 | | 1, |
1687 | | 1, |
1688 | | 1, |
1689 | | 1, |
1690 | | 0, |
1691 | | 1, |
1692 | | 1, |
1693 | | 1, |
1694 | | 0, |
1695 | | 1, |
1696 | | }, |
1697 | | { |
1698 | | 1, |
1699 | | 1, |
1700 | | 1, |
1701 | | 1, |
1702 | | 1, |
1703 | | 1, |
1704 | | 1, |
1705 | | 1, |
1706 | | 1, |
1707 | | 1, |
1708 | | 0, |
1709 | | 1, |
1710 | | 1, |
1711 | | 1, |
1712 | | 0, |
1713 | | 1, |
1714 | | }, |
1715 | | { |
1716 | | 0, |
1717 | | 0, |
1718 | | 0, |
1719 | | 0, |
1720 | | 0, |
1721 | | 0, |
1722 | | 0, |
1723 | | 0, |
1724 | | 0, |
1725 | | 0, |
1726 | | 0, |
1727 | | 0, |
1728 | | 0, |
1729 | | 0, |
1730 | | 0, |
1731 | | 0, |
1732 | | }, // not used |
1733 | | { |
1734 | | 0, |
1735 | | 0, |
1736 | | 0, |
1737 | | 0, |
1738 | | 0, |
1739 | | 0, |
1740 | | 0, |
1741 | | 0, |
1742 | | 0, |
1743 | | 0, |
1744 | | 0, |
1745 | | 0, |
1746 | | 0, |
1747 | | 0, |
1748 | | 0, |
1749 | | 0, |
1750 | | }, // not used |
1751 | | { |
1752 | | 0, |
1753 | | 0, |
1754 | | 0, |
1755 | | 0, |
1756 | | 0, |
1757 | | 0, |
1758 | | 0, |
1759 | | 0, |
1760 | | 0, |
1761 | | 0, |
1762 | | 0, |
1763 | | 0, |
1764 | | 0, |
1765 | | 0, |
1766 | | 0, |
1767 | | 0, |
1768 | | }, // not used |
1769 | | { |
1770 | | 0, |
1771 | | 0, |
1772 | | 0, |
1773 | | 0, |
1774 | | 0, |
1775 | | 0, |
1776 | | 0, |
1777 | | 0, |
1778 | | 0, |
1779 | | 0, |
1780 | | 0, |
1781 | | 0, |
1782 | | 0, |
1783 | | 0, |
1784 | | 0, |
1785 | | 0, |
1786 | | }, // not used |
1787 | | { |
1788 | | 0, |
1789 | | 0, |
1790 | | 0, |
1791 | | 0, |
1792 | | 0, |
1793 | | 0, |
1794 | | 0, |
1795 | | 0, |
1796 | | 0, |
1797 | | 0, |
1798 | | 0, |
1799 | | 0, |
1800 | | 0, |
1801 | | 0, |
1802 | | 0, |
1803 | | 0, |
1804 | | }, // not used |
1805 | | { |
1806 | | 0, |
1807 | | 0, |
1808 | | 0, |
1809 | | 0, |
1810 | | 0, |
1811 | | 0, |
1812 | | 0, |
1813 | | 0, |
1814 | | 0, |
1815 | | 0, |
1816 | | 0, |
1817 | | 0, |
1818 | | 0, |
1819 | | 0, |
1820 | | 0, |
1821 | | 0, |
1822 | | }, // not used |
1823 | | { |
1824 | | 0, |
1825 | | 0, |
1826 | | 0, |
1827 | | 0, |
1828 | | 0, |
1829 | | 0, |
1830 | | 0, |
1831 | | 0, |
1832 | | 0, |
1833 | | 0, |
1834 | | 0, |
1835 | | 0, |
1836 | | 0, |
1837 | | 0, |
1838 | | 0, |
1839 | | 0, |
1840 | | }, // not used |
1841 | | { |
1842 | | 0, |
1843 | | 0, |
1844 | | 0, |
1845 | | 0, |
1846 | | 0, |
1847 | | 0, |
1848 | | 0, |
1849 | | 0, |
1850 | | 0, |
1851 | | 0, |
1852 | | 0, |
1853 | | 0, |
1854 | | 0, |
1855 | | 0, |
1856 | | 0, |
1857 | | 0, |
1858 | | }, // not used |
1859 | | { |
1860 | | 1, |
1861 | | 1, |
1862 | | 1, |
1863 | | 1, |
1864 | | 0, |
1865 | | 1, |
1866 | | 1, |
1867 | | 1, |
1868 | | 0, |
1869 | | 1, |
1870 | | 0, |
1871 | | 1, |
1872 | | 1, |
1873 | | 1, |
1874 | | 0, |
1875 | | 1, |
1876 | | }, |
1877 | | { |
1878 | | 1, |
1879 | | 1, |
1880 | | 1, |
1881 | | 1, |
1882 | | 0, |
1883 | | 1, |
1884 | | 1, |
1885 | | 1, |
1886 | | 1, |
1887 | | 1, |
1888 | | 0, |
1889 | | 1, |
1890 | | 0, |
1891 | | 1, |
1892 | | 0, |
1893 | | 1, |
1894 | | }, |
1895 | | { |
1896 | | 0, |
1897 | | 0, |
1898 | | 0, |
1899 | | 0, |
1900 | | 0, |
1901 | | 0, |
1902 | | 0, |
1903 | | 0, |
1904 | | 0, |
1905 | | 0, |
1906 | | 0, |
1907 | | 0, |
1908 | | 0, |
1909 | | 0, |
1910 | | 0, |
1911 | | 0, |
1912 | | }, // not used |
1913 | | { |
1914 | | 0, |
1915 | | 0, |
1916 | | 0, |
1917 | | 0, |
1918 | | 0, |
1919 | | 0, |
1920 | | 0, |
1921 | | 0, |
1922 | | 0, |
1923 | | 0, |
1924 | | 0, |
1925 | | 0, |
1926 | | 0, |
1927 | | 0, |
1928 | | 0, |
1929 | | 0, |
1930 | | }, // not used |
1931 | | }; |
1932 | | |
1933 | | static const WedgeCodeType wedge_codebook_16_hgtw[16] = { |
1934 | | {WEDGE_OBLIQUE27, 4, 4}, |
1935 | | {WEDGE_OBLIQUE63, 4, 4}, |
1936 | | {WEDGE_OBLIQUE117, 4, 4}, |
1937 | | {WEDGE_OBLIQUE153, 4, 4}, |
1938 | | {WEDGE_HORIZONTAL, 4, 2}, |
1939 | | {WEDGE_HORIZONTAL, 4, 4}, |
1940 | | {WEDGE_HORIZONTAL, 4, 6}, |
1941 | | {WEDGE_VERTICAL, 4, 4}, |
1942 | | {WEDGE_OBLIQUE27, 4, 2}, |
1943 | | {WEDGE_OBLIQUE27, 4, 6}, |
1944 | | {WEDGE_OBLIQUE153, 4, 2}, |
1945 | | {WEDGE_OBLIQUE153, 4, 6}, |
1946 | | {WEDGE_OBLIQUE63, 2, 4}, |
1947 | | {WEDGE_OBLIQUE63, 6, 4}, |
1948 | | {WEDGE_OBLIQUE117, 2, 4}, |
1949 | | {WEDGE_OBLIQUE117, 6, 4}, |
1950 | | }; |
1951 | | |
1952 | | static const WedgeCodeType wedge_codebook_16_hltw[16] = { |
1953 | | {WEDGE_OBLIQUE27, 4, 4}, |
1954 | | {WEDGE_OBLIQUE63, 4, 4}, |
1955 | | {WEDGE_OBLIQUE117, 4, 4}, |
1956 | | {WEDGE_OBLIQUE153, 4, 4}, |
1957 | | {WEDGE_VERTICAL, 2, 4}, |
1958 | | {WEDGE_VERTICAL, 4, 4}, |
1959 | | {WEDGE_VERTICAL, 6, 4}, |
1960 | | {WEDGE_HORIZONTAL, 4, 4}, |
1961 | | {WEDGE_OBLIQUE27, 4, 2}, |
1962 | | {WEDGE_OBLIQUE27, 4, 6}, |
1963 | | {WEDGE_OBLIQUE153, 4, 2}, |
1964 | | {WEDGE_OBLIQUE153, 4, 6}, |
1965 | | {WEDGE_OBLIQUE63, 2, 4}, |
1966 | | {WEDGE_OBLIQUE63, 6, 4}, |
1967 | | {WEDGE_OBLIQUE117, 2, 4}, |
1968 | | {WEDGE_OBLIQUE117, 6, 4}, |
1969 | | }; |
1970 | | |
1971 | | static const WedgeCodeType wedge_codebook_16_heqw[16] = { |
1972 | | {WEDGE_OBLIQUE27, 4, 4}, |
1973 | | {WEDGE_OBLIQUE63, 4, 4}, |
1974 | | {WEDGE_OBLIQUE117, 4, 4}, |
1975 | | {WEDGE_OBLIQUE153, 4, 4}, |
1976 | | {WEDGE_HORIZONTAL, 4, 2}, |
1977 | | {WEDGE_HORIZONTAL, 4, 6}, |
1978 | | {WEDGE_VERTICAL, 2, 4}, |
1979 | | {WEDGE_VERTICAL, 6, 4}, |
1980 | | {WEDGE_OBLIQUE27, 4, 2}, |
1981 | | {WEDGE_OBLIQUE27, 4, 6}, |
1982 | | {WEDGE_OBLIQUE153, 4, 2}, |
1983 | | {WEDGE_OBLIQUE153, 4, 6}, |
1984 | | {WEDGE_OBLIQUE63, 2, 4}, |
1985 | | {WEDGE_OBLIQUE63, 6, 4}, |
1986 | | {WEDGE_OBLIQUE117, 2, 4}, |
1987 | | {WEDGE_OBLIQUE117, 6, 4}, |
1988 | | }; |
1989 | | |
1990 | | static const WedgeParamsType wedge_params_lookup[BLOCK_SIZES_ALL] = { |
1991 | | {0, NULL, NULL, NULL}, |
1992 | | {0, NULL, NULL, NULL}, |
1993 | | {0, NULL, NULL, NULL}, |
1994 | | {4, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_8X8], wedge_masks[BLOCK_8X8]}, |
1995 | | {4, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X16], wedge_masks[BLOCK_8X16]}, |
1996 | | {4, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_16X8], wedge_masks[BLOCK_16X8]}, |
1997 | | {4, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_16X16], wedge_masks[BLOCK_16X16]}, |
1998 | | {4, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_16X32], wedge_masks[BLOCK_16X32]}, |
1999 | | {4, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X16], wedge_masks[BLOCK_32X16]}, |
2000 | | {4, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_32X32], wedge_masks[BLOCK_32X32]}, |
2001 | | {0, NULL, NULL, NULL}, |
2002 | | {0, NULL, NULL, NULL}, |
2003 | | {0, NULL, NULL, NULL}, |
2004 | | {0, NULL, NULL, NULL}, |
2005 | | {0, NULL, NULL, NULL}, |
2006 | | {0, NULL, NULL, NULL}, |
2007 | | {0, NULL, NULL, NULL}, |
2008 | | {0, NULL, NULL, NULL}, |
2009 | | {4, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X32], wedge_masks[BLOCK_8X32]}, |
2010 | | {4, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X8], wedge_masks[BLOCK_32X8]}, |
2011 | | {0, NULL, NULL, NULL}, |
2012 | | {0, NULL, NULL, NULL}, |
2013 | | }; |
2014 | | |
2015 | 0 | int svt_aom_is_interintra_wedge_used(BlockSize bsize) { |
2016 | 0 | return wedge_params_lookup[bsize].bits > 0; |
2017 | 0 | } |
2018 | | |
2019 | 0 | int32_t svt_aom_get_wedge_bits_lookup(BlockSize bsize) { |
2020 | 0 | return wedge_params_lookup[bsize].bits; |
2021 | 0 | } |
2022 | | |
2023 | 0 | const uint8_t* svt_aom_get_contiguous_soft_mask(int wedge_index, int wedge_sign, BlockSize bsize) { |
2024 | 0 | return wedge_params_lookup[bsize].masks[wedge_sign][wedge_index]; |
2025 | 0 | } |
2026 | | |
// Copies an h-row, w-byte-wide block from `src` to `dst`, one row at a time.
// The filter arguments are accepted only for signature compatibility and are ignored.
static void aom_convolve_copy_c(const uint8_t* src, ptrdiff_t src_stride, uint8_t* dst, ptrdiff_t dst_stride,
                                const int16_t* filter_x, int filter_x_stride, const int16_t* filter_y,
                                int filter_y_stride, int w, int h) {
    (void)filter_x;
    (void)filter_x_stride;
    (void)filter_y;
    (void)filter_y_stride;

    const uint8_t* src_row = src;
    uint8_t*       dst_row = dst;
    for (int row = 0; row < h; ++row) {
        svt_memcpy(dst_row, src_row, w);
        src_row += src_stride;
        dst_row += dst_stride;
    }
}
2041 | | |
// Copies `width` bytes from `src` to `dst` displaced by `shift` positions, replicating the edge
// sample into the vacated positions:
//   shift >= 0: content moves right; the first `shift` bytes of dst are filled with src[0].
//   shift <  0: content moves left; the last `-shift` bytes of dst are filled with src[width - 1].
static void shift_copy(const uint8_t* src, uint8_t* dst, int shift, int width) {
    if (shift < 0) {
        const int left = -shift;
        svt_memcpy(dst, src + left, width - left);
        memset(dst + width - left, src[width - 1], left);
        return;
    }
    svt_memcpy(dst + shift, src, width - shift);
    memset(dst, src[0], shift);
}
2052 | | |
2053 | 0 | int svt_aom_get_wedge_params_bits(BlockSize bsize) { |
2054 | 0 | return wedge_params_lookup[bsize].bits; |
2055 | 0 | } |
2056 | | |
2057 | | #endif // USE_PRECOMPUTED_WEDGE_MASK |
2058 | | |
2059 | | // [negative][direction] |
2060 | | DECLARE_ALIGNED(16, static uint8_t, wedge_mask_obl[2][WEDGE_DIRECTIONS][MASK_PRIMARY_SIZE * MASK_PRIMARY_SIZE]); |
2061 | | |
2062 | | // 4 * MAX_WEDGE_SQUARE is an easy to compute and fairly tight upper bound |
2063 | | // on the sum of all mask sizes up to and including MAX_WEDGE_SQUARE.
2064 | | DECLARE_ALIGNED(16, static uint8_t, wedge_mask_buf[2 * MAX_WEDGE_TYPES * 4 * MAX_WEDGE_SQUARE]); |
2065 | | |
2066 | 1 | static void init_wedge_primary_masks() { |
2067 | 1 | const int w = MASK_PRIMARY_SIZE; |
2068 | 1 | const int h = MASK_PRIMARY_SIZE; |
2069 | 1 | const int stride = MASK_PRIMARY_STRIDE; |
2070 | | // Note: index [0] stores the primary, and [1] its complement. |
2071 | 1 | #if USE_PRECOMPUTED_WEDGE_MASK |
2072 | | // Generate prototype by shifting the primary |
2073 | 1 | int shift = h / 4; |
2074 | 33 | for (int i = 0; i < h; i += 2) { |
2075 | 32 | shift_copy( |
2076 | 32 | wedge_primary_oblique_even, &wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride], shift, MASK_PRIMARY_SIZE); |
2077 | 32 | shift--; |
2078 | 32 | shift_copy( |
2079 | 32 | wedge_primary_oblique_odd, &wedge_mask_obl[0][WEDGE_OBLIQUE63][(i + 1) * stride], shift, MASK_PRIMARY_SIZE); |
2080 | 32 | svt_memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][i * stride], |
2081 | 32 | wedge_primary_vertical, |
2082 | 32 | MASK_PRIMARY_SIZE * sizeof(wedge_primary_vertical[0])); |
2083 | 32 | svt_memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][(i + 1) * stride], |
2084 | 32 | wedge_primary_vertical, |
2085 | 32 | MASK_PRIMARY_SIZE * sizeof(wedge_primary_vertical[0])); |
2086 | 32 | } |
2087 | | #else |
2088 | | static const double smoother_param = 2.85; |
2089 | | const int a[2] = {2, 1}; |
2090 | | const double asqrt = sqrt(a[0] * a[0] + a[1] * a[1]); |
2091 | | for (int i = 0; i < h; i++) { |
2092 | | for (int j = 0; j < w; ++j) { |
2093 | | int x = (2 * j + 1 - w); |
2094 | | int y = (2 * i + 1 - h); |
2095 | | double d = (a[0] * x + a[1] * y) / asqrt; |
2096 | | const int msk = (int)rint((1.0 + tanh(d / smoother_param)) * 32); |
2097 | | wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride + j] = msk; |
2098 | | const int mskx = (int)rint((1.0 + tanh(x / smoother_param)) * 32); |
2099 | | wedge_mask_obl[0][WEDGE_VERTICAL][i * stride + j] = mskx; |
2100 | | } |
2101 | | } |
2102 | | #endif // USE_PRECOMPUTED_WEDGE_MASK |
2103 | 65 | for (int i = 0; i < h; ++i) { |
2104 | 4.16k | for (int j = 0; j < w; ++j) { |
2105 | 4.09k | const int msk = wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride + j]; |
2106 | 4.09k | wedge_mask_obl[0][WEDGE_OBLIQUE27][j * stride + i] = msk; |
2107 | 4.09k | wedge_mask_obl[0][WEDGE_OBLIQUE117][i * stride + w - 1 - j] = |
2108 | 4.09k | wedge_mask_obl[0][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] = (1 << WEDGE_WEIGHT_BITS) - msk; |
2109 | 4.09k | wedge_mask_obl[1][WEDGE_OBLIQUE63][i * stride + j] = wedge_mask_obl[1][WEDGE_OBLIQUE27][j * stride + i] = |
2110 | 4.09k | (1 << WEDGE_WEIGHT_BITS) - msk; |
2111 | 4.09k | wedge_mask_obl[1][WEDGE_OBLIQUE117][i * stride + w - 1 - j] = |
2112 | 4.09k | wedge_mask_obl[1][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] = msk; |
2113 | 4.09k | const int mskx = wedge_mask_obl[0][WEDGE_VERTICAL][i * stride + j]; |
2114 | 4.09k | wedge_mask_obl[0][WEDGE_HORIZONTAL][j * stride + i] = mskx; |
2115 | 4.09k | wedge_mask_obl[1][WEDGE_VERTICAL][i * stride + j] = wedge_mask_obl[1][WEDGE_HORIZONTAL][j * stride + i] = |
2116 | 4.09k | (1 << WEDGE_WEIGHT_BITS) - mskx; |
2117 | 4.09k | } |
2118 | 64 | } |
2119 | 1 | } |
2120 | | |
2121 | | #if !USE_PRECOMPUTED_WEDGE_SIGN |
2122 | | // If the signs for the wedges for various BLOCK_SIZES are |
2123 | | // inconsistent flip the sign flag. Do it only once for every |
2124 | | // wedge codebook. |
2125 | | static void init_wedge_signs() { |
2126 | | memset(wedge_signflip_lookup, 0, sizeof(wedge_signflip_lookup)); |
2127 | | for (BlockSize bsize = BLOCK_4X4; bsize < BLOCK_SIZES_ALL; ++bsize) { |
2128 | | const int bw = block_size_wide[bsize]; |
2129 | | const int bh = block_size_high[bsize]; |
2130 | | const wedge_params_type wedge_params = wedge_params_lookup[bsize]; |
2131 | | const int wbits = wedge_params.bits; |
2132 | | const int wtypes = 1 << wbits; |
2133 | | |
2134 | | if (wbits) { |
2135 | | for (int w = 0; w < wtypes; ++w) { |
2136 | | // Get the mask primary, i.e. index [0] |
2137 | | const uint8_t* mask = get_wedge_mask_inplace(w, 0, bsize); |
2138 | | int avg = 0; |
2139 | | for (int i = 0; i < bw; ++i) { |
2140 | | avg += mask[i]; |
2141 | | } |
2142 | | for (int i = 1; i < bh; ++i) { |
2143 | | avg += mask[i * MASK_PRIMARY_STRIDE]; |
2144 | | } |
2145 | | avg = (avg + (bw + bh - 1) / 2) / (bw + bh - 1); |
2146 | | // Default sign of this wedge is 1 if the average < 32, 0 otherwise. |
2147 | | // If default sign is 1: |
2148 | | // If sign requested is 0, we need to flip the sign and return |
2149 | | // the complement i.e. index [1] instead. If sign requested is 1 |
2150 | | // we need to flip the sign and return index [0] instead. |
2151 | | // If default sign is 0: |
2152 | | // If sign requested is 0, we need to return index [0] the primary |
2153 | | // if sign requested is 1, we need to return the complement index [1] |
2154 | | // instead. |
2155 | | wedge_params.signflip[w] = (avg < 32); |
2156 | | } |
2157 | | } |
2158 | | } |
2159 | | } |
2160 | | #endif // !USE_PRECOMPUTED_WEDGE_SIGN |
2161 | | |
2162 | 288 | static const uint8_t* get_wedge_mask_inplace(int wedge_index, int neg, BlockSize bsize) { |
2163 | 288 | const int bh = block_size_high[bsize]; |
2164 | 288 | const int bw = block_size_wide[bsize]; |
2165 | | |
2166 | 288 | assert(wedge_index >= 0 && wedge_index < (1 << svt_aom_get_wedge_bits_lookup(bsize))); |
2167 | 288 | const WedgeCodeType* a = wedge_params_lookup[bsize].codebook + wedge_index; |
2168 | 288 | int woff, hoff; |
2169 | 288 | const uint8_t wsignflip = wedge_params_lookup[bsize].signflip[wedge_index]; |
2170 | | |
2171 | 288 | woff = (a->x_offset * bw) >> 3; |
2172 | 288 | hoff = (a->y_offset * bh) >> 3; |
2173 | 288 | return wedge_mask_obl[neg ^ wsignflip][a->direction] + MASK_PRIMARY_STRIDE * (MASK_PRIMARY_SIZE / 2 - hoff) + |
2174 | 288 | MASK_PRIMARY_SIZE / 2 - woff; |
2175 | 288 | } |
2176 | | |
2177 | 1 | static void init_wedge_masks() { |
2178 | 1 | uint8_t* dst = wedge_mask_buf; |
2179 | 1 | memset(wedge_masks, 0, sizeof(wedge_masks)); |
2180 | 23 | for (BlockSize bsize = BLOCK_4X4; bsize < BLOCK_SIZES_ALL; ++bsize) { |
2181 | 22 | const int bw = block_size_wide[bsize]; |
2182 | 22 | const int bh = block_size_high[bsize]; |
2183 | 22 | const WedgeParamsType* wedge_params = &wedge_params_lookup[bsize]; |
2184 | 22 | const int wbits = wedge_params->bits; |
2185 | 22 | const int wtypes = 1 << wbits; |
2186 | 22 | if (wbits == 0) { |
2187 | 13 | continue; |
2188 | 13 | } |
2189 | 153 | for (int w = 0; w < wtypes; ++w) { |
2190 | 144 | const uint8_t* mask; |
2191 | 144 | mask = get_wedge_mask_inplace(w, 0, bsize); |
2192 | 144 | aom_convolve_copy_c(mask, MASK_PRIMARY_STRIDE, dst, bw, NULL, 0, NULL, 0, bw, bh); |
2193 | 144 | wedge_params->masks[0][w] = dst; |
2194 | 144 | dst += bw * bh; |
2195 | | |
2196 | 144 | mask = get_wedge_mask_inplace(w, 1, bsize); |
2197 | 144 | aom_convolve_copy_c(mask, MASK_PRIMARY_STRIDE, dst, bw, NULL, 0, NULL, 0, bw, bh); |
2198 | 144 | wedge_params->masks[1][w] = dst; |
2199 | 144 | dst += bw * bh; |
2200 | 144 | } |
2201 | 9 | assert(sizeof(wedge_mask_buf) >= (size_t)(dst - wedge_mask_buf)); |
2202 | 9 | } |
2203 | 1 | } |
2204 | | |
2205 | | // Equation of line: f(x, y) = a[0]*(x - a[2]*w/8) + a[1]*(y - a[3]*h/8) = 0 |
// One-time wedge table setup: primary masks first, then (only when the sign table is not
// precomputed) the sign-flip flags, and finally the per-block-size contiguous masks.
void svt_av1_init_wedge_masks(void) {
    init_wedge_primary_masks();

#if !USE_PRECOMPUTED_WEDGE_SIGN
    init_wedge_signs();
#endif // !USE_PRECOMPUTED_WEDGE_SIGN

    init_wedge_masks();
}
2213 | | |
2214 | | int svt_aom_is_masked_compound_type(COMPOUND_TYPE type); |
2215 | | |
2216 | | /* clang-format off */ |
2217 | | static const uint8_t ii_weights1d[MAX_SB_SIZE] = { |
2218 | | 60, 58, 56, 54, 52, 50, 48, 47, 45, 44, 42, 41, 39, 38, 37, 35, 34, 33, 32, |
2219 | | 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 22, 21, 20, 19, 19, 18, 18, 17, 16, |
2220 | | 16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 10, 10, 10, 9, 9, 9, 8, |
2221 | | 8, 8, 8, 7, 7, 7, 7, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 4, 4, |
2222 | | 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, |
2223 | | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, |
2224 | | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 |
2225 | | }; |
2226 | | static const uint8_t ii_size_scales[BLOCK_SIZES_ALL] = { |
2227 | | 32, 16, 16, 16, 8, 8, 8, 4, |
2228 | | 4, 4, 2, 2, 2, 1, 1, 1, |
2229 | | 8, 8, 4, 4, 2, 2 |
2230 | | }; |
2231 | | /* clang-format on */ |
2232 | | |
2233 | 40 | static void build_smooth_interintra_mask(uint8_t* mask, int stride, BlockSize plane_bsize, InterIntraMode mode) { |
2234 | 40 | const int bw = block_size_wide[plane_bsize]; |
2235 | 40 | const int bh = block_size_high[plane_bsize]; |
2236 | 40 | const int size_scale = ii_size_scales[plane_bsize]; |
2237 | | |
2238 | 40 | switch (mode) { |
2239 | 10 | case II_V_PRED: |
2240 | 154 | for (int i = 0; i < bh; ++i) { |
2241 | 144 | memset(mask, ii_weights1d[i * size_scale], bw * sizeof(mask[0])); |
2242 | 144 | mask += stride; |
2243 | 144 | } |
2244 | 10 | break; |
2245 | | |
2246 | 10 | case II_H_PRED: |
2247 | 154 | for (int i = 0; i < bh; ++i) { |
2248 | 2.84k | for (int j = 0; j < bw; ++j) { |
2249 | 2.70k | mask[j] = ii_weights1d[j * size_scale]; |
2250 | 2.70k | } |
2251 | 144 | mask += stride; |
2252 | 144 | } |
2253 | 10 | break; |
2254 | | |
2255 | 10 | case II_SMOOTH_PRED: |
2256 | 154 | for (int i = 0; i < bh; ++i) { |
2257 | 2.84k | for (int j = 0; j < bw; ++j) { |
2258 | 2.70k | mask[j] = ii_weights1d[(i < j ? i : j) * size_scale]; |
2259 | 2.70k | } |
2260 | 144 | mask += stride; |
2261 | 144 | } |
2262 | 10 | break; |
2263 | | |
2264 | 10 | case II_DC_PRED: |
2265 | 10 | default: |
2266 | 154 | for (int i = 0; i < bh; ++i) { |
2267 | 144 | memset(mask, 32, bw * sizeof(mask[0])); |
2268 | 144 | mask += stride; |
2269 | 144 | } |
2270 | 10 | break; |
2271 | 40 | } |
2272 | 40 | } |
2273 | | |
2274 | | // ii_masks stores the actual masks. We use smooth_ii_masks to access ii_masks so that we can index the array |
2275 | | // directly with the bsize (BlockSize that would be passed when doing the prediction) without using the extra memory |
2276 | | // to store empty, unused masks for the BLOCK_SIZES that don't allow inter-intra |
2277 | | static uint8_t ii_masks[BLOCK_32X32 - BLOCK_4X4 + 1][INTERINTRA_MODES][MAX_INTERINTRA_SB_SQUARE]; |
2278 | | static uint8_t* smooth_ii_masks[BLOCK_SIZES_ALL][INTERINTRA_MODES]; |
2279 | | |
2280 | | // Initialize the masks used for inter-intra compound blending. Inter-intra is allowed for 8x8-32x32 blocks, but |
2281 | | // masks must be generated down to 4x4 because of chroma. The stride of each mask is the block width. |
2282 | 1 | void init_ii_masks(void) { |
2283 | 1 | memset(smooth_ii_masks, 0 /*NULL*/, sizeof(smooth_ii_masks)); |
2284 | 11 | for (BlockSize bsize = BLOCK_4X4; bsize <= BLOCK_32X32; ++bsize) { |
2285 | 10 | const int bw = block_size_wide[bsize]; |
2286 | 50 | for (InterIntraMode ii_mode = II_DC_PRED; ii_mode < INTERINTRA_MODES; ii_mode++) { |
2287 | 40 | build_smooth_interintra_mask(ii_masks[bsize - BLOCK_4X4][ii_mode], bw, bsize, ii_mode); |
2288 | 40 | smooth_ii_masks[bsize][ii_mode] = ii_masks[bsize - BLOCK_4X4][ii_mode]; |
2289 | 40 | } |
2290 | 10 | } |
2291 | 1 | } |
2292 | | |
2293 | | // mask stride is block width |
2294 | 0 | static uint8_t* get_ii_mask(BlockSize bsize, InterIntraMode ii_mode) { |
2295 | 0 | return smooth_ii_masks[bsize][ii_mode]; |
2296 | 0 | } |
2297 | | |
2298 | | void svt_aom_combine_interintra_highbd(InterIntraMode mode, uint8_t use_wedge_interintra, uint8_t wedge_index, |
2299 | | uint8_t wedge_sign, BlockSize bsize, BlockSize plane_bsize, uint8_t* comppred8, |
2300 | | int compstride, const uint8_t* interpred8, int interstride, |
2301 | 0 | const uint8_t* intrapred8, int intrastride, int bd) { |
2302 | 0 | const int bw = block_size_wide[plane_bsize]; |
2303 | 0 | const int bh = block_size_high[plane_bsize]; |
2304 | |
|
2305 | 0 | if (use_wedge_interintra) { |
2306 | 0 | if (svt_aom_is_interintra_wedge_used(bsize)) { |
2307 | 0 | const uint8_t* mask = svt_aom_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize); |
2308 | 0 | const int subh = 2 * mi_size_high[bsize] == bh; |
2309 | 0 | const int subw = 2 * mi_size_wide[bsize] == bw; |
2310 | 0 | svt_aom_highbd_blend_a64_mask(comppred8, |
2311 | 0 | compstride, |
2312 | 0 | intrapred8, |
2313 | 0 | intrastride, |
2314 | 0 | interpred8, |
2315 | 0 | interstride, |
2316 | 0 | mask, |
2317 | 0 | block_size_wide[bsize], |
2318 | 0 | bw, |
2319 | 0 | bh, |
2320 | 0 | subw, |
2321 | 0 | subh, |
2322 | 0 | bd); |
2323 | 0 | } |
2324 | 0 | return; |
2325 | 0 | } |
2326 | | |
2327 | 0 | uint8_t* mask = get_ii_mask(plane_bsize, mode); |
2328 | 0 | svt_aom_highbd_blend_a64_mask( |
2329 | 0 | comppred8, compstride, intrapred8, intrastride, interpred8, interstride, mask, bw, bw, bh, 0, 0, bd); |
2330 | 0 | } |
2331 | | |
2332 | | static const uint8_t* av1_get_compound_type_mask(const InterInterCompoundData* const comp_data, uint8_t* seg_mask, |
2333 | 0 | BlockSize bsize) { |
2334 | 0 | assert(svt_aom_is_masked_compound_type(comp_data->type)); |
2335 | 0 | (void)bsize; |
2336 | 0 | switch (comp_data->type) { |
2337 | 0 | case COMPOUND_WEDGE: |
2338 | 0 | return svt_aom_get_contiguous_soft_mask(comp_data->wedge_index, comp_data->wedge_sign, bsize); |
2339 | 0 | case COMPOUND_DIFFWTD: |
2340 | 0 | return seg_mask; |
2341 | 0 | default: |
2342 | 0 | assert(0); |
2343 | 0 | return NULL; |
2344 | 0 | } |
2345 | 0 | } |
2346 | | |
2347 | | void svt_aom_build_masked_compound_no_round(uint8_t* dst, int dst_stride, const CONV_BUF_TYPE* src0, int src0_stride, |
2348 | | const CONV_BUF_TYPE* src1, int src1_stride, |
2349 | | const InterInterCompoundData* const comp_data, uint8_t* seg_mask, |
2350 | | BlockSize bsize, int h, int w, ConvolveParams* conv_params, |
2351 | 0 | uint8_t bit_depth, bool is_16bit) { |
2352 | | // Derive subsampling from h and w passed in. May be refactored to |
2353 | | // pass in subsampling factors directly. |
2354 | 0 | const int subh = (2 << mi_size_high_log2[bsize]) == h; |
2355 | 0 | const int subw = (2 << mi_size_wide_log2[bsize]) == w; |
2356 | 0 | const uint8_t* mask = av1_get_compound_type_mask(comp_data, seg_mask, bsize); |
2357 | |
|
2358 | 0 | if (is_16bit) { |
2359 | 0 | svt_aom_highbd_blend_a64_d16_mask(dst, |
2360 | 0 | dst_stride, |
2361 | 0 | src0, |
2362 | 0 | src0_stride, |
2363 | 0 | src1, |
2364 | 0 | src1_stride, |
2365 | 0 | mask, |
2366 | 0 | block_size_wide[bsize], |
2367 | 0 | w, |
2368 | 0 | h, |
2369 | 0 | subw, |
2370 | 0 | subh, |
2371 | 0 | conv_params, |
2372 | 0 | bit_depth); |
2373 | 0 | } else { |
2374 | 0 | svt_aom_lowbd_blend_a64_d16_mask(dst, |
2375 | 0 | dst_stride, |
2376 | 0 | src0, |
2377 | 0 | src0_stride, |
2378 | 0 | src1, |
2379 | 0 | src1_stride, |
2380 | 0 | mask, |
2381 | 0 | block_size_wide[bsize], |
2382 | 0 | w, |
2383 | 0 | h, |
2384 | 0 | subw, |
2385 | 0 | subh, |
2386 | 0 | conv_params); |
2387 | 0 | } |
2388 | 0 | } |
2389 | | |
2390 | 0 | void svt_aom_find_ref_dv(Mv* ref_dv, const TileInfo* const tile, int mib_size, int mi_row, int mi_col) { |
2391 | 0 | (void)mi_col; |
2392 | 0 | if (mi_row - mib_size < tile->mi_row_start) { |
2393 | 0 | ref_dv->y = 0; |
2394 | 0 | ref_dv->x = -MI_SIZE * mib_size - INTRABC_DELAY_PIXELS; |
2395 | 0 | } else { |
2396 | 0 | ref_dv->y = -MI_SIZE * mib_size; |
2397 | 0 | ref_dv->x = 0; |
2398 | 0 | } |
2399 | 0 | ref_dv->y *= 8; |
2400 | 0 | ref_dv->x *= 8; |
2401 | 0 | } |
#if CONFIG_ENABLE_OBMC
// Decide whether OBMC prediction is skipped for a plane of the given block.
// Only plane block sizes 4x4, 8x4 and 4x8 are ever skipped:
//   - with DISABLE_CHROMA_U8X8_OBMC they are always skipped (returns 1);
//   - otherwise they are skipped only when dir == 0.
// Every other plane block size returns 0.
int svt_av1_skip_u4x4_pred_in_obmc(BlockSize bsize, int dir, int subsampling_x, int subsampling_y) {
    assert(is_motion_variation_allowed_bsize(bsize));

    const BlockSize plane_bs       = get_plane_block_size(bsize, subsampling_x, subsampling_y);
    const int       is_small_plane = plane_bs == BLOCK_4X4 || plane_bs == BLOCK_8X4 || plane_bs == BLOCK_4X8;
    if (!is_small_plane) {
        return 0;
    }
#if DISABLE_CHROMA_U8X8_OBMC
    return 1;
#else
    return dir == 0;
#endif
}
#endif
2426 | | |
2427 | 0 | #define MAX_MASK_VALUE (1 << WEDGE_WEIGHT_BITS) |
2428 | | |
2429 | | /** |
2430 | | * Computes SSE of a compound predictor constructed from 2 fundamental |
2431 | | * predictors p0 and p1 using blending with mask. |
2432 | | * |
2433 | | * r1: Residuals of p1. |
2434 | | * (source - p1) |
2435 | | * d: Difference of p1 and p0. |
2436 | | * (p1 - p0) |
2437 | | * m: The blending mask |
2438 | | * N: Number of pixels |
2439 | | * |
2440 | | * 'r1', 'd', and 'm' are contiguous. |
2441 | | * |
2442 | | * Computes: |
2443 | | * Sum((MAX_MASK_VALUE*r1 + mask*d)**2), which is equivalent to: |
2444 | | * Sum((mask*r0 + (MAX_MASK_VALUE-mask)*r1)**2), |
2445 | | * where r0 is (source - p0), and r1 is (source - p1), which is in turn |
2446 | | * is equivalent to: |
2447 | | * Sum((source*MAX_MASK_VALUE - (mask*p0 + (MAX_MASK_VALUE-mask)*p1))**2), |
2448 | | * which is the SSE of the residuals of the compound predictor scaled up by |
2449 | | * MAX_MASK_VALUE**2. |
2450 | | * |
2451 | | * Note that we clamp the partial term in the loop to 16 bits signed. This is |
2452 | | * to facilitate equivalent SIMD implementation. It should have no effect if |
2453 | | * residuals are within 16 - WEDGE_WEIGHT_BITS (=10) signed, which always |
2454 | | * holds for 8 bit input, and on real input, it should hold practically always, |
2455 | | * as residuals are expected to be small. |
2456 | | */ |
2457 | 0 | uint64_t svt_av1_wedge_sse_from_residuals_c(const int16_t* r1, const int16_t* d, const uint8_t* m, int N) { |
2458 | 0 | uint64_t csse = 0; |
2459 | |
|
2460 | 0 | for (int i = 0; i < N; i++) { |
2461 | 0 | int32_t t = MAX_MASK_VALUE * r1[i] + m[i] * d[i]; |
2462 | 0 | t = clamp(t, INT16_MIN, INT16_MAX); |
2463 | 0 | csse += t * t; |
2464 | 0 | } |
2465 | 0 | return ROUND_POWER_OF_TWO(csse, 2 * WEDGE_WEIGHT_BITS); |
2466 | 0 | } |
2467 | | |
2468 | | void svt_aom_combine_interintra(InterIntraMode mode, int8_t use_wedge_interintra, int wedge_index, int wedge_sign, |
2469 | | BlockSize bsize, BlockSize plane_bsize, uint8_t* comppred, int compstride, |
2470 | 0 | const uint8_t* interpred, int interstride, const uint8_t* intrapred, int intrastride) { |
2471 | 0 | const int bw = block_size_wide[plane_bsize]; |
2472 | 0 | const int bh = block_size_high[plane_bsize]; |
2473 | |
|
2474 | 0 | if (use_wedge_interintra) { |
2475 | 0 | if (svt_aom_is_interintra_wedge_used(bsize)) { |
2476 | 0 | const uint8_t* mask = svt_aom_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize); |
2477 | 0 | const int subw = 2 * mi_size_wide[bsize] == bw; |
2478 | 0 | const int subh = 2 * mi_size_high[bsize] == bh; |
2479 | 0 | svt_aom_blend_a64_mask(comppred, |
2480 | 0 | compstride, |
2481 | 0 | intrapred, |
2482 | 0 | intrastride, |
2483 | 0 | interpred, |
2484 | 0 | interstride, |
2485 | 0 | mask, |
2486 | 0 | block_size_wide[bsize], |
2487 | 0 | bw, |
2488 | 0 | bh, |
2489 | 0 | subw, |
2490 | 0 | subh); |
2491 | 0 | } |
2492 | 0 | return; |
2493 | 0 | } else { |
2494 | 0 | uint8_t* mask = get_ii_mask(plane_bsize, mode); |
2495 | 0 | svt_aom_blend_a64_mask( |
2496 | 0 | comppred, compstride, intrapred, intrastride, interpred, interstride, mask, bw, bw, bh, 0, 0); |
2497 | 0 | } |
2498 | 0 | } |
2499 | | |
// C reference for a horizontally masked 16-bit blend: every output row is produced with the same
// mask, indexed only by column (mask[col]), via AOM_BLEND_A64(mask, src0, src1).
// w and h must be powers of two >= 1, and bd one of 8/10/12 (checked by asserts only; bd is
// otherwise unused). dst may alias src0 or src1 provided the aliased strides match.
void svt_aom_highbd_blend_a64_hmask_16bit_c(uint16_t* dst, uint32_t dst_stride, const uint16_t* src0,
                                            uint32_t src0_stride, const uint16_t* src1, uint32_t src1_stride,
                                            const uint8_t* mask, int w, int h, int bd) {
    (void)bd;

    assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
    assert(IMPLIES(src1 == dst, src1_stride == dst_stride));

    assert(h >= 1);
    assert(w >= 1);
    assert(IS_POWER_OF_TWO(h));
    assert(IS_POWER_OF_TWO(w));

    assert(bd == 8 || bd == 10 || bd == 12);

    for (int row = 0; row < h; ++row) {
        // Row offsets are computed once per row in the same unsigned arithmetic as the strides.
        const uint32_t dst_off  = row * dst_stride;
        const uint32_t src0_off = row * src0_stride;
        const uint32_t src1_off = row * src1_stride;
        for (int col = 0; col < w; ++col) {
            dst[dst_off + col] = AOM_BLEND_A64(mask[col], src0[src0_off + col], src1[src1_off + col]);
        }
    }
}
2521 | | |
/**
 * Sum of squares of n signed 16-bit values.
 *
 * @param src  pointer to n contiguous int16_t samples (not dereferenced when n == 0)
 * @param n    number of samples
 * @return     Sum(src[i] * src[i]) over i in [0, n); 0 for an empty input
 */
uint64_t svt_aom_sum_squares_i16_c(const int16_t* src, uint32_t n) {
    uint64_t ss = 0;
    // Counted loop instead of do/while(--n): with n == 0 the old form processed one element and
    // then wrapped the counter, reading ~2^32 elements past the buffer.
    for (uint32_t i = 0; i < n; ++i) {
        const int32_t v = src[i];
        // |v| <= 32768, so v * v <= 2^30 and cannot overflow int32_t.
        ss += (uint64_t)(v * v);
    }

    return ss;
}
2531 | | |
2532 | | // obmc_mask_N[overlap_position] |
2533 | | static const uint8_t obmc_mask_1[1] = {64}; |
2534 | | DECLARE_ALIGNED(2, static const uint8_t, obmc_mask_2[2]) = {45, 64}; |
2535 | | |
2536 | | DECLARE_ALIGNED(4, static const uint8_t, obmc_mask_4[4]) = {39, 50, 59, 64}; |
2537 | | |
2538 | | static const uint8_t obmc_mask_8[8] = {36, 42, 48, 53, 57, 61, 64, 64}; |
2539 | | |
2540 | | static const uint8_t obmc_mask_16[16] = {34, 37, 40, 43, 46, 49, 52, 54, 56, 58, 60, 61, 64, 64, 64, 64}; |
2541 | | |
2542 | | static const uint8_t obmc_mask_32[32] = {33, 35, 36, 38, 40, 41, 43, 44, 45, 47, 48, 50, 51, 52, 53, 55, |
2543 | | 56, 57, 58, 59, 60, 60, 61, 62, 64, 64, 64, 64, 64, 64, 64, 64}; |
2544 | | |
2545 | 0 | const uint8_t* svt_av1_get_obmc_mask(int length) { |
2546 | 0 | switch (length) { |
2547 | 0 | case 1: |
2548 | 0 | return obmc_mask_1; |
2549 | 0 | case 2: |
2550 | 0 | return obmc_mask_2; |
2551 | 0 | case 4: |
2552 | 0 | return obmc_mask_4; |
2553 | 0 | case 8: |
2554 | 0 | return obmc_mask_8; |
2555 | 0 | case 16: |
2556 | 0 | return obmc_mask_16; |
2557 | 0 | case 32: |
2558 | 0 | return obmc_mask_32; |
2559 | 0 | default: |
2560 | 0 | assert(0); |
2561 | 0 | return NULL; |
2562 | 0 | } |
2563 | 0 | } |
2564 | | |
2565 | 0 | int16_t svt_aom_mode_context_analyzer(int16_t mode_context, const MvReferenceFrame* const rf) { |
2566 | 0 | static unsigned svt_aom_compound_mode_ctx_map[3][COMP_NEWMV_CTXS] = { |
2567 | 0 | {0, 1, 1, 1, 1}, |
2568 | 0 | {1, 2, 3, 4, 4}, |
2569 | 0 | {4, 4, 5, 6, 7}, |
2570 | 0 | }; |
2571 | |
|
2572 | 0 | if (rf[1] <= INTRA_FRAME) { |
2573 | 0 | return mode_context; |
2574 | 0 | } |
2575 | | |
2576 | 0 | const unsigned newmv_ctx = mode_context & NEWMV_CTX_MASK; |
2577 | 0 | const unsigned refmv_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK; |
2578 | 0 | assert((refmv_ctx >> 1) < 3); |
2579 | 0 | const unsigned comp_ctx = svt_aom_compound_mode_ctx_map[refmv_ctx >> 1][AOMMIN(newmv_ctx, COMP_NEWMV_CTXS - 1)]; |
2580 | 0 | return comp_ctx; |
2581 | 0 | } |