/src/aom/av1/encoder/pickcdef.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2016, Alliance for Open Media. All rights reserved. |
3 | | * |
4 | | * This source code is subject to the terms of the BSD 2 Clause License and |
5 | | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
6 | | * was not distributed with this source code in the LICENSE file, you can |
7 | | * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
8 | | * Media Patent License 1.0 was not distributed with this source code in the |
9 | | * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
10 | | */ |
11 | | |
12 | | #include <math.h> |
13 | | #include <stdbool.h> |
14 | | #include <string.h> |
15 | | |
16 | | #include "config/aom_dsp_rtcd.h" |
17 | | #include "config/aom_scale_rtcd.h" |
18 | | |
19 | | #include "aom/aom_integer.h" |
20 | | #include "av1/common/av1_common_int.h" |
21 | | #include "av1/common/reconinter.h" |
22 | | #include "av1/encoder/encoder.h" |
23 | | #include "av1/encoder/ethread.h" |
24 | | #include "av1/encoder/pickcdef.h" |
25 | | #include "av1/encoder/mcomp.h" |
26 | | |
27 | | // Get primary and secondary filter strength for the given strength index and |
28 | | // search method |
29 | | static inline void get_cdef_filter_strengths(CDEF_PICK_METHOD pick_method, |
30 | | int *pri_strength, |
31 | | int *sec_strength, |
32 | 0 | int strength_idx) { |
33 | 0 | const int tot_sec_filter = |
34 | 0 | (pick_method == CDEF_FAST_SEARCH_LVL5) |
35 | 0 | ? REDUCED_SEC_STRENGTHS_LVL5 |
36 | 0 | : ((pick_method >= CDEF_FAST_SEARCH_LVL3) ? REDUCED_SEC_STRENGTHS_LVL3 |
37 | 0 | : CDEF_SEC_STRENGTHS); |
38 | 0 | const int pri_idx = strength_idx / tot_sec_filter; |
39 | 0 | const int sec_idx = strength_idx % tot_sec_filter; |
40 | 0 | *pri_strength = pri_idx; |
41 | 0 | *sec_strength = sec_idx; |
42 | 0 | if (pick_method == CDEF_FULL_SEARCH) return; |
43 | | |
44 | 0 | switch (pick_method) { |
45 | 0 | case CDEF_FAST_SEARCH_LVL1: |
46 | 0 | assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL1); |
47 | 0 | *pri_strength = priconv_lvl1[pri_idx]; |
48 | 0 | break; |
49 | 0 | case CDEF_FAST_SEARCH_LVL2: |
50 | 0 | assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL2); |
51 | 0 | *pri_strength = priconv_lvl2[pri_idx]; |
52 | 0 | break; |
53 | 0 | case CDEF_FAST_SEARCH_LVL3: |
54 | 0 | assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL2); |
55 | 0 | assert(sec_idx < REDUCED_SEC_STRENGTHS_LVL3); |
56 | 0 | *pri_strength = priconv_lvl2[pri_idx]; |
57 | 0 | *sec_strength = secconv_lvl3[sec_idx]; |
58 | 0 | break; |
59 | 0 | case CDEF_FAST_SEARCH_LVL4: |
60 | 0 | assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL4); |
61 | 0 | assert(sec_idx < REDUCED_SEC_STRENGTHS_LVL3); |
62 | 0 | *pri_strength = priconv_lvl4[pri_idx]; |
63 | 0 | *sec_strength = secconv_lvl3[sec_idx]; |
64 | 0 | break; |
65 | 0 | case CDEF_FAST_SEARCH_LVL5: |
66 | 0 | assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL4); |
67 | 0 | assert(sec_idx < REDUCED_SEC_STRENGTHS_LVL5); |
68 | 0 | *pri_strength = priconv_lvl5[pri_idx]; |
69 | 0 | *sec_strength = secconv_lvl5[sec_idx]; |
70 | 0 | break; |
71 | 0 | default: assert(0 && "Invalid CDEF search method"); |
72 | 0 | } |
73 | 0 | } |
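Aside (not part of the file): for CDEF_FULL_SEARCH the strength index is simply a base-CDEF_SEC_STRENGTHS encoding of the (primary, secondary) pair, and STORE_CDEF_FILTER_STRENGTH below re-packs the pair the same way; the fast-search levels additionally remap the indices through the priconv_*/secconv_* tables. A minimal standalone sketch of that mapping, assuming CDEF_SEC_STRENGTHS is 4 as in libaom (TOY_CDEF_SEC_STRENGTHS is a local stand-in, not the library constant):

    #include <stdio.h>

    #define TOY_CDEF_SEC_STRENGTHS 4  /* stand-in for libaom's CDEF_SEC_STRENGTHS */

    int main(void) {
      /* Full-search case: index -> (pri, sec), then re-packed for signaling. */
      for (int strength_idx = 0; strength_idx < 8; ++strength_idx) {
        const int pri = strength_idx / TOY_CDEF_SEC_STRENGTHS;
        const int sec = strength_idx % TOY_CDEF_SEC_STRENGTHS;
        const int packed = pri * TOY_CDEF_SEC_STRENGTHS + sec;
        printf("idx=%d -> pri=%d sec=%d packed=%d\n", strength_idx, pri, sec,
               packed);
      }
      return 0;
    }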
74 | | |
75 | | // Store CDEF filter strength calculated from strength index for given search |
76 | | // method |
77 | | #define STORE_CDEF_FILTER_STRENGTH(cdef_strength, pick_method, strength_idx) \ |
78 | 0 | do { \ |
79 | 0 | get_cdef_filter_strengths((pick_method), &pri_strength, &sec_strength, \ |
80 | 0 | (strength_idx)); \ |
81 | 0 | cdef_strength = pri_strength * CDEF_SEC_STRENGTHS + sec_strength; \ |
82 | 0 | } while (0) |
83 | | |
84 | | /* Search for the best strength to add as an option, knowing we |
85 | | already selected nb_strengths options. */ |
86 | | static uint64_t search_one(int *lev, int nb_strengths, |
87 | | uint64_t mse[][TOTAL_STRENGTHS], int sb_count, |
88 | 0 | CDEF_PICK_METHOD pick_method) { |
89 | 0 | uint64_t tot_mse[TOTAL_STRENGTHS]; |
90 | 0 | const int total_strengths = nb_cdef_strengths[pick_method]; |
91 | 0 | int i, j; |
92 | 0 | uint64_t best_tot_mse = (uint64_t)1 << 63; |
93 | 0 | int best_id = 0; |
94 | 0 | memset(tot_mse, 0, sizeof(tot_mse)); |
95 | 0 | for (i = 0; i < sb_count; i++) { |
96 | 0 | int gi; |
97 | 0 | uint64_t best_mse = (uint64_t)1 << 63; |
98 | | /* Find best mse among already selected options. */ |
99 | 0 | for (gi = 0; gi < nb_strengths; gi++) { |
100 | 0 | if (mse[i][lev[gi]] < best_mse) { |
101 | 0 | best_mse = mse[i][lev[gi]]; |
102 | 0 | } |
103 | 0 | } |
104 | | /* Find best mse when adding each possible new option. */ |
105 | 0 | for (j = 0; j < total_strengths; j++) { |
106 | 0 | uint64_t best = best_mse; |
107 | 0 | if (mse[i][j] < best) best = mse[i][j]; |
108 | 0 | tot_mse[j] += best; |
109 | 0 | } |
110 | 0 | } |
111 | 0 | for (j = 0; j < total_strengths; j++) { |
112 | 0 | if (tot_mse[j] < best_tot_mse) { |
113 | 0 | best_tot_mse = tot_mse[j]; |
114 | 0 | best_id = j; |
115 | 0 | } |
116 | 0 | } |
117 | 0 | lev[nb_strengths] = best_id; |
118 | 0 | return best_tot_mse; |
119 | 0 | } |
120 | | |
121 | | /* Search for the best luma+chroma strength to add as an option, knowing we |
122 | | already selected nb_strengths options. */ |
123 | | static uint64_t search_one_dual(int *lev0, int *lev1, int nb_strengths, |
124 | | uint64_t (**mse)[TOTAL_STRENGTHS], int sb_count, |
125 | 0 | CDEF_PICK_METHOD pick_method) { |
126 | 0 | uint64_t tot_mse[TOTAL_STRENGTHS][TOTAL_STRENGTHS]; |
127 | 0 | int i, j; |
128 | 0 | uint64_t best_tot_mse = (uint64_t)1 << 63; |
129 | 0 | int best_id0 = 0; |
130 | 0 | int best_id1 = 0; |
131 | 0 | const int total_strengths = nb_cdef_strengths[pick_method]; |
132 | 0 | memset(tot_mse, 0, sizeof(tot_mse)); |
133 | 0 | for (i = 0; i < sb_count; i++) { |
134 | 0 | int gi; |
135 | 0 | uint64_t best_mse = (uint64_t)1 << 63; |
136 | | /* Find best mse among already selected options. */ |
137 | 0 | for (gi = 0; gi < nb_strengths; gi++) { |
138 | 0 | uint64_t curr = mse[0][i][lev0[gi]]; |
139 | 0 | curr += mse[1][i][lev1[gi]]; |
140 | 0 | if (curr < best_mse) { |
141 | 0 | best_mse = curr; |
142 | 0 | } |
143 | 0 | } |
144 | | /* Find best mse when adding each possible new option. */ |
145 | 0 | for (j = 0; j < total_strengths; j++) { |
146 | 0 | int k; |
147 | 0 | for (k = 0; k < total_strengths; k++) { |
148 | 0 | uint64_t best = best_mse; |
149 | 0 | uint64_t curr = mse[0][i][j]; |
150 | 0 | curr += mse[1][i][k]; |
151 | 0 | if (curr < best) best = curr; |
152 | 0 | tot_mse[j][k] += best; |
153 | 0 | } |
154 | 0 | } |
155 | 0 | } |
156 | 0 | for (j = 0; j < total_strengths; j++) { |
157 | 0 | int k; |
158 | 0 | for (k = 0; k < total_strengths; k++) { |
159 | 0 | if (tot_mse[j][k] < best_tot_mse) { |
160 | 0 | best_tot_mse = tot_mse[j][k]; |
161 | 0 | best_id0 = j; |
162 | 0 | best_id1 = k; |
163 | 0 | } |
164 | 0 | } |
165 | 0 | } |
166 | 0 | lev0[nb_strengths] = best_id0; |
167 | 0 | lev1[nb_strengths] = best_id1; |
168 | 0 | return best_tot_mse; |
169 | 0 | } |
170 | | |
171 | | /* Search for the set of strengths that minimizes mse. */ |
172 | | static uint64_t joint_strength_search(int *best_lev, int nb_strengths, |
173 | | uint64_t mse[][TOTAL_STRENGTHS], |
174 | | int sb_count, |
175 | 0 | CDEF_PICK_METHOD pick_method) { |
176 | 0 | uint64_t best_tot_mse; |
177 | 0 | int fast = (pick_method >= CDEF_FAST_SEARCH_LVL1 && |
178 | 0 | pick_method <= CDEF_FAST_SEARCH_LVL5); |
179 | 0 | int i; |
180 | 0 | best_tot_mse = (uint64_t)1 << 63; |
181 | | /* Greedy search: add one strength option at a time. */ |
182 | 0 | for (i = 0; i < nb_strengths; i++) { |
183 | 0 | best_tot_mse = search_one(best_lev, i, mse, sb_count, pick_method); |
184 | 0 | } |
185 | | /* Trying to refine the greedy search by reconsidering each |
186 | | already-selected option. */ |
187 | 0 | if (!fast) { |
188 | 0 | for (i = 0; i < 4 * nb_strengths; i++) { |
189 | 0 | int j; |
190 | 0 | for (j = 0; j < nb_strengths - 1; j++) best_lev[j] = best_lev[j + 1]; |
191 | 0 | best_tot_mse = |
192 | 0 | search_one(best_lev, nb_strengths - 1, mse, sb_count, pick_method); |
193 | 0 | } |
194 | 0 | } |
195 | 0 | return best_tot_mse; |
196 | 0 | } |
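To make the greedy selection concrete, here is a self-contained toy sketch of the search_one() step over a made-up 3-superblock, 4-strength MSE table. All names and numbers below are illustrative, not libaom API; it only mirrors the per-superblock "best of the selected options" accumulation used above.

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define TOY_SB_COUNT 3
    #define TOY_STRENGTHS 4

    /* Made-up per-superblock MSE for each candidate strength. */
    static const uint64_t toy_mse[TOY_SB_COUNT][TOY_STRENGTHS] = {
      { 90, 40, 60, 80 },
      { 50, 70, 30, 90 },
      { 20, 60, 80, 10 },
    };

    /* Mirrors search_one(): pick the strength that, added to the already chosen
       set lev[0..nb_strengths-1], minimizes the total MSE when every superblock
       uses the best option available to it. */
    static uint64_t toy_search_one(int *lev, int nb_strengths) {
      uint64_t tot_mse[TOY_STRENGTHS];
      memset(tot_mse, 0, sizeof(tot_mse));
      for (int i = 0; i < TOY_SB_COUNT; i++) {
        uint64_t best_mse = UINT64_MAX;
        for (int gi = 0; gi < nb_strengths; gi++)
          if (toy_mse[i][lev[gi]] < best_mse) best_mse = toy_mse[i][lev[gi]];
        for (int j = 0; j < TOY_STRENGTHS; j++)
          tot_mse[j] += (toy_mse[i][j] < best_mse) ? toy_mse[i][j] : best_mse;
      }
      uint64_t best_tot = UINT64_MAX;
      int best_id = 0;
      for (int j = 0; j < TOY_STRENGTHS; j++)
        if (tot_mse[j] < best_tot) { best_tot = tot_mse[j]; best_id = j; }
      lev[nb_strengths] = best_id;
      return best_tot;
    }

    int main(void) {
      int lev[2] = { 0, 0 };
      /* Greedily add two strengths, like the first loop in joint_strength_search. */
      for (int i = 0; i < 2; i++) {
        const uint64_t tot = toy_search_one(lev, i);
        printf("picked lev[%d]=%d, total MSE=%llu\n", i, lev[i],
               (unsigned long long)tot);
      }
      return 0;
    }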
197 | | |
198 | | /* Search for the set of luma+chroma strengths that minimizes mse. */ |
199 | | static uint64_t joint_strength_search_dual(int *best_lev0, int *best_lev1, |
200 | | int nb_strengths, |
201 | | uint64_t (**mse)[TOTAL_STRENGTHS], |
202 | | int sb_count, |
203 | 0 | CDEF_PICK_METHOD pick_method) { |
204 | 0 | uint64_t best_tot_mse; |
205 | 0 | int i; |
206 | 0 | best_tot_mse = (uint64_t)1 << 63; |
207 | | /* Greedy search: add one strength option at a time. */ |
208 | 0 | for (i = 0; i < nb_strengths; i++) { |
209 | 0 | best_tot_mse = |
210 | 0 | search_one_dual(best_lev0, best_lev1, i, mse, sb_count, pick_method); |
211 | 0 | } |
212 | | /* Trying to refine the greedy search by reconsidering each |
213 | | already-selected option. */ |
214 | 0 | for (i = 0; i < 4 * nb_strengths; i++) { |
215 | 0 | int j; |
216 | 0 | for (j = 0; j < nb_strengths - 1; j++) { |
217 | 0 | best_lev0[j] = best_lev0[j + 1]; |
218 | 0 | best_lev1[j] = best_lev1[j + 1]; |
219 | 0 | } |
220 | 0 | best_tot_mse = search_one_dual(best_lev0, best_lev1, nb_strengths - 1, mse, |
221 | 0 | sb_count, pick_method); |
222 | 0 | } |
223 | 0 | return best_tot_mse; |
224 | 0 | } |
225 | | |
226 | | static inline void init_src_params(int *src_stride, int *width, int *height, |
227 | | int *width_log2, int *height_log2, |
228 | 0 | BLOCK_SIZE bsize) { |
229 | 0 | *src_stride = block_size_wide[bsize]; |
230 | 0 | *width = block_size_wide[bsize]; |
231 | 0 | *height = block_size_high[bsize]; |
232 | 0 | *width_log2 = MI_SIZE_LOG2 + mi_size_wide_log2[bsize]; |
233 | 0 | *height_log2 = MI_SIZE_LOG2 + mi_size_high_log2[bsize]; |
234 | 0 | } |
235 | | #if CONFIG_AV1_HIGHBITDEPTH |
236 | | /* Compute MSE only on the blocks we filtered. */ |
237 | | static uint64_t compute_cdef_dist_highbd(void *dst, int dstride, uint16_t *src, |
238 | | cdef_list *dlist, int cdef_count, |
239 | | BLOCK_SIZE bsize, int coeff_shift, |
240 | 0 | int row, int col) { |
241 | 0 | assert(bsize == BLOCK_4X4 || bsize == BLOCK_4X8 || bsize == BLOCK_8X4 || |
242 | 0 | bsize == BLOCK_8X8); |
243 | 0 | uint64_t sum = 0; |
244 | 0 | int bi, bx, by; |
245 | 0 | uint16_t *dst16 = CONVERT_TO_SHORTPTR((uint8_t *)dst); |
246 | 0 | uint16_t *dst_buff = &dst16[row * dstride + col]; |
247 | 0 | int src_stride, width, height, width_log2, height_log2; |
248 | 0 | init_src_params(&src_stride, &width, &height, &width_log2, &height_log2, |
249 | 0 | bsize); |
250 | 0 | for (bi = 0; bi < cdef_count; bi++) { |
251 | 0 | by = dlist[bi].by; |
252 | 0 | bx = dlist[bi].bx; |
253 | 0 | sum += aom_mse_wxh_16bit_highbd( |
254 | 0 | &dst_buff[(by << height_log2) * dstride + (bx << width_log2)], dstride, |
255 | 0 | &src[bi << (height_log2 + width_log2)], src_stride, width, height); |
256 | 0 | } |
257 | 0 | return sum >> 2 * coeff_shift; |
258 | 0 | } |
259 | | #endif |
260 | | |
261 | | // Checks whether dual or quad block processing is applicable for block |
262 | | // widths 8 and 4, respectively. |
263 | | static inline int is_dual_or_quad_applicable(cdef_list *dlist, int width, |
264 | 0 | int cdef_count, int bi, int iter) { |
265 | 0 | assert(width == 8 || width == 4); |
266 | 0 | const int blk_offset = (width == 8) ? 1 : 3; |
267 | 0 | if ((iter + blk_offset) >= cdef_count) return 0; |
268 | | |
269 | 0 | if (dlist[bi].by == dlist[bi + blk_offset].by && |
270 | 0 | dlist[bi].bx + blk_offset == dlist[bi + blk_offset].bx) |
271 | 0 | return 1; |
272 | | |
273 | 0 | return 0; |
274 | 0 | } |
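A simplified standalone sketch of the adjacency test above. The struct and function names are hypothetical stand-ins; the real code also tracks an iter counter, which advances in lockstep with bi in compute_cdef_dist(), so bi is used directly here.

    #include <stdio.h>

    /* Hypothetical stand-in for libaom's cdef_list entry. */
    typedef struct { int by, bx; } toy_cdef_list;

    /* Returns 1 when the block at index bi and the next (width == 8) or the
       next three (width == 4) blocks sit in the same row and are horizontally
       contiguous, so one 16-pixel-wide MSE call can cover all of them. */
    static int toy_is_dual_or_quad(const toy_cdef_list *dlist, int width,
                                   int cdef_count, int bi) {
      const int blk_offset = (width == 8) ? 1 : 3;
      if (bi + blk_offset >= cdef_count) return 0;
      return dlist[bi].by == dlist[bi + blk_offset].by &&
             dlist[bi].bx + blk_offset == dlist[bi + blk_offset].bx;
    }

    int main(void) {
      const toy_cdef_list dlist[] = { { 0, 0 }, { 0, 1 }, { 1, 0 }, { 1, 2 } };
      printf("%d\n", toy_is_dual_or_quad(dlist, 8, 4, 0)); /* 1: (0,0)+(0,1) */
      printf("%d\n", toy_is_dual_or_quad(dlist, 8, 4, 2)); /* 0: (1,0) vs (1,2) */
      return 0;
    }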
275 | | |
276 | | static uint64_t compute_cdef_dist(void *dst, int dstride, uint16_t *src, |
277 | | cdef_list *dlist, int cdef_count, |
278 | | BLOCK_SIZE bsize, int coeff_shift, int row, |
279 | 0 | int col) { |
280 | 0 | assert(bsize == BLOCK_4X4 || bsize == BLOCK_4X8 || bsize == BLOCK_8X4 || |
281 | 0 | bsize == BLOCK_8X8); |
282 | 0 | uint64_t sum = 0; |
283 | 0 | int bi, bx, by; |
284 | 0 | int iter = 0; |
285 | 0 | int inc = 1; |
286 | 0 | uint8_t *dst8 = (uint8_t *)dst; |
287 | 0 | uint8_t *dst_buff = &dst8[row * dstride + col]; |
288 | 0 | int src_stride, width, height, width_log2, height_log2; |
289 | 0 | init_src_params(&src_stride, &width, &height, &width_log2, &height_log2, |
290 | 0 | bsize); |
291 | |
292 | 0 | const int num_blks = 16 / width; |
293 | 0 | for (bi = 0; bi < cdef_count; bi += inc) { |
294 | 0 | by = dlist[bi].by; |
295 | 0 | bx = dlist[bi].bx; |
296 | 0 | uint16_t *src_tmp = &src[bi << (height_log2 + width_log2)]; |
297 | 0 | uint8_t *dst_tmp = |
298 | 0 | &dst_buff[(by << height_log2) * dstride + (bx << width_log2)]; |
299 | |
300 | 0 | if (is_dual_or_quad_applicable(dlist, width, cdef_count, bi, iter)) { |
301 | 0 | sum += aom_mse_16xh_16bit(dst_tmp, dstride, src_tmp, width, height); |
302 | 0 | iter += num_blks; |
303 | 0 | inc = num_blks; |
304 | 0 | } else { |
305 | 0 | sum += aom_mse_wxh_16bit(dst_tmp, dstride, src_tmp, src_stride, width, |
306 | 0 | height); |
307 | 0 | iter += 1; |
308 | 0 | inc = 1; |
309 | 0 | } |
310 | 0 | } |
311 | |
312 | 0 | return sum >> 2 * coeff_shift; |
313 | 0 | } |
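Aside on the final sum >> 2 * coeff_shift (used by both distortion helpers above): coeff_shift is bit_depth - 8 (see cdef_params_init further down), so the accumulated squared error, which grows by a factor of 2^(2 * coeff_shift) at higher bit depths, is scaled back to 8-bit units. A minimal sketch with made-up numbers:

    #include <stdint.h>
    #include <stdio.h>

    /* Squared error scales by (1 << coeff_shift)^2 when pixels carry extra bit
       depth, so shifting right by 2 * coeff_shift restores 8-bit units. */
    static uint64_t normalize_sse(uint64_t sum, int coeff_shift) {
      return sum >> (2 * coeff_shift);
    }

    int main(void) {
      const int bit_depth = 10;
      const int coeff_shift = bit_depth - 8;  /* 2 for 10-bit input */
      const uint64_t sse_10bit = 64000;       /* made-up accumulated SSE */
      printf("%llu\n", (unsigned long long)normalize_sse(sse_10bit, coeff_shift));
      /* Prints 4000, i.e. 64000 / 16. */
      return 0;
    }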
314 | | |
315 | | // Fill the boundary regions of the block with CDEF_VERY_LARGE, only if the |
316 | | // region is outside the frame boundary. |
317 | | static inline void fill_borders_for_fbs_on_frame_boundary( |
318 | | uint16_t *inbuf, int hfilt_size, int vfilt_size, |
319 | | bool is_fb_on_frm_left_boundary, bool is_fb_on_frm_right_boundary, |
320 | 0 | bool is_fb_on_frm_top_boundary, bool is_fb_on_frm_bottom_boundary) { |
321 | 0 | if (!is_fb_on_frm_left_boundary && !is_fb_on_frm_right_boundary && |
322 | 0 | !is_fb_on_frm_top_boundary && !is_fb_on_frm_bottom_boundary) |
323 | 0 | return; |
324 | 0 | if (is_fb_on_frm_bottom_boundary) { |
325 | | // Fill bottom region of the block |
326 | 0 | const int buf_offset = |
327 | 0 | (vfilt_size + CDEF_VBORDER) * CDEF_BSTRIDE + CDEF_HBORDER; |
328 | 0 | fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, CDEF_VBORDER, hfilt_size, |
329 | 0 | CDEF_VERY_LARGE); |
330 | 0 | } |
331 | 0 | if (is_fb_on_frm_bottom_boundary || is_fb_on_frm_left_boundary) { |
332 | 0 | const int buf_offset = (vfilt_size + CDEF_VBORDER) * CDEF_BSTRIDE; |
333 | | // Fill bottom-left region of the block |
334 | 0 | fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER, |
335 | 0 | CDEF_VERY_LARGE); |
336 | 0 | } |
337 | 0 | if (is_fb_on_frm_bottom_boundary || is_fb_on_frm_right_boundary) { |
338 | 0 | const int buf_offset = |
339 | 0 | (vfilt_size + CDEF_VBORDER) * CDEF_BSTRIDE + hfilt_size + CDEF_HBORDER; |
340 | | // Fill bottom-right region of the block |
341 | 0 | fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER, |
342 | 0 | CDEF_VERY_LARGE); |
343 | 0 | } |
344 | 0 | if (is_fb_on_frm_top_boundary) { |
345 | | // Fill top region of the block |
346 | 0 | fill_rect(&inbuf[CDEF_HBORDER], CDEF_BSTRIDE, CDEF_VBORDER, hfilt_size, |
347 | 0 | CDEF_VERY_LARGE); |
348 | 0 | } |
349 | 0 | if (is_fb_on_frm_top_boundary || is_fb_on_frm_left_boundary) { |
350 | | // Fill top-left region of the block |
351 | 0 | fill_rect(inbuf, CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER, CDEF_VERY_LARGE); |
352 | 0 | } |
353 | 0 | if (is_fb_on_frm_top_boundary || is_fb_on_frm_right_boundary) { |
354 | 0 | const int buf_offset = hfilt_size + CDEF_HBORDER; |
355 | | // Fill top-right region of the block |
356 | 0 | fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER, |
357 | 0 | CDEF_VERY_LARGE); |
358 | 0 | } |
359 | 0 | if (is_fb_on_frm_left_boundary) { |
360 | 0 | const int buf_offset = CDEF_VBORDER * CDEF_BSTRIDE; |
361 | | // Fill left region of the block |
362 | 0 | fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, vfilt_size, CDEF_HBORDER, |
363 | 0 | CDEF_VERY_LARGE); |
364 | 0 | } |
365 | 0 | if (is_fb_on_frm_right_boundary) { |
366 | 0 | const int buf_offset = CDEF_VBORDER * CDEF_BSTRIDE; |
367 | | // Fill right region of the block |
368 | 0 | fill_rect(&inbuf[buf_offset + hfilt_size + CDEF_HBORDER], CDEF_BSTRIDE, |
369 | 0 | vfilt_size, CDEF_HBORDER, CDEF_VERY_LARGE); |
370 | 0 | } |
371 | 0 | } |
372 | | |
373 | | // Calculate the number of 8x8/4x4 filter units for which SSE can be calculated |
374 | | // after CDEF filtering in a single function call. |
375 | | static AOM_FORCE_INLINE int get_error_calc_width_in_filt_units( |
376 | | cdef_list *dlist, int cdef_count, int bi, int subsampling_x, |
377 | 0 | int subsampling_y) { |
378 | | // TODO(Ranjit): Extend the optimization for 422 |
379 | 0 | if (subsampling_x != subsampling_y) return 1; |
380 | | |
381 | | // Combining more blocks seems to increase encode time due to an increase in |
382 | | // control code. |
383 | 0 | if (bi + 3 < cdef_count && dlist[bi].by == dlist[bi + 3].by && |
384 | 0 | dlist[bi].bx + 3 == dlist[bi + 3].bx) { |
385 | | /* Calculate the error for four 8x8/4x4 blocks using 32x8/16x4 block-specific |
386 | | * logic if the y coordinates match and the x coordinates of the first and |
387 | | * fourth 8x8/4x4 blocks in dlist[] differ by 3. */ |
388 | 0 | return 4; |
389 | 0 | } |
390 | 0 | if (bi + 1 < cdef_count && dlist[bi].by == dlist[bi + 1].by && |
391 | 0 | dlist[bi].bx + 1 == dlist[bi + 1].bx) { |
392 | | /* Calculate the error for two 8x8/4x4 blocks using 16x8/8x4 block-specific |
393 | | * logic if the y coordinates match and the x coordinates of the first and |
394 | | * second 8x8/4x4 blocks in dlist[] differ by 1. */ |
395 | 0 | return 2; |
396 | 0 | } |
397 | 0 | return 1; |
398 | 0 | } |
399 | | |
400 | | // Returns the block error after CDEF filtering for a given strength |
401 | | static inline uint64_t get_filt_error( |
402 | | const CdefSearchCtx *cdef_search_ctx, const struct macroblockd_plane *pd, |
403 | | cdef_list *dlist, int dir[CDEF_NBLOCKS][CDEF_NBLOCKS], int *dirinit, |
404 | | int var[CDEF_NBLOCKS][CDEF_NBLOCKS], uint16_t *in, uint8_t *ref_buffer, |
405 | | int ref_stride, int row, int col, int pri_strength, int sec_strength, |
406 | 0 | int cdef_count, int pli, int coeff_shift, BLOCK_SIZE bs) { |
407 | 0 | uint64_t curr_sse = 0; |
408 | 0 | const BLOCK_SIZE plane_bsize = |
409 | 0 | get_plane_block_size(bs, pd->subsampling_x, pd->subsampling_y); |
410 | 0 | const int bw_log2 = 3 - pd->subsampling_x; |
411 | 0 | const int bh_log2 = 3 - pd->subsampling_y; |
412 | | |
413 | | // TODO(Ranjit): Extend this optimization for HBD |
414 | 0 | if (!cdef_search_ctx->use_highbitdepth) { |
415 | | // If all 8x8/4x4 blocks in CDEF block need to be filtered, calculate the |
416 | | // error at CDEF block level |
417 | 0 | const int tot_blk_count = |
418 | 0 | (block_size_wide[plane_bsize] * block_size_high[plane_bsize]) >> |
419 | 0 | (bw_log2 + bh_log2); |
420 | 0 | if (cdef_count == tot_blk_count) { |
421 | | // Calculate the offset in the buffer based on block position |
422 | 0 | const FULLPEL_MV this_mv = { row, col }; |
423 | 0 | const int buf_offset = get_offset_from_fullmv(&this_mv, ref_stride); |
424 | 0 | if (pri_strength == 0 && sec_strength == 0) { |
425 | | // When CDEF strength is zero, filtering is not applied. Hence |
426 | | // error is calculated between source and unfiltered pixels |
427 | 0 | curr_sse = |
428 | 0 | aom_sse(&ref_buffer[buf_offset], ref_stride, |
429 | 0 | get_buf_from_fullmv(&pd->dst, &this_mv), pd->dst.stride, |
430 | 0 | block_size_wide[plane_bsize], block_size_high[plane_bsize]); |
431 | 0 | } else { |
432 | 0 | DECLARE_ALIGNED(32, uint8_t, tmp_dst8[1 << (MAX_SB_SIZE_LOG2 * 2)]); |
433 | |
434 | 0 | av1_cdef_filter_fb(tmp_dst8, NULL, (1 << MAX_SB_SIZE_LOG2), in, |
435 | 0 | cdef_search_ctx->xdec[pli], |
436 | 0 | cdef_search_ctx->ydec[pli], dir, dirinit, var, pli, |
437 | 0 | dlist, cdef_count, pri_strength, |
438 | 0 | sec_strength + (sec_strength == 3), |
439 | 0 | cdef_search_ctx->damping, coeff_shift); |
440 | 0 | curr_sse = |
441 | 0 | aom_sse(&ref_buffer[buf_offset], ref_stride, tmp_dst8, |
442 | 0 | (1 << MAX_SB_SIZE_LOG2), block_size_wide[plane_bsize], |
443 | 0 | block_size_high[plane_bsize]); |
444 | 0 | } |
445 | 0 | } else { |
446 | | // If only some of the 8x8/4x4 blocks in the CDEF block need to be filtered, |
447 | | // the filtering functions produce 8-bit output and the error is calculated |
448 | | // in the 8-bit domain. |
449 | 0 | if (pri_strength == 0 && sec_strength == 0) { |
450 | 0 | int num_error_calc_filt_units = 1; |
451 | 0 | for (int bi = 0; bi < cdef_count; bi = bi + num_error_calc_filt_units) { |
452 | 0 | const uint8_t by = dlist[bi].by; |
453 | 0 | const uint8_t bx = dlist[bi].bx; |
454 | 0 | const int16_t by_pos = (by << bh_log2); |
455 | 0 | const int16_t bx_pos = (bx << bw_log2); |
456 | | // Calculate the offset in the buffer based on block position |
457 | 0 | const FULLPEL_MV this_mv = { row + by_pos, col + bx_pos }; |
458 | 0 | const int buf_offset = get_offset_from_fullmv(&this_mv, ref_stride); |
459 | 0 | num_error_calc_filt_units = get_error_calc_width_in_filt_units( |
460 | 0 | dlist, cdef_count, bi, pd->subsampling_x, pd->subsampling_y); |
461 | 0 | curr_sse += aom_sse( |
462 | 0 | &ref_buffer[buf_offset], ref_stride, |
463 | 0 | get_buf_from_fullmv(&pd->dst, &this_mv), pd->dst.stride, |
464 | 0 | num_error_calc_filt_units * (1 << bw_log2), (1 << bh_log2)); |
465 | 0 | } |
466 | 0 | } else { |
467 | 0 | DECLARE_ALIGNED(32, uint8_t, tmp_dst8[1 << (MAX_SB_SIZE_LOG2 * 2)]); |
468 | 0 | av1_cdef_filter_fb(tmp_dst8, NULL, (1 << MAX_SB_SIZE_LOG2), in, |
469 | 0 | cdef_search_ctx->xdec[pli], |
470 | 0 | cdef_search_ctx->ydec[pli], dir, dirinit, var, pli, |
471 | 0 | dlist, cdef_count, pri_strength, |
472 | 0 | sec_strength + (sec_strength == 3), |
473 | 0 | cdef_search_ctx->damping, coeff_shift); |
474 | 0 | int num_error_calc_filt_units = 1; |
475 | 0 | for (int bi = 0; bi < cdef_count; bi = bi + num_error_calc_filt_units) { |
476 | 0 | const uint8_t by = dlist[bi].by; |
477 | 0 | const uint8_t bx = dlist[bi].bx; |
478 | 0 | const int16_t by_pos = (by << bh_log2); |
479 | 0 | const int16_t bx_pos = (bx << bw_log2); |
480 | | // Calculate the offset in the buffer based on block position |
481 | 0 | const FULLPEL_MV this_mv = { row + by_pos, col + bx_pos }; |
482 | 0 | const FULLPEL_MV tmp_buf_pos = { by_pos, bx_pos }; |
483 | 0 | const int buf_offset = get_offset_from_fullmv(&this_mv, ref_stride); |
484 | 0 | const int tmp_buf_offset = |
485 | 0 | get_offset_from_fullmv(&tmp_buf_pos, (1 << MAX_SB_SIZE_LOG2)); |
486 | 0 | num_error_calc_filt_units = get_error_calc_width_in_filt_units( |
487 | 0 | dlist, cdef_count, bi, pd->subsampling_x, pd->subsampling_y); |
488 | 0 | curr_sse += aom_sse( |
489 | 0 | &ref_buffer[buf_offset], ref_stride, &tmp_dst8[tmp_buf_offset], |
490 | 0 | (1 << MAX_SB_SIZE_LOG2), |
491 | 0 | num_error_calc_filt_units * (1 << bw_log2), (1 << bh_log2)); |
492 | 0 | } |
493 | 0 | } |
494 | 0 | } |
495 | 0 | } else { |
496 | 0 | DECLARE_ALIGNED(32, uint16_t, tmp_dst[1 << (MAX_SB_SIZE_LOG2 * 2)]); |
497 | |
498 | 0 | av1_cdef_filter_fb(NULL, tmp_dst, CDEF_BSTRIDE, in, |
499 | 0 | cdef_search_ctx->xdec[pli], cdef_search_ctx->ydec[pli], |
500 | 0 | dir, dirinit, var, pli, dlist, cdef_count, pri_strength, |
501 | 0 | sec_strength + (sec_strength == 3), |
502 | 0 | cdef_search_ctx->damping, coeff_shift); |
503 | 0 | curr_sse = cdef_search_ctx->compute_cdef_dist_fn( |
504 | 0 | ref_buffer, ref_stride, tmp_dst, dlist, cdef_count, |
505 | 0 | cdef_search_ctx->bsize[pli], coeff_shift, row, col); |
506 | 0 | } |
507 | 0 | return curr_sse; |
508 | 0 | } |
509 | | |
510 | | // Calculates MSE at block level. |
511 | | // Inputs: |
512 | | // cdef_search_ctx: Pointer to the structure containing parameters related to |
513 | | // CDEF search context. |
514 | | // fbr: Row index in units of 64x64 block |
515 | | // fbc: Column index in units of 64x64 block |
516 | | // Returns: |
517 | | // Nothing will be returned. Contents of cdef_search_ctx will be modified. |
518 | | void av1_cdef_mse_calc_block(CdefSearchCtx *cdef_search_ctx, |
519 | | struct aom_internal_error_info *error_info, |
520 | 0 | int fbr, int fbc, int sb_count) { |
521 | | // TODO(aomedia:3276): Pass error_info to the low-level functions as required |
522 | | // in the future to handle error propagation. |
523 | 0 | (void)error_info; |
524 | 0 | const CommonModeInfoParams *const mi_params = cdef_search_ctx->mi_params; |
525 | 0 | const YV12_BUFFER_CONFIG *ref = cdef_search_ctx->ref; |
526 | 0 | const int coeff_shift = cdef_search_ctx->coeff_shift; |
527 | 0 | const int *mi_wide_l2 = cdef_search_ctx->mi_wide_l2; |
528 | 0 | const int *mi_high_l2 = cdef_search_ctx->mi_high_l2; |
529 | | |
530 | | // Declare and initialize the temporary buffers. |
531 | 0 | DECLARE_ALIGNED(32, uint16_t, inbuf[CDEF_INBUF_SIZE]); |
532 | 0 | cdef_list dlist[MI_SIZE_128X128 * MI_SIZE_128X128]; |
533 | 0 | int dir[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } }; |
534 | 0 | int var[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } }; |
535 | 0 | uint16_t *const in = inbuf + CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER; |
536 | 0 | int nhb = AOMMIN(MI_SIZE_64X64, mi_params->mi_cols - MI_SIZE_64X64 * fbc); |
537 | 0 | int nvb = AOMMIN(MI_SIZE_64X64, mi_params->mi_rows - MI_SIZE_64X64 * fbr); |
538 | 0 | int hb_step = 1, vb_step = 1; |
539 | 0 | BLOCK_SIZE bs; |
540 | |
541 | 0 | const MB_MODE_INFO *const mbmi = |
542 | 0 | mi_params->mi_grid_base[MI_SIZE_64X64 * fbr * mi_params->mi_stride + |
543 | 0 | MI_SIZE_64X64 * fbc]; |
544 | |
545 | 0 | uint8_t *ref_buffer[MAX_MB_PLANE] = { ref->y_buffer, ref->u_buffer, |
546 | 0 | ref->v_buffer }; |
547 | 0 | int ref_stride[MAX_MB_PLANE] = { ref->y_stride, ref->uv_stride, |
548 | 0 | ref->uv_stride }; |
549 | |
550 | 0 | if (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_128X64 || |
551 | 0 | mbmi->bsize == BLOCK_64X128) { |
552 | 0 | bs = mbmi->bsize; |
553 | 0 | if (bs == BLOCK_128X128 || bs == BLOCK_128X64) { |
554 | 0 | nhb = AOMMIN(MI_SIZE_128X128, mi_params->mi_cols - MI_SIZE_64X64 * fbc); |
555 | 0 | hb_step = 2; |
556 | 0 | } |
557 | 0 | if (bs == BLOCK_128X128 || bs == BLOCK_64X128) { |
558 | 0 | nvb = AOMMIN(MI_SIZE_128X128, mi_params->mi_rows - MI_SIZE_64X64 * fbr); |
559 | 0 | vb_step = 2; |
560 | 0 | } |
561 | 0 | } else { |
562 | 0 | bs = BLOCK_64X64; |
563 | 0 | } |
564 | | // Get the number of 8x8 blocks that are not skipped. CDEF processing happens |
565 | | // only for 8x8 blocks that are not skipped. |
566 | 0 | const int cdef_count = av1_cdef_compute_sb_list( |
567 | 0 | mi_params, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64, dlist, bs); |
568 | 0 | const bool is_fb_on_frm_left_boundary = (fbc == 0); |
569 | 0 | const bool is_fb_on_frm_right_boundary = |
570 | 0 | (fbc + hb_step == cdef_search_ctx->nhfb); |
571 | 0 | const bool is_fb_on_frm_top_boundary = (fbr == 0); |
572 | 0 | const bool is_fb_on_frm_bottom_boundary = |
573 | 0 | (fbr + vb_step == cdef_search_ctx->nvfb); |
574 | 0 | const int yoff = CDEF_VBORDER * (!is_fb_on_frm_top_boundary); |
575 | 0 | const int xoff = CDEF_HBORDER * (!is_fb_on_frm_left_boundary); |
576 | 0 | int dirinit = 0; |
577 | 0 | for (int pli = 0; pli < cdef_search_ctx->num_planes; pli++) { |
578 | | /* We avoid filtering the pixels for which some of the pixels to |
579 | | average are outside the frame. We could change the filter instead, |
580 | | but it would add special cases for any future vectorization. */ |
581 | 0 | const int hfilt_size = (nhb << mi_wide_l2[pli]); |
582 | 0 | const int vfilt_size = (nvb << mi_high_l2[pli]); |
583 | 0 | const int ysize = |
584 | 0 | vfilt_size + CDEF_VBORDER * (!is_fb_on_frm_bottom_boundary) + yoff; |
585 | 0 | const int xsize = |
586 | 0 | hfilt_size + CDEF_HBORDER * (!is_fb_on_frm_right_boundary) + xoff; |
587 | 0 | const int row = fbr * MI_SIZE_64X64 << mi_high_l2[pli]; |
588 | 0 | const int col = fbc * MI_SIZE_64X64 << mi_wide_l2[pli]; |
589 | 0 | struct macroblockd_plane pd = cdef_search_ctx->plane[pli]; |
590 | 0 | cdef_search_ctx->copy_fn(&in[(-yoff * CDEF_BSTRIDE - xoff)], CDEF_BSTRIDE, |
591 | 0 | pd.dst.buf, row - yoff, col - xoff, pd.dst.stride, |
592 | 0 | ysize, xsize); |
593 | 0 | fill_borders_for_fbs_on_frame_boundary( |
594 | 0 | inbuf, hfilt_size, vfilt_size, is_fb_on_frm_left_boundary, |
595 | 0 | is_fb_on_frm_right_boundary, is_fb_on_frm_top_boundary, |
596 | 0 | is_fb_on_frm_bottom_boundary); |
597 | 0 | for (int gi = 0; gi < cdef_search_ctx->total_strengths; gi++) { |
598 | 0 | int pri_strength, sec_strength; |
599 | 0 | get_cdef_filter_strengths(cdef_search_ctx->pick_method, &pri_strength, |
600 | 0 | &sec_strength, gi); |
601 | 0 | const uint64_t curr_mse = get_filt_error( |
602 | 0 | cdef_search_ctx, &pd, dlist, dir, &dirinit, var, in, ref_buffer[pli], |
603 | 0 | ref_stride[pli], row, col, pri_strength, sec_strength, cdef_count, |
604 | 0 | pli, coeff_shift, bs); |
605 | 0 | if (pli < 2) |
606 | 0 | cdef_search_ctx->mse[pli][sb_count][gi] = curr_mse; |
607 | 0 | else |
608 | 0 | cdef_search_ctx->mse[1][sb_count][gi] += curr_mse; |
609 | 0 | } |
610 | 0 | } |
611 | 0 | cdef_search_ctx->sb_index[sb_count] = |
612 | 0 | MI_SIZE_64X64 * fbr * mi_params->mi_stride + MI_SIZE_64X64 * fbc; |
613 | 0 | } |
614 | | |
615 | | // MSE calculation at frame level. |
616 | | // Inputs: |
617 | | // cdef_search_ctx: Pointer to the structure containing parameters related to |
618 | | // CDEF search context. |
619 | | // Returns: |
620 | | // Nothing will be returned. Contents of cdef_search_ctx will be modified. |
621 | | static void cdef_mse_calc_frame(CdefSearchCtx *cdef_search_ctx, |
622 | 0 | struct aom_internal_error_info *error_info) { |
623 | | // Loop over each sb. |
624 | 0 | for (int fbr = 0; fbr < cdef_search_ctx->nvfb; ++fbr) { |
625 | 0 | for (int fbc = 0; fbc < cdef_search_ctx->nhfb; ++fbc) { |
626 | | // Checks if cdef processing can be skipped for a particular sb. |
627 | 0 | if (cdef_sb_skip(cdef_search_ctx->mi_params, fbr, fbc)) continue; |
628 | | // Calculate mse for each sb and store the relevant sb index. |
629 | 0 | av1_cdef_mse_calc_block(cdef_search_ctx, error_info, fbr, fbc, |
630 | 0 | cdef_search_ctx->sb_count); |
631 | 0 | cdef_search_ctx->sb_count++; |
632 | 0 | } |
633 | 0 | } |
634 | 0 | } |
635 | | |
636 | | // Allocates memory for members of CdefSearchCtx. |
637 | | // Inputs: |
638 | | // cdef_search_ctx: Pointer to the structure containing parameters |
639 | | // related to CDEF search context. |
640 | | // Returns: |
641 | | // Nothing will be returned. Contents of cdef_search_ctx will be modified. |
642 | 0 | static void cdef_alloc_data(AV1_COMMON *cm, CdefSearchCtx *cdef_search_ctx) { |
643 | 0 | const int nvfb = cdef_search_ctx->nvfb; |
644 | 0 | const int nhfb = cdef_search_ctx->nhfb; |
645 | 0 | CHECK_MEM_ERROR( |
646 | 0 | cm, cdef_search_ctx->sb_index, |
647 | 0 | aom_malloc(nvfb * nhfb * sizeof(cdef_search_ctx->sb_index[0]))); |
648 | 0 | cdef_search_ctx->sb_count = 0; |
649 | 0 | CHECK_MEM_ERROR(cm, cdef_search_ctx->mse[0], |
650 | 0 | aom_malloc(sizeof(**cdef_search_ctx->mse) * nvfb * nhfb)); |
651 | 0 | CHECK_MEM_ERROR(cm, cdef_search_ctx->mse[1], |
652 | 0 | aom_malloc(sizeof(**cdef_search_ctx->mse) * nvfb * nhfb)); |
653 | 0 | } |
654 | | |
655 | | // Deallocates the memory allocated for members of CdefSearchCtx. |
656 | | // Inputs: |
657 | | // cdef_search_ctx: Pointer to the structure containing parameters |
658 | | // related to CDEF search context. |
659 | | // Returns: |
660 | | // Nothing will be returned. |
661 | 0 | void av1_cdef_dealloc_data(CdefSearchCtx *cdef_search_ctx) { |
662 | 0 | if (cdef_search_ctx) { |
663 | 0 | aom_free(cdef_search_ctx->mse[0]); |
664 | 0 | cdef_search_ctx->mse[0] = NULL; |
665 | 0 | aom_free(cdef_search_ctx->mse[1]); |
666 | 0 | cdef_search_ctx->mse[1] = NULL; |
667 | 0 | aom_free(cdef_search_ctx->sb_index); |
668 | 0 | cdef_search_ctx->sb_index = NULL; |
669 | 0 | } |
670 | 0 | } |
671 | | |
672 | | // Initialize the parameters related to CDEF search context. |
673 | | // Inputs: |
674 | | // frame: Pointer to compressed frame buffer |
675 | | // ref: Pointer to the frame buffer holding the source frame |
676 | | // cm: Pointer to top level common structure |
677 | | // xd: Pointer to common current coding block structure |
678 | | // cdef_search_ctx: Pointer to the structure containing parameters related to |
679 | | // CDEF search context. |
680 | | // pick_method: Search method used to select CDEF parameters |
681 | | // Returns: |
682 | | // Nothing will be returned. Contents of cdef_search_ctx will be modified. |
683 | | static inline void cdef_params_init(const YV12_BUFFER_CONFIG *frame, |
684 | | const YV12_BUFFER_CONFIG *ref, |
685 | | AV1_COMMON *cm, MACROBLOCKD *xd, |
686 | | CdefSearchCtx *cdef_search_ctx, |
687 | 0 | CDEF_PICK_METHOD pick_method) { |
688 | 0 | const CommonModeInfoParams *const mi_params = &cm->mi_params; |
689 | 0 | const int num_planes = av1_num_planes(cm); |
690 | 0 | cdef_search_ctx->mi_params = &cm->mi_params; |
691 | 0 | cdef_search_ctx->ref = ref; |
692 | 0 | cdef_search_ctx->nvfb = |
693 | 0 | (mi_params->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64; |
694 | 0 | cdef_search_ctx->nhfb = |
695 | 0 | (mi_params->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64; |
696 | 0 | cdef_search_ctx->coeff_shift = AOMMAX(cm->seq_params->bit_depth - 8, 0); |
697 | 0 | cdef_search_ctx->damping = 3 + (cm->quant_params.base_qindex >> 6); |
698 | 0 | cdef_search_ctx->total_strengths = nb_cdef_strengths[pick_method]; |
699 | 0 | cdef_search_ctx->num_planes = num_planes; |
700 | 0 | cdef_search_ctx->pick_method = pick_method; |
701 | 0 | cdef_search_ctx->sb_count = 0; |
702 | 0 | cdef_search_ctx->use_highbitdepth = cm->seq_params->use_highbitdepth; |
703 | 0 | av1_setup_dst_planes(xd->plane, cm->seq_params->sb_size, frame, 0, 0, 0, |
704 | 0 | num_planes); |
705 | | // Initialize plane wise information. |
706 | 0 | for (int pli = 0; pli < num_planes; pli++) { |
707 | 0 | cdef_search_ctx->xdec[pli] = xd->plane[pli].subsampling_x; |
708 | 0 | cdef_search_ctx->ydec[pli] = xd->plane[pli].subsampling_y; |
709 | 0 | cdef_search_ctx->bsize[pli] = |
710 | 0 | cdef_search_ctx->ydec[pli] |
711 | 0 | ? (cdef_search_ctx->xdec[pli] ? BLOCK_4X4 : BLOCK_8X4) |
712 | 0 | : (cdef_search_ctx->xdec[pli] ? BLOCK_4X8 : BLOCK_8X8); |
713 | 0 | cdef_search_ctx->mi_wide_l2[pli] = |
714 | 0 | MI_SIZE_LOG2 - xd->plane[pli].subsampling_x; |
715 | 0 | cdef_search_ctx->mi_high_l2[pli] = |
716 | 0 | MI_SIZE_LOG2 - xd->plane[pli].subsampling_y; |
717 | 0 | cdef_search_ctx->plane[pli] = xd->plane[pli]; |
718 | 0 | } |
719 | | // Function pointer initialization. |
720 | 0 | #if CONFIG_AV1_HIGHBITDEPTH |
721 | 0 | if (cm->seq_params->use_highbitdepth) { |
722 | 0 | cdef_search_ctx->copy_fn = av1_cdef_copy_sb8_16_highbd; |
723 | 0 | cdef_search_ctx->compute_cdef_dist_fn = compute_cdef_dist_highbd; |
724 | 0 | } else { |
725 | 0 | cdef_search_ctx->copy_fn = av1_cdef_copy_sb8_16_lowbd; |
726 | 0 | cdef_search_ctx->compute_cdef_dist_fn = compute_cdef_dist; |
727 | 0 | } |
728 | | #else |
729 | | cdef_search_ctx->copy_fn = av1_cdef_copy_sb8_16_lowbd; |
730 | | cdef_search_ctx->compute_cdef_dist_fn = compute_cdef_dist; |
731 | | #endif |
732 | 0 | } |
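Aside on the damping value set in cdef_params_init() above: base_qindex is an 8-bit quantizer index (0..255), so 3 + (base_qindex >> 6) yields a damping of 3 to 6 that grows with the quantizer. A tiny sketch; the sample qindex values are arbitrary:

    #include <stdio.h>

    int main(void) {
      /* Same mapping as cdef_params_init(): damping grows with the quantizer. */
      const int sample_qindex[] = { 0, 63, 64, 150, 255 };
      for (size_t i = 0; i < sizeof(sample_qindex) / sizeof(sample_qindex[0]); i++) {
        const int damping = 3 + (sample_qindex[i] >> 6);
        printf("base_qindex=%3d -> damping=%d\n", sample_qindex[i], damping);
      }
      return 0;
    }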
733 | | |
734 | | void av1_pick_cdef_from_qp(AV1_COMMON *const cm, int skip_cdef, |
735 | 0 | int is_screen_content) { |
736 | 0 | const int bd = cm->seq_params->bit_depth; |
737 | 0 | const int q = |
738 | 0 | av1_ac_quant_QTX(cm->quant_params.base_qindex, 0, bd) >> (bd - 8); |
739 | 0 | CdefInfo *const cdef_info = &cm->cdef_info; |
740 | | // Check the speed feature to avoid extra signaling. |
741 | 0 | if (skip_cdef) { |
742 | 0 | cdef_info->cdef_bits = 1; |
743 | 0 | cdef_info->nb_cdef_strengths = 2; |
744 | 0 | } else { |
745 | 0 | cdef_info->cdef_bits = 0; |
746 | 0 | cdef_info->nb_cdef_strengths = 1; |
747 | 0 | } |
748 | 0 | cdef_info->cdef_damping = 3 + (cm->quant_params.base_qindex >> 6); |
749 | |
750 | 0 | int predicted_y_f1 = 0; |
751 | 0 | int predicted_y_f2 = 0; |
752 | 0 | int predicted_uv_f1 = 0; |
753 | 0 | int predicted_uv_f2 = 0; |
754 | 0 | if (is_screen_content) { |
755 | 0 | predicted_y_f1 = |
756 | 0 | (int)(5.88217781e-06 * q * q + 6.10391455e-03 * q + 9.95043102e-02); |
757 | 0 | predicted_y_f2 = |
758 | 0 | (int)(-7.79934857e-06 * q * q + 6.58957830e-03 * q + 8.81045025e-01); |
759 | 0 | predicted_uv_f1 = |
760 | 0 | (int)(-6.79500136e-06 * q * q + 1.02695586e-02 * q + 1.36126802e-01); |
761 | 0 | predicted_uv_f2 = |
762 | 0 | (int)(-9.99613695e-08 * q * q - 1.79361339e-05 * q + 1.17022324e+0); |
763 | 0 | predicted_y_f1 = clamp(predicted_y_f1, 0, 15); |
764 | 0 | predicted_y_f2 = clamp(predicted_y_f2, 0, 3); |
765 | 0 | predicted_uv_f1 = clamp(predicted_uv_f1, 0, 15); |
766 | 0 | predicted_uv_f2 = clamp(predicted_uv_f2, 0, 3); |
767 | 0 | } else { |
768 | 0 | if (!frame_is_intra_only(cm)) { |
769 | 0 | predicted_y_f1 = clamp((int)roundf(q * q * -0.0000023593946f + |
770 | 0 | q * 0.0068615186f + 0.02709886f), |
771 | 0 | 0, 15); |
772 | 0 | predicted_y_f2 = clamp((int)roundf(q * q * -0.00000057629734f + |
773 | 0 | q * 0.0013993345f + 0.03831067f), |
774 | 0 | 0, 3); |
775 | 0 | predicted_uv_f1 = clamp((int)roundf(q * q * -0.0000007095069f + |
776 | 0 | q * 0.0034628846f + 0.00887099f), |
777 | 0 | 0, 15); |
778 | 0 | predicted_uv_f2 = clamp((int)roundf(q * q * 0.00000023874085f + |
779 | 0 | q * 0.00028223585f + 0.05576307f), |
780 | 0 | 0, 3); |
781 | 0 | } else { |
782 | 0 | predicted_y_f1 = clamp( |
783 | 0 | (int)roundf(q * q * 0.0000033731974f + q * 0.008070594f + 0.0187634f), |
784 | 0 | 0, 15); |
785 | 0 | predicted_y_f2 = clamp((int)roundf(q * q * 0.0000029167343f + |
786 | 0 | q * 0.0027798624f + 0.0079405f), |
787 | 0 | 0, 3); |
788 | 0 | predicted_uv_f1 = clamp((int)roundf(q * q * -0.0000130790995f + |
789 | 0 | q * 0.012892405f - 0.00748388f), |
790 | 0 | 0, 15); |
791 | 0 | predicted_uv_f2 = clamp((int)roundf(q * q * 0.0000032651783f + |
792 | 0 | q * 0.00035520183f + 0.00228092f), |
793 | 0 | 0, 3); |
794 | 0 | } |
795 | 0 | } |
796 | 0 | cdef_info->cdef_strengths[0] = |
797 | 0 | predicted_y_f1 * CDEF_SEC_STRENGTHS + predicted_y_f2; |
798 | 0 | cdef_info->cdef_uv_strengths[0] = |
799 | 0 | predicted_uv_f1 * CDEF_SEC_STRENGTHS + predicted_uv_f2; |
800 | | |
801 | | // mbmi->cdef_strength is already set in the encoding stage. We don't need to |
802 | | // set it again here. |
803 | 0 | if (skip_cdef) { |
804 | 0 | cdef_info->cdef_strengths[1] = 0; |
805 | 0 | cdef_info->cdef_uv_strengths[1] = 0; |
806 | 0 | return; |
807 | 0 | } |
808 | | |
809 | 0 | const CommonModeInfoParams *const mi_params = &cm->mi_params; |
810 | 0 | const int nvfb = (mi_params->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64; |
811 | 0 | const int nhfb = (mi_params->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64; |
812 | 0 | MB_MODE_INFO **mbmi = mi_params->mi_grid_base; |
813 | | // mbmi is NULL when real-time rate control library is used. |
814 | 0 | if (!mbmi) return; |
815 | 0 | for (int r = 0; r < nvfb; ++r) { |
816 | 0 | for (int c = 0; c < nhfb; ++c) { |
817 | 0 | MB_MODE_INFO *current_mbmi = mbmi[MI_SIZE_64X64 * c]; |
818 | 0 | current_mbmi->cdef_strength = 0; |
819 | 0 | } |
820 | 0 | mbmi += MI_SIZE_64X64 * mi_params->mi_stride; |
821 | 0 | } |
822 | 0 | } |
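The qp-based pick above is a set of clamped quadratics in q. The sketch below evaluates the non-intra, non-screen-content luma model for a hypothetical q value; the real q comes from av1_ac_quant_QTX(), which is not reproduced here, and the final packing assumes CDEF_SEC_STRENGTHS is 4. clamp_int is a local helper, not the libaom clamp().

    #include <math.h>
    #include <stdio.h>

    static int clamp_int(int v, int lo, int hi) {
      return v < lo ? lo : (v > hi ? hi : v);
    }

    int main(void) {
      const int q = 500; /* hypothetical dequantizer value, normally
                            av1_ac_quant_QTX(base_qindex, 0, bd) >> (bd - 8) */
      /* Same non-intra luma strength model as av1_pick_cdef_from_qp(). */
      const int predicted_y_f1 = clamp_int(
          (int)roundf(q * q * -0.0000023593946f + q * 0.0068615186f + 0.02709886f),
          0, 15);
      const int predicted_y_f2 = clamp_int(
          (int)roundf(q * q * -0.00000057629734f + q * 0.0013993345f + 0.03831067f),
          0, 3);
      /* The two fields are then packed as pri * CDEF_SEC_STRENGTHS + sec. */
      printf("pri=%d sec=%d packed=%d\n", predicted_y_f1, predicted_y_f2,
             predicted_y_f1 * 4 + predicted_y_f2);
      return 0;
    }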
823 | | |
824 | 0 | void av1_cdef_search(AV1_COMP *cpi) { |
825 | 0 | AV1_COMMON *cm = &cpi->common; |
826 | 0 | CDEF_CONTROL cdef_control = cpi->oxcf.tool_cfg.cdef_control; |
827 | |
828 | 0 | assert(cdef_control != CDEF_NONE); |
829 | | // For CDEF_ADAPTIVE, turning off CDEF around qindex 32 was best for still |
830 | | // pictures |
831 | 0 | if ((cdef_control == CDEF_REFERENCE && |
832 | 0 | cpi->ppi->rtc_ref.non_reference_frame) || |
833 | 0 | (cdef_control == CDEF_ADAPTIVE && cpi->oxcf.mode == ALLINTRA && |
834 | 0 | (cpi->oxcf.rc_cfg.mode == AOM_Q || cpi->oxcf.rc_cfg.mode == AOM_CQ) && |
835 | 0 | cpi->oxcf.rc_cfg.cq_level <= 32)) { |
836 | 0 | CdefInfo *const cdef_info = &cm->cdef_info; |
837 | 0 | cdef_info->nb_cdef_strengths = 1; |
838 | 0 | cdef_info->cdef_bits = 0; |
839 | 0 | cdef_info->cdef_strengths[0] = 0; |
840 | 0 | cdef_info->cdef_uv_strengths[0] = 0; |
841 | 0 | return; |
842 | 0 | } |
843 | | |
844 | | // Indicate if external RC is used for testing |
845 | 0 | const int rtc_ext_rc = cpi->rc.rtc_external_ratectrl; |
846 | 0 | if (rtc_ext_rc) { |
847 | 0 | av1_pick_cdef_from_qp(cm, 0, 0); |
848 | 0 | return; |
849 | 0 | } |
850 | 0 | CDEF_PICK_METHOD pick_method = cpi->sf.lpf_sf.cdef_pick_method; |
851 | 0 | if (pick_method == CDEF_PICK_FROM_Q) { |
852 | 0 | const int use_screen_content_model = |
853 | 0 | cm->quant_params.base_qindex > |
854 | 0 | AOMMAX(cpi->sf.rt_sf.screen_content_cdef_filter_qindex_thresh, |
855 | 0 | cpi->rc.best_quality + 5) && |
856 | 0 | cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN; |
857 | 0 | av1_pick_cdef_from_qp(cm, cpi->sf.rt_sf.skip_cdef_sb, |
858 | 0 | use_screen_content_model); |
859 | 0 | return; |
860 | 0 | } |
861 | 0 | const CommonModeInfoParams *const mi_params = &cm->mi_params; |
862 | 0 | const int damping = 3 + (cm->quant_params.base_qindex >> 6); |
863 | 0 | const int fast = (pick_method >= CDEF_FAST_SEARCH_LVL1 && |
864 | 0 | pick_method <= CDEF_FAST_SEARCH_LVL5); |
865 | 0 | const int num_planes = av1_num_planes(cm); |
866 | 0 | MACROBLOCKD *xd = &cpi->td.mb.e_mbd; |
867 | |
868 | 0 | if (!cpi->cdef_search_ctx) |
869 | 0 | CHECK_MEM_ERROR(cm, cpi->cdef_search_ctx, |
870 | 0 | aom_malloc(sizeof(*cpi->cdef_search_ctx))); |
871 | 0 | CdefSearchCtx *cdef_search_ctx = cpi->cdef_search_ctx; |
872 | | |
873 | | // Initialize parameters related to CDEF search context. |
874 | 0 | cdef_params_init(&cm->cur_frame->buf, cpi->source, cm, xd, cdef_search_ctx, |
875 | 0 | pick_method); |
876 | | // Allocate CDEF search context buffers. |
877 | 0 | cdef_alloc_data(cm, cdef_search_ctx); |
878 | | // Frame level mse calculation. |
879 | 0 | if (cpi->mt_info.num_workers > 1) { |
880 | 0 | av1_cdef_mse_calc_frame_mt(cpi); |
881 | 0 | } else { |
882 | 0 | cdef_mse_calc_frame(cdef_search_ctx, cm->error); |
883 | 0 | } |
884 | | |
885 | | /* Search for different numbers of signaling bits. */ |
886 | 0 | int nb_strength_bits = 0; |
887 | 0 | uint64_t best_rd = UINT64_MAX; |
888 | 0 | CdefInfo *const cdef_info = &cm->cdef_info; |
889 | 0 | int sb_count = cdef_search_ctx->sb_count; |
890 | 0 | uint64_t(*mse[2])[TOTAL_STRENGTHS]; |
891 | 0 | mse[0] = cdef_search_ctx->mse[0]; |
892 | 0 | mse[1] = cdef_search_ctx->mse[1]; |
893 | | /* Calculate the maximum number of bits required to signal CDEF strengths at |
894 | | * block level */ |
895 | 0 | const int total_strengths = nb_cdef_strengths[pick_method]; |
896 | 0 | const int joint_strengths = |
897 | 0 | num_planes > 1 ? total_strengths * total_strengths : total_strengths; |
898 | 0 | const int max_signaling_bits = |
899 | 0 | joint_strengths == 1 ? 0 : get_msb(joint_strengths - 1) + 1; |
900 | 0 | int rdmult = cpi->td.mb.rdmult; |
901 | 0 | for (int i = 0; i <= 3; i++) { |
902 | 0 | if (i > max_signaling_bits) break; |
903 | 0 | int best_lev0[CDEF_MAX_STRENGTHS] = { 0 }; |
904 | 0 | int best_lev1[CDEF_MAX_STRENGTHS] = { 0 }; |
905 | 0 | const int nb_strengths = 1 << i; |
906 | 0 | uint64_t tot_mse; |
907 | 0 | if (num_planes > 1) { |
908 | 0 | tot_mse = joint_strength_search_dual(best_lev0, best_lev1, nb_strengths, |
909 | 0 | mse, sb_count, pick_method); |
910 | 0 | } else { |
911 | 0 | tot_mse = joint_strength_search(best_lev0, nb_strengths, mse[0], sb_count, |
912 | 0 | pick_method); |
913 | 0 | } |
914 | |
915 | 0 | const int total_bits = sb_count * i + nb_strengths * CDEF_STRENGTH_BITS * |
916 | 0 | (num_planes > 1 ? 2 : 1); |
917 | 0 | const int rate_cost = av1_cost_literal(total_bits); |
918 | 0 | const uint64_t dist = tot_mse * 16; |
919 | 0 | const uint64_t rd = RDCOST(rdmult, rate_cost, dist); |
920 | 0 | if (rd < best_rd) { |
921 | 0 | best_rd = rd; |
922 | 0 | nb_strength_bits = i; |
923 | 0 | memcpy(cdef_info->cdef_strengths, best_lev0, |
924 | 0 | nb_strengths * sizeof(best_lev0[0])); |
925 | 0 | if (num_planes > 1) { |
926 | 0 | memcpy(cdef_info->cdef_uv_strengths, best_lev1, |
927 | 0 | nb_strengths * sizeof(best_lev1[0])); |
928 | 0 | } |
929 | 0 | } |
930 | 0 | } |
931 | |
932 | 0 | cdef_info->cdef_bits = nb_strength_bits; |
933 | 0 | cdef_info->nb_cdef_strengths = 1 << nb_strength_bits; |
934 | 0 | for (int i = 0; i < sb_count; i++) { |
935 | 0 | uint64_t best_mse = UINT64_MAX; |
936 | 0 | int best_gi = 0; |
937 | 0 | for (int gi = 0; gi < cdef_info->nb_cdef_strengths; gi++) { |
938 | 0 | uint64_t curr = mse[0][i][cdef_info->cdef_strengths[gi]]; |
939 | 0 | if (num_planes > 1) curr += mse[1][i][cdef_info->cdef_uv_strengths[gi]]; |
940 | 0 | if (curr < best_mse) { |
941 | 0 | best_gi = gi; |
942 | 0 | best_mse = curr; |
943 | 0 | } |
944 | 0 | } |
945 | 0 | mi_params->mi_grid_base[cdef_search_ctx->sb_index[i]]->cdef_strength = |
946 | 0 | best_gi; |
947 | 0 | } |
948 | 0 | if (fast) { |
949 | 0 | for (int j = 0; j < cdef_info->nb_cdef_strengths; j++) { |
950 | 0 | const int luma_strength = cdef_info->cdef_strengths[j]; |
951 | 0 | const int chroma_strength = cdef_info->cdef_uv_strengths[j]; |
952 | 0 | int pri_strength, sec_strength; |
953 | |
954 | 0 | STORE_CDEF_FILTER_STRENGTH(cdef_info->cdef_strengths[j], pick_method, |
955 | 0 | luma_strength); |
956 | 0 | STORE_CDEF_FILTER_STRENGTH(cdef_info->cdef_uv_strengths[j], pick_method, |
957 | 0 | chroma_strength); |
958 | 0 | } |
959 | 0 | } |
960 | | |
961 | | // For CDEF_ADAPTIVE, set primary and secondary CDEF at reduced strength for |
962 | | // qindexes 33 through 220. |
963 | | // Note 1: for odd strengths, the 0.5 discarded by ">> 1" is a significant |
964 | | // part of the strength when the strength is small, and because there are |
965 | | // few strength levels, odd strengths are reduced significantly more than a |
966 | | // half. This is intended behavior for reduced strength. |
967 | | // For example: a pri strength of 3 becomes 1, and a sec strength of 1 |
968 | | // becomes 0. |
969 | | // Note 2: a (signaled) sec strength value of 3 is special as it results in an |
970 | | // actual sec strength of 4. We tried adding +1 to the sec strength 3 so it |
971 | | // maps to a reduced sec strength of 2. However, on Daala's subset1, the |
972 | | // resulting SSIMULACRA 2 scores were either exactly the same (at cpu-used 6), |
973 | | // or within noise level (at cpu-used 3). Given that there were no discernible |
974 | | // improvements, this special mapping was left out for reduced strength. |
975 | 0 | if (cdef_control == CDEF_ADAPTIVE && cpi->oxcf.mode == ALLINTRA && |
976 | 0 | (cpi->oxcf.rc_cfg.mode == AOM_Q || cpi->oxcf.rc_cfg.mode == AOM_CQ) && |
977 | 0 | cpi->oxcf.rc_cfg.cq_level <= 220) { |
978 | 0 | for (int j = 0; j < cdef_info->nb_cdef_strengths; j++) { |
979 | 0 | const int luma_strength = cdef_info->cdef_strengths[j]; |
980 | 0 | const int chroma_strength = cdef_info->cdef_uv_strengths[j]; |
981 | |
982 | 0 | const int new_pri_luma_strength = |
983 | 0 | (luma_strength / CDEF_SEC_STRENGTHS) >> 1; |
984 | 0 | const int new_sec_luma_strength = |
985 | 0 | (luma_strength % CDEF_SEC_STRENGTHS) >> 1; |
986 | 0 | const int new_pri_chroma_strength = |
987 | 0 | (chroma_strength / CDEF_SEC_STRENGTHS) >> 1; |
988 | 0 | const int new_sec_chroma_strength = |
989 | 0 | (chroma_strength % CDEF_SEC_STRENGTHS) >> 1; |
990 | |
991 | 0 | cdef_info->cdef_strengths[j] = |
992 | 0 | new_pri_luma_strength * CDEF_SEC_STRENGTHS + new_sec_luma_strength; |
993 | 0 | cdef_info->cdef_uv_strengths[j] = |
994 | 0 | new_pri_chroma_strength * CDEF_SEC_STRENGTHS + |
995 | 0 | new_sec_chroma_strength; |
996 | 0 | } |
997 | 0 | } |
998 | |
999 | 0 | cdef_info->cdef_damping = damping; |
1000 | | // Deallocate CDEF search context buffers. |
1001 | 0 | av1_cdef_dealloc_data(cdef_search_ctx); |
1002 | 0 | } |
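For reference, the rate term in the nb_strength_bits loop above charges i bits per superblock to select one of the 2^i presets, plus CDEF_STRENGTH_BITS per preset (doubled when chroma strengths are also signalled) in the frame header. A small sketch of that accounting, assuming CDEF_STRENGTH_BITS is 6 (4 primary + 2 secondary strength bits); the names and the superblock count below are illustrative only:

    #include <stdio.h>

    /* Assumed value of the libaom constant: 4 primary + 2 secondary bits. */
    #define TOY_CDEF_STRENGTH_BITS 6

    /* Mirrors the total_bits expression in av1_cdef_search(). */
    static int toy_cdef_signaling_bits(int sb_count, int nb_strength_bits,
                                       int num_planes) {
      const int nb_strengths = 1 << nb_strength_bits;
      return sb_count * nb_strength_bits +
             nb_strengths * TOY_CDEF_STRENGTH_BITS * (num_planes > 1 ? 2 : 1);
    }

    int main(void) {
      /* Example: a frame with 510 64x64 superblocks and 4:2:0 chroma. */
      for (int bits = 0; bits <= 3; bits++)
        printf("nb_strength_bits=%d -> %d header + per-SB bits\n", bits,
               toy_cdef_signaling_bits(510, bits, 3));
      return 0;
    }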