/src/aom/av1/encoder/pickcdef.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2016, Alliance for Open Media. All rights reserved |
3 | | * |
4 | | * This source code is subject to the terms of the BSD 2 Clause License and |
5 | | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
6 | | * was not distributed with this source code in the LICENSE file, you can |
7 | | * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
8 | | * Media Patent License 1.0 was not distributed with this source code in the |
9 | | * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
10 | | */ |
11 | | |
12 | | #include <math.h> |
13 | | #include <string.h> |
14 | | |
15 | | #include "config/aom_dsp_rtcd.h" |
16 | | #include "config/aom_scale_rtcd.h" |
17 | | |
18 | | #include "aom/aom_integer.h" |
19 | | #include "av1/common/av1_common_int.h" |
20 | | #include "av1/common/reconinter.h" |
21 | | #include "av1/encoder/encoder.h" |
22 | | #include "av1/encoder/ethread.h" |
23 | | #include "av1/encoder/pickcdef.h" |
24 | | |
25 | | // Get primary and secondary filter strength for the given strength index and |
26 | | // search method |
27 | | static INLINE void get_cdef_filter_strengths(CDEF_PICK_METHOD pick_method, |
28 | | int *pri_strength, |
29 | | int *sec_strength, |
30 | 0 | int strength_idx) { |
31 | 0 | const int tot_sec_filter = |
32 | 0 | (pick_method == CDEF_FAST_SEARCH_LVL5) |
33 | 0 | ? REDUCED_SEC_STRENGTHS_LVL5 |
34 | 0 | : ((pick_method >= CDEF_FAST_SEARCH_LVL3) ? REDUCED_SEC_STRENGTHS_LVL3 |
35 | 0 | : CDEF_SEC_STRENGTHS); |
36 | 0 | const int pri_idx = strength_idx / tot_sec_filter; |
37 | 0 | const int sec_idx = strength_idx % tot_sec_filter; |
38 | 0 | *pri_strength = pri_idx; |
39 | 0 | *sec_strength = sec_idx; |
40 | 0 | if (pick_method == CDEF_FULL_SEARCH) return; |
41 | | |
42 | 0 | switch (pick_method) { |
43 | 0 | case CDEF_FAST_SEARCH_LVL1: *pri_strength = priconv_lvl1[pri_idx]; break; |
44 | 0 | case CDEF_FAST_SEARCH_LVL2: *pri_strength = priconv_lvl2[pri_idx]; break; |
45 | 0 | case CDEF_FAST_SEARCH_LVL3: |
46 | 0 | *pri_strength = priconv_lvl2[pri_idx]; |
47 | 0 | *sec_strength = secconv_lvl3[sec_idx]; |
48 | 0 | break; |
49 | 0 | case CDEF_FAST_SEARCH_LVL4: |
50 | 0 | *pri_strength = priconv_lvl4[pri_idx]; |
51 | 0 | *sec_strength = secconv_lvl3[sec_idx]; |
52 | 0 | break; |
53 | 0 | case CDEF_FAST_SEARCH_LVL5: |
54 | 0 | *pri_strength = priconv_lvl5[pri_idx]; |
55 | 0 | *sec_strength = secconv_lvl5[sec_idx]; |
56 | 0 | break; |
57 | 0 | default: assert(0 && "Invalid CDEF search method"); |
58 | 0 | } |
59 | 0 | } |
60 | | |
// Store CDEF filter strength calculated from strength index for given search
// method.
// The macro writes through the locals `pri_strength` and `sec_strength`, which
// must be declared (as int) in the calling scope, and stores
// pri_strength * CDEF_SEC_STRENGTHS + sec_strength into `cdef_strength`.
// It expands to a single statement (do/while(0)), so it is safe to use as the
// body of an unbraced if/else/for; terminate the invocation with ';'.
#define STORE_CDEF_FILTER_STRENGTH(cdef_strength, pick_method, strength_idx) \
  do {                                                                       \
    get_cdef_filter_strengths((pick_method), &pri_strength, &sec_strength,   \
                              (strength_idx));                               \
    (cdef_strength) = pri_strength * CDEF_SEC_STRENGTHS + sec_strength;      \
  } while (0)
67 | | |
68 | | /* Search for the best strength to add as an option, knowing we |
69 | | already selected nb_strengths options. */ |
70 | | static uint64_t search_one(int *lev, int nb_strengths, |
71 | | uint64_t mse[][TOTAL_STRENGTHS], int sb_count, |
72 | 0 | CDEF_PICK_METHOD pick_method) { |
73 | 0 | uint64_t tot_mse[TOTAL_STRENGTHS]; |
74 | 0 | const int total_strengths = nb_cdef_strengths[pick_method]; |
75 | 0 | int i, j; |
76 | 0 | uint64_t best_tot_mse = (uint64_t)1 << 63; |
77 | 0 | int best_id = 0; |
78 | 0 | memset(tot_mse, 0, sizeof(tot_mse)); |
79 | 0 | for (i = 0; i < sb_count; i++) { |
80 | 0 | int gi; |
81 | 0 | uint64_t best_mse = (uint64_t)1 << 63; |
82 | | /* Find best mse among already selected options. */ |
83 | 0 | for (gi = 0; gi < nb_strengths; gi++) { |
84 | 0 | if (mse[i][lev[gi]] < best_mse) { |
85 | 0 | best_mse = mse[i][lev[gi]]; |
86 | 0 | } |
87 | 0 | } |
88 | | /* Find best mse when adding each possible new option. */ |
89 | 0 | for (j = 0; j < total_strengths; j++) { |
90 | 0 | uint64_t best = best_mse; |
91 | 0 | if (mse[i][j] < best) best = mse[i][j]; |
92 | 0 | tot_mse[j] += best; |
93 | 0 | } |
94 | 0 | } |
95 | 0 | for (j = 0; j < total_strengths; j++) { |
96 | 0 | if (tot_mse[j] < best_tot_mse) { |
97 | 0 | best_tot_mse = tot_mse[j]; |
98 | 0 | best_id = j; |
99 | 0 | } |
100 | 0 | } |
101 | 0 | lev[nb_strengths] = best_id; |
102 | 0 | return best_tot_mse; |
103 | 0 | } |
104 | | |
105 | | /* Search for the best luma+chroma strength to add as an option, knowing we |
106 | | already selected nb_strengths options. */ |
107 | | static uint64_t search_one_dual(int *lev0, int *lev1, int nb_strengths, |
108 | | uint64_t (**mse)[TOTAL_STRENGTHS], int sb_count, |
109 | 0 | CDEF_PICK_METHOD pick_method) { |
110 | 0 | uint64_t tot_mse[TOTAL_STRENGTHS][TOTAL_STRENGTHS]; |
111 | 0 | int i, j; |
112 | 0 | uint64_t best_tot_mse = (uint64_t)1 << 63; |
113 | 0 | int best_id0 = 0; |
114 | 0 | int best_id1 = 0; |
115 | 0 | const int total_strengths = nb_cdef_strengths[pick_method]; |
116 | 0 | memset(tot_mse, 0, sizeof(tot_mse)); |
117 | 0 | for (i = 0; i < sb_count; i++) { |
118 | 0 | int gi; |
119 | 0 | uint64_t best_mse = (uint64_t)1 << 63; |
120 | | /* Find best mse among already selected options. */ |
121 | 0 | for (gi = 0; gi < nb_strengths; gi++) { |
122 | 0 | uint64_t curr = mse[0][i][lev0[gi]]; |
123 | 0 | curr += mse[1][i][lev1[gi]]; |
124 | 0 | if (curr < best_mse) { |
125 | 0 | best_mse = curr; |
126 | 0 | } |
127 | 0 | } |
128 | | /* Find best mse when adding each possible new option. */ |
129 | 0 | for (j = 0; j < total_strengths; j++) { |
130 | 0 | int k; |
131 | 0 | for (k = 0; k < total_strengths; k++) { |
132 | 0 | uint64_t best = best_mse; |
133 | 0 | uint64_t curr = mse[0][i][j]; |
134 | 0 | curr += mse[1][i][k]; |
135 | 0 | if (curr < best) best = curr; |
136 | 0 | tot_mse[j][k] += best; |
137 | 0 | } |
138 | 0 | } |
139 | 0 | } |
140 | 0 | for (j = 0; j < total_strengths; j++) { |
141 | 0 | int k; |
142 | 0 | for (k = 0; k < total_strengths; k++) { |
143 | 0 | if (tot_mse[j][k] < best_tot_mse) { |
144 | 0 | best_tot_mse = tot_mse[j][k]; |
145 | 0 | best_id0 = j; |
146 | 0 | best_id1 = k; |
147 | 0 | } |
148 | 0 | } |
149 | 0 | } |
150 | 0 | lev0[nb_strengths] = best_id0; |
151 | 0 | lev1[nb_strengths] = best_id1; |
152 | 0 | return best_tot_mse; |
153 | 0 | } |
154 | | |
155 | | /* Search for the set of strengths that minimizes mse. */ |
156 | | static uint64_t joint_strength_search(int *best_lev, int nb_strengths, |
157 | | uint64_t mse[][TOTAL_STRENGTHS], |
158 | | int sb_count, |
159 | 0 | CDEF_PICK_METHOD pick_method) { |
160 | 0 | uint64_t best_tot_mse; |
161 | 0 | int fast = (pick_method >= CDEF_FAST_SEARCH_LVL1 && |
162 | 0 | pick_method <= CDEF_FAST_SEARCH_LVL5); |
163 | 0 | int i; |
164 | 0 | best_tot_mse = (uint64_t)1 << 63; |
165 | | /* Greedy search: add one strength options at a time. */ |
166 | 0 | for (i = 0; i < nb_strengths; i++) { |
167 | 0 | best_tot_mse = search_one(best_lev, i, mse, sb_count, pick_method); |
168 | 0 | } |
169 | | /* Trying to refine the greedy search by reconsidering each |
170 | | already-selected option. */ |
171 | 0 | if (!fast) { |
172 | 0 | for (i = 0; i < 4 * nb_strengths; i++) { |
173 | 0 | int j; |
174 | 0 | for (j = 0; j < nb_strengths - 1; j++) best_lev[j] = best_lev[j + 1]; |
175 | 0 | best_tot_mse = |
176 | 0 | search_one(best_lev, nb_strengths - 1, mse, sb_count, pick_method); |
177 | 0 | } |
178 | 0 | } |
179 | 0 | return best_tot_mse; |
180 | 0 | } |
181 | | |
182 | | /* Search for the set of luma+chroma strengths that minimizes mse. */ |
183 | | static uint64_t joint_strength_search_dual(int *best_lev0, int *best_lev1, |
184 | | int nb_strengths, |
185 | | uint64_t (**mse)[TOTAL_STRENGTHS], |
186 | | int sb_count, |
187 | 0 | CDEF_PICK_METHOD pick_method) { |
188 | 0 | uint64_t best_tot_mse; |
189 | 0 | int i; |
190 | 0 | best_tot_mse = (uint64_t)1 << 63; |
191 | | /* Greedy search: add one strength options at a time. */ |
192 | 0 | for (i = 0; i < nb_strengths; i++) { |
193 | 0 | best_tot_mse = |
194 | 0 | search_one_dual(best_lev0, best_lev1, i, mse, sb_count, pick_method); |
195 | 0 | } |
196 | | /* Trying to refine the greedy search by reconsidering each |
197 | | already-selected option. */ |
198 | 0 | for (i = 0; i < 4 * nb_strengths; i++) { |
199 | 0 | int j; |
200 | 0 | for (j = 0; j < nb_strengths - 1; j++) { |
201 | 0 | best_lev0[j] = best_lev0[j + 1]; |
202 | 0 | best_lev1[j] = best_lev1[j + 1]; |
203 | 0 | } |
204 | 0 | best_tot_mse = search_one_dual(best_lev0, best_lev1, nb_strengths - 1, mse, |
205 | 0 | sb_count, pick_method); |
206 | 0 | } |
207 | 0 | return best_tot_mse; |
208 | 0 | } |
209 | | |
210 | | #if CONFIG_AV1_HIGHBITDEPTH |
// Copies a vsize x hsize rectangle of 16-bit samples into dst (row stride
// dstride). src is converted with CONVERT_TO_SHORTPTR before use; the
// rectangle starts at row src_voffset, column src_hoffset of that buffer,
// whose row stride is sstride (in samples).
static void copy_sb16_16_highbd(uint16_t *dst, int dstride, const void *src,
                                int src_voffset, int src_hoffset, int sstride,
                                int vsize, int hsize) {
  const uint16_t *const origin = CONVERT_TO_SHORTPTR((uint8_t *)src) +
                                 (src_voffset * sstride + src_hoffset);
  for (int row = 0; row < vsize; row++) {
    memcpy(dst + row * dstride, origin + row * sstride,
           hsize * sizeof(*origin));
  }
}
220 | | #endif |
221 | | |
// Copies a vsize x hsize rectangle of 8-bit samples from src into the 16-bit
// buffer dst (row stride dstride), widening each sample. The rectangle starts
// at row src_voffset, column src_hoffset of src, whose row stride is sstride.
static void copy_sb16_16(uint16_t *dst, int dstride, const void *src,
                         int src_voffset, int src_hoffset, int sstride,
                         int vsize, int hsize) {
  const uint8_t *const origin =
      (const uint8_t *)src + (src_voffset * sstride + src_hoffset);
  for (int row = 0; row < vsize; row++) {
    const uint8_t *const in_row = origin + row * sstride;
    uint16_t *const out_row = dst + row * dstride;
    for (int col = 0; col < hsize; col++) out_row[col] = (uint16_t)in_row[col];
  }
}
232 | | |
233 | | static INLINE void init_src_params(int *src_stride, int *width, int *height, |
234 | | int *width_log2, int *height_log2, |
235 | 0 | BLOCK_SIZE bsize) { |
236 | 0 | *src_stride = block_size_wide[bsize]; |
237 | 0 | *width = block_size_wide[bsize]; |
238 | 0 | *height = block_size_high[bsize]; |
239 | 0 | *width_log2 = MI_SIZE_LOG2 + mi_size_wide_log2[bsize]; |
240 | 0 | *height_log2 = MI_SIZE_LOG2 + mi_size_wide_log2[bsize]; |
241 | 0 | } |
242 | | #if CONFIG_AV1_HIGHBITDEPTH |
243 | | /* Compute MSE only on the blocks we filtered. */ |
244 | | static uint64_t compute_cdef_dist_highbd(void *dst, int dstride, uint16_t *src, |
245 | | cdef_list *dlist, int cdef_count, |
246 | | BLOCK_SIZE bsize, int coeff_shift, |
247 | 0 | int row, int col) { |
248 | 0 | assert(bsize == BLOCK_4X4 || bsize == BLOCK_4X8 || bsize == BLOCK_8X4 || |
249 | 0 | bsize == BLOCK_8X8); |
250 | 0 | uint64_t sum = 0; |
251 | 0 | int bi, bx, by; |
252 | 0 | uint16_t *dst16 = CONVERT_TO_SHORTPTR((uint8_t *)dst); |
253 | 0 | uint16_t *dst_buff = &dst16[row * dstride + col]; |
254 | 0 | int src_stride, width, height, width_log2, height_log2; |
255 | 0 | init_src_params(&src_stride, &width, &height, &width_log2, &height_log2, |
256 | 0 | bsize); |
257 | 0 | for (bi = 0; bi < cdef_count; bi++) { |
258 | 0 | by = dlist[bi].by; |
259 | 0 | bx = dlist[bi].bx; |
260 | 0 | sum += aom_mse_wxh_16bit_highbd( |
261 | 0 | &dst_buff[(by << height_log2) * dstride + (bx << width_log2)], dstride, |
262 | 0 | &src[bi << (height_log2 + width_log2)], src_stride, width, height); |
263 | 0 | } |
264 | 0 | return sum >> 2 * coeff_shift; |
265 | 0 | } |
266 | | #endif |
267 | | static uint64_t compute_cdef_dist(void *dst, int dstride, uint16_t *src, |
268 | | cdef_list *dlist, int cdef_count, |
269 | | BLOCK_SIZE bsize, int coeff_shift, int row, |
270 | 0 | int col) { |
271 | 0 | assert(bsize == BLOCK_4X4 || bsize == BLOCK_4X8 || bsize == BLOCK_8X4 || |
272 | 0 | bsize == BLOCK_8X8); |
273 | 0 | uint64_t sum = 0; |
274 | 0 | int bi, bx, by; |
275 | 0 | uint8_t *dst8 = (uint8_t *)dst; |
276 | 0 | uint8_t *dst_buff = &dst8[row * dstride + col]; |
277 | 0 | int src_stride, width, height, width_log2, height_log2; |
278 | 0 | init_src_params(&src_stride, &width, &height, &width_log2, &height_log2, |
279 | 0 | bsize); |
280 | 0 | for (bi = 0; bi < cdef_count; bi++) { |
281 | 0 | by = dlist[bi].by; |
282 | 0 | bx = dlist[bi].bx; |
283 | 0 | sum += aom_mse_wxh_16bit( |
284 | 0 | &dst_buff[(by << height_log2) * dstride + (bx << width_log2)], dstride, |
285 | 0 | &src[bi << (height_log2 + width_log2)], src_stride, width, height); |
286 | 0 | } |
287 | 0 | return sum >> 2 * coeff_shift; |
288 | 0 | } |
289 | | |
290 | | // Calculates MSE at block level. |
291 | | // Inputs: |
292 | | // cdef_search_ctx: Pointer to the structure containing parameters related to |
293 | | // CDEF search context. |
294 | | // fbr: Row index in units of 64x64 block |
295 | | // fbc: Column index in units of 64x64 block |
296 | | // Returns: |
297 | | // Nothing will be returned. Contents of cdef_search_ctx will be modified. |
298 | | void av1_cdef_mse_calc_block(CdefSearchCtx *cdef_search_ctx, int fbr, int fbc, |
299 | 0 | int sb_count) { |
300 | 0 | const CommonModeInfoParams *const mi_params = cdef_search_ctx->mi_params; |
301 | 0 | const YV12_BUFFER_CONFIG *ref = cdef_search_ctx->ref; |
302 | 0 | const int coeff_shift = cdef_search_ctx->coeff_shift; |
303 | 0 | const int *mi_wide_l2 = cdef_search_ctx->mi_wide_l2; |
304 | 0 | const int *mi_high_l2 = cdef_search_ctx->mi_high_l2; |
305 | | |
306 | | // Declare and initialize the temporary buffers. |
307 | 0 | DECLARE_ALIGNED(32, uint16_t, tmp_dst[1 << (MAX_SB_SIZE_LOG2 * 2)]); |
308 | 0 | DECLARE_ALIGNED(32, uint16_t, inbuf[CDEF_INBUF_SIZE]); |
309 | 0 | cdef_list dlist[MI_SIZE_128X128 * MI_SIZE_128X128]; |
310 | 0 | int dir[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } }; |
311 | 0 | int var[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } }; |
312 | 0 | uint16_t *const in = inbuf + CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER; |
313 | 0 | int nhb = AOMMIN(MI_SIZE_64X64, mi_params->mi_cols - MI_SIZE_64X64 * fbc); |
314 | 0 | int nvb = AOMMIN(MI_SIZE_64X64, mi_params->mi_rows - MI_SIZE_64X64 * fbr); |
315 | 0 | int hb_step = 1, vb_step = 1; |
316 | 0 | BLOCK_SIZE bs; |
317 | |
|
318 | 0 | const MB_MODE_INFO *const mbmi = |
319 | 0 | mi_params->mi_grid_base[MI_SIZE_64X64 * fbr * mi_params->mi_stride + |
320 | 0 | MI_SIZE_64X64 * fbc]; |
321 | |
|
322 | 0 | uint8_t *ref_buffer[MAX_MB_PLANE] = { ref->y_buffer, ref->u_buffer, |
323 | 0 | ref->v_buffer }; |
324 | 0 | int ref_stride[MAX_MB_PLANE] = { ref->y_stride, ref->uv_stride, |
325 | 0 | ref->uv_stride }; |
326 | |
|
327 | 0 | if (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_128X64 || |
328 | 0 | mbmi->bsize == BLOCK_64X128) { |
329 | 0 | bs = mbmi->bsize; |
330 | 0 | if (bs == BLOCK_128X128 || bs == BLOCK_128X64) { |
331 | 0 | nhb = AOMMIN(MI_SIZE_128X128, mi_params->mi_cols - MI_SIZE_64X64 * fbc); |
332 | 0 | hb_step = 2; |
333 | 0 | } |
334 | 0 | if (bs == BLOCK_128X128 || bs == BLOCK_64X128) { |
335 | 0 | nvb = AOMMIN(MI_SIZE_128X128, mi_params->mi_rows - MI_SIZE_64X64 * fbr); |
336 | 0 | vb_step = 2; |
337 | 0 | } |
338 | 0 | } else { |
339 | 0 | bs = BLOCK_64X64; |
340 | 0 | } |
341 | | // Get number of 8x8 blocks which are not skip. Cdef processing happens for |
342 | | // 8x8 blocks which are not skip. |
343 | 0 | const int cdef_count = av1_cdef_compute_sb_list( |
344 | 0 | mi_params, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64, dlist, bs); |
345 | |
|
346 | 0 | const int yoff = CDEF_VBORDER * (fbr != 0); |
347 | 0 | const int xoff = CDEF_HBORDER * (fbc != 0); |
348 | 0 | int dirinit = 0; |
349 | 0 | for (int pli = 0; pli < cdef_search_ctx->num_planes; pli++) { |
350 | 0 | for (int i = 0; i < CDEF_INBUF_SIZE; i++) inbuf[i] = CDEF_VERY_LARGE; |
351 | | /* We avoid filtering the pixels for which some of the pixels to |
352 | | average are outside the frame. We could change the filter instead, |
353 | | but it would add special cases for any future vectorization. */ |
354 | 0 | const int ysize = (nvb << mi_high_l2[pli]) + |
355 | 0 | CDEF_VBORDER * (fbr + vb_step < cdef_search_ctx->nvfb) + |
356 | 0 | yoff; |
357 | 0 | const int xsize = (nhb << mi_wide_l2[pli]) + |
358 | 0 | CDEF_HBORDER * (fbc + hb_step < cdef_search_ctx->nhfb) + |
359 | 0 | xoff; |
360 | 0 | const int row = fbr * MI_SIZE_64X64 << mi_high_l2[pli]; |
361 | 0 | const int col = fbc * MI_SIZE_64X64 << mi_wide_l2[pli]; |
362 | 0 | struct macroblockd_plane pd = cdef_search_ctx->plane[pli]; |
363 | 0 | cdef_search_ctx->copy_fn(&in[(-yoff * CDEF_BSTRIDE - xoff)], CDEF_BSTRIDE, |
364 | 0 | pd.dst.buf, row - yoff, col - xoff, pd.dst.stride, |
365 | 0 | ysize, xsize); |
366 | 0 | for (int gi = 0; gi < cdef_search_ctx->total_strengths; gi++) { |
367 | 0 | int pri_strength, sec_strength; |
368 | 0 | get_cdef_filter_strengths(cdef_search_ctx->pick_method, &pri_strength, |
369 | 0 | &sec_strength, gi); |
370 | 0 | av1_cdef_filter_fb(NULL, tmp_dst, CDEF_BSTRIDE, in, |
371 | 0 | cdef_search_ctx->xdec[pli], cdef_search_ctx->ydec[pli], |
372 | 0 | dir, &dirinit, var, pli, dlist, cdef_count, |
373 | 0 | pri_strength, sec_strength + (sec_strength == 3), |
374 | 0 | cdef_search_ctx->damping, coeff_shift); |
375 | 0 | const uint64_t curr_mse = cdef_search_ctx->compute_cdef_dist_fn( |
376 | 0 | ref_buffer[pli], ref_stride[pli], tmp_dst, dlist, cdef_count, |
377 | 0 | cdef_search_ctx->bsize[pli], coeff_shift, row, col); |
378 | 0 | if (pli < 2) |
379 | 0 | cdef_search_ctx->mse[pli][sb_count][gi] = curr_mse; |
380 | 0 | else |
381 | 0 | cdef_search_ctx->mse[1][sb_count][gi] += curr_mse; |
382 | 0 | } |
383 | 0 | } |
384 | 0 | cdef_search_ctx->sb_index[sb_count] = |
385 | 0 | MI_SIZE_64X64 * fbr * mi_params->mi_stride + MI_SIZE_64X64 * fbc; |
386 | 0 | } |
387 | | |
388 | | // MSE calculation at frame level. |
389 | | // Inputs: |
390 | | // cdef_search_ctx: Pointer to the structure containing parameters related to |
391 | | // CDEF search context. |
392 | | // Returns: |
393 | | // Nothing will be returned. Contents of cdef_search_ctx will be modified. |
394 | 0 | static void cdef_mse_calc_frame(CdefSearchCtx *cdef_search_ctx) { |
395 | | // Loop over each sb. |
396 | 0 | for (int fbr = 0; fbr < cdef_search_ctx->nvfb; ++fbr) { |
397 | 0 | for (int fbc = 0; fbc < cdef_search_ctx->nhfb; ++fbc) { |
398 | | // Checks if cdef processing can be skipped for particular sb. |
399 | 0 | if (cdef_sb_skip(cdef_search_ctx->mi_params, fbr, fbc)) continue; |
400 | | // Calculate mse for each sb and store the relevant sb index. |
401 | 0 | av1_cdef_mse_calc_block(cdef_search_ctx, fbr, fbc, |
402 | 0 | cdef_search_ctx->sb_count); |
403 | 0 | cdef_search_ctx->sb_count++; |
404 | 0 | } |
405 | 0 | } |
406 | 0 | } |
407 | | |
408 | | // Allocates memory for members of CdefSearchCtx. |
409 | | // Inputs: |
410 | | // cdef_search_ctx: Pointer to the structure containing parameters |
411 | | // related to CDEF search context. |
412 | | // Returns: |
413 | | // Nothing will be returned. Contents of cdef_search_ctx will be modified. |
414 | 0 | static AOM_INLINE void cdef_alloc_data(CdefSearchCtx *cdef_search_ctx) { |
415 | 0 | const int nvfb = cdef_search_ctx->nvfb; |
416 | 0 | const int nhfb = cdef_search_ctx->nhfb; |
417 | 0 | cdef_search_ctx->sb_index = |
418 | 0 | aom_malloc(nvfb * nhfb * sizeof(cdef_search_ctx->sb_index)); |
419 | 0 | cdef_search_ctx->sb_count = 0; |
420 | 0 | cdef_search_ctx->mse[0] = |
421 | 0 | aom_malloc(sizeof(**cdef_search_ctx->mse) * nvfb * nhfb); |
422 | 0 | cdef_search_ctx->mse[1] = |
423 | 0 | aom_malloc(sizeof(**cdef_search_ctx->mse) * nvfb * nhfb); |
424 | 0 | } |
425 | | |
426 | | // Deallocates the memory allocated for members of CdefSearchCtx. |
427 | | // Inputs: |
428 | | // cdef_search_ctx: Pointer to the structure containing parameters |
429 | | // related to CDEF search context. |
430 | | // Returns: |
431 | | // Nothing will be returned. |
432 | 0 | static AOM_INLINE void cdef_dealloc_data(CdefSearchCtx *cdef_search_ctx) { |
433 | 0 | aom_free(cdef_search_ctx->mse[0]); |
434 | 0 | aom_free(cdef_search_ctx->mse[1]); |
435 | 0 | aom_free(cdef_search_ctx->sb_index); |
436 | 0 | } |
437 | | |
438 | | // Initialize the parameters related to CDEF search context. |
439 | | // Inputs: |
440 | | // frame: Pointer to compressed frame buffer |
441 | | // ref: Pointer to the frame buffer holding the source frame |
442 | | // cm: Pointer to top level common structure |
443 | | // xd: Pointer to common current coding block structure |
444 | | // cdef_search_ctx: Pointer to the structure containing parameters related to |
445 | | // CDEF search context. |
446 | | // pick_method: Search method used to select CDEF parameters |
447 | | // Returns: |
448 | | // Nothing will be returned. Contents of cdef_search_ctx will be modified. |
449 | | static AOM_INLINE void cdef_params_init(const YV12_BUFFER_CONFIG *frame, |
450 | | const YV12_BUFFER_CONFIG *ref, |
451 | | AV1_COMMON *cm, MACROBLOCKD *xd, |
452 | | CdefSearchCtx *cdef_search_ctx, |
453 | 0 | CDEF_PICK_METHOD pick_method) { |
454 | 0 | const CommonModeInfoParams *const mi_params = &cm->mi_params; |
455 | 0 | const int num_planes = av1_num_planes(cm); |
456 | 0 | cdef_search_ctx->mi_params = &cm->mi_params; |
457 | 0 | cdef_search_ctx->ref = ref; |
458 | 0 | cdef_search_ctx->nvfb = |
459 | 0 | (mi_params->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64; |
460 | 0 | cdef_search_ctx->nhfb = |
461 | 0 | (mi_params->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64; |
462 | 0 | cdef_search_ctx->coeff_shift = AOMMAX(cm->seq_params->bit_depth - 8, 0); |
463 | 0 | cdef_search_ctx->damping = 3 + (cm->quant_params.base_qindex >> 6); |
464 | 0 | cdef_search_ctx->total_strengths = nb_cdef_strengths[pick_method]; |
465 | 0 | cdef_search_ctx->num_planes = num_planes; |
466 | 0 | cdef_search_ctx->pick_method = pick_method; |
467 | 0 | cdef_search_ctx->sb_count = 0; |
468 | 0 | av1_setup_dst_planes(xd->plane, cm->seq_params->sb_size, frame, 0, 0, 0, |
469 | 0 | num_planes); |
470 | | // Initialize plane wise information. |
471 | 0 | for (int pli = 0; pli < num_planes; pli++) { |
472 | 0 | cdef_search_ctx->xdec[pli] = xd->plane[pli].subsampling_x; |
473 | 0 | cdef_search_ctx->ydec[pli] = xd->plane[pli].subsampling_y; |
474 | 0 | cdef_search_ctx->bsize[pli] = |
475 | 0 | cdef_search_ctx->ydec[pli] |
476 | 0 | ? (cdef_search_ctx->xdec[pli] ? BLOCK_4X4 : BLOCK_8X4) |
477 | 0 | : (cdef_search_ctx->xdec[pli] ? BLOCK_4X8 : BLOCK_8X8); |
478 | 0 | cdef_search_ctx->mi_wide_l2[pli] = |
479 | 0 | MI_SIZE_LOG2 - xd->plane[pli].subsampling_x; |
480 | 0 | cdef_search_ctx->mi_high_l2[pli] = |
481 | 0 | MI_SIZE_LOG2 - xd->plane[pli].subsampling_y; |
482 | 0 | cdef_search_ctx->plane[pli] = xd->plane[pli]; |
483 | 0 | } |
484 | | // Function pointer initialization. |
485 | 0 | #if CONFIG_AV1_HIGHBITDEPTH |
486 | 0 | if (cm->seq_params->use_highbitdepth) { |
487 | 0 | cdef_search_ctx->copy_fn = copy_sb16_16_highbd; |
488 | 0 | cdef_search_ctx->compute_cdef_dist_fn = compute_cdef_dist_highbd; |
489 | 0 | } else { |
490 | 0 | cdef_search_ctx->copy_fn = copy_sb16_16; |
491 | 0 | cdef_search_ctx->compute_cdef_dist_fn = compute_cdef_dist; |
492 | 0 | } |
493 | | #else |
494 | | cdef_search_ctx->copy_fn = copy_sb16_16; |
495 | | cdef_search_ctx->compute_cdef_dist_fn = compute_cdef_dist; |
496 | | #endif |
497 | 0 | } |
498 | | |
499 | | static void pick_cdef_from_qp(AV1_COMMON *const cm, int skip_cdef, |
500 | 0 | int frames_since_key) { |
501 | 0 | const int bd = cm->seq_params->bit_depth; |
502 | 0 | const int q = |
503 | 0 | av1_ac_quant_QTX(cm->quant_params.base_qindex, 0, bd) >> (bd - 8); |
504 | 0 | CdefInfo *const cdef_info = &cm->cdef_info; |
505 | | // Check the speed feature to avoid extra signaling. |
506 | 0 | if (skip_cdef) { |
507 | 0 | cdef_info->cdef_bits = 1; |
508 | 0 | cdef_info->nb_cdef_strengths = 2; |
509 | 0 | } else { |
510 | 0 | cdef_info->cdef_bits = 0; |
511 | 0 | cdef_info->nb_cdef_strengths = 1; |
512 | 0 | } |
513 | 0 | cdef_info->cdef_damping = 3 + (cm->quant_params.base_qindex >> 6); |
514 | |
|
515 | 0 | int predicted_y_f1 = 0; |
516 | 0 | int predicted_y_f2 = 0; |
517 | 0 | int predicted_uv_f1 = 0; |
518 | 0 | int predicted_uv_f2 = 0; |
519 | 0 | if (!frame_is_intra_only(cm)) { |
520 | 0 | predicted_y_f1 = clamp((int)roundf(q * q * -0.0000023593946f + |
521 | 0 | q * 0.0068615186f + 0.02709886f), |
522 | 0 | 0, 15); |
523 | 0 | predicted_y_f2 = clamp((int)roundf(q * q * -0.00000057629734f + |
524 | 0 | q * 0.0013993345f + 0.03831067f), |
525 | 0 | 0, 3); |
526 | 0 | predicted_uv_f1 = clamp((int)roundf(q * q * -0.0000007095069f + |
527 | 0 | q * 0.0034628846f + 0.00887099f), |
528 | 0 | 0, 15); |
529 | 0 | predicted_uv_f2 = clamp((int)roundf(q * q * 0.00000023874085f + |
530 | 0 | q * 0.00028223585f + 0.05576307f), |
531 | 0 | 0, 3); |
532 | 0 | } else { |
533 | 0 | predicted_y_f1 = clamp( |
534 | 0 | (int)roundf(q * q * 0.0000033731974f + q * 0.008070594f + 0.0187634f), |
535 | 0 | 0, 15); |
536 | 0 | predicted_y_f2 = clamp( |
537 | 0 | (int)roundf(q * q * 0.0000029167343f + q * 0.0027798624f + 0.0079405f), |
538 | 0 | 0, 3); |
539 | 0 | predicted_uv_f1 = clamp( |
540 | 0 | (int)roundf(q * q * -0.0000130790995f + q * 0.012892405f - 0.00748388f), |
541 | 0 | 0, 15); |
542 | 0 | predicted_uv_f2 = clamp((int)roundf(q * q * 0.0000032651783f + |
543 | 0 | q * 0.00035520183f + 0.00228092f), |
544 | 0 | 0, 3); |
545 | 0 | } |
546 | 0 | cdef_info->cdef_strengths[0] = |
547 | 0 | predicted_y_f1 * CDEF_SEC_STRENGTHS + predicted_y_f2; |
548 | 0 | cdef_info->cdef_uv_strengths[0] = |
549 | 0 | predicted_uv_f1 * CDEF_SEC_STRENGTHS + predicted_uv_f2; |
550 | |
|
551 | 0 | if (skip_cdef) { |
552 | 0 | cdef_info->cdef_strengths[1] = 0; |
553 | 0 | cdef_info->cdef_uv_strengths[1] = 0; |
554 | 0 | } |
555 | 0 | const CommonModeInfoParams *const mi_params = &cm->mi_params; |
556 | 0 | const int nvfb = (mi_params->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64; |
557 | 0 | const int nhfb = (mi_params->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64; |
558 | 0 | MB_MODE_INFO **mbmi = mi_params->mi_grid_base; |
559 | 0 | for (int r = 0; r < nvfb; ++r) { |
560 | 0 | for (int c = 0; c < nhfb; ++c) { |
561 | 0 | MB_MODE_INFO *current_mbmi = mbmi[MI_SIZE_64X64 * c]; |
562 | 0 | current_mbmi->cdef_strength = 0; |
563 | 0 | if (skip_cdef && current_mbmi->skip_cdef_curr_sb && |
564 | 0 | frames_since_key > 10) { |
565 | 0 | current_mbmi->cdef_strength = 1; |
566 | 0 | } |
567 | 0 | } |
568 | 0 | mbmi += MI_SIZE_64X64 * mi_params->mi_stride; |
569 | 0 | } |
570 | 0 | } |
571 | | |
572 | | void av1_cdef_search(MultiThreadInfo *mt_info, const YV12_BUFFER_CONFIG *frame, |
573 | | const YV12_BUFFER_CONFIG *ref, AV1_COMMON *cm, |
574 | | MACROBLOCKD *xd, CDEF_PICK_METHOD pick_method, int rdmult, |
575 | | int skip_cdef_feature, int frames_since_key, |
576 | 0 | CDEF_CONTROL cdef_control, int non_reference_frame) { |
577 | 0 | assert(cdef_control != CDEF_NONE); |
578 | 0 | if (cdef_control == CDEF_REFERENCE && non_reference_frame) { |
579 | 0 | CdefInfo *const cdef_info = &cm->cdef_info; |
580 | 0 | cdef_info->nb_cdef_strengths = 1; |
581 | 0 | cdef_info->cdef_bits = 0; |
582 | 0 | cdef_info->cdef_strengths[0] = 0; |
583 | 0 | cdef_info->cdef_uv_strengths[0] = 0; |
584 | 0 | return; |
585 | 0 | } |
586 | | |
587 | 0 | if (pick_method == CDEF_PICK_FROM_Q) { |
588 | 0 | pick_cdef_from_qp(cm, skip_cdef_feature, frames_since_key); |
589 | 0 | return; |
590 | 0 | } |
591 | 0 | const CommonModeInfoParams *const mi_params = &cm->mi_params; |
592 | 0 | const int damping = 3 + (cm->quant_params.base_qindex >> 6); |
593 | 0 | const int fast = (pick_method >= CDEF_FAST_SEARCH_LVL1 && |
594 | 0 | pick_method <= CDEF_FAST_SEARCH_LVL5); |
595 | 0 | const int num_planes = av1_num_planes(cm); |
596 | 0 | CdefSearchCtx cdef_search_ctx; |
597 | | // Initialize parameters related to CDEF search context. |
598 | 0 | cdef_params_init(frame, ref, cm, xd, &cdef_search_ctx, pick_method); |
599 | | // Allocate CDEF search context buffers. |
600 | 0 | cdef_alloc_data(&cdef_search_ctx); |
601 | | // Frame level mse calculation. |
602 | 0 | if (mt_info->num_workers > 1) { |
603 | 0 | av1_cdef_mse_calc_frame_mt(cm, mt_info, &cdef_search_ctx); |
604 | 0 | } else { |
605 | 0 | cdef_mse_calc_frame(&cdef_search_ctx); |
606 | 0 | } |
607 | | |
608 | | /* Search for different number of signaling bits. */ |
609 | 0 | int nb_strength_bits = 0; |
610 | 0 | uint64_t best_rd = UINT64_MAX; |
611 | 0 | CdefInfo *const cdef_info = &cm->cdef_info; |
612 | 0 | int sb_count = cdef_search_ctx.sb_count; |
613 | 0 | uint64_t(*mse[2])[TOTAL_STRENGTHS]; |
614 | 0 | mse[0] = cdef_search_ctx.mse[0]; |
615 | 0 | mse[1] = cdef_search_ctx.mse[1]; |
616 | 0 | for (int i = 0; i <= 3; i++) { |
617 | 0 | int best_lev0[CDEF_MAX_STRENGTHS]; |
618 | 0 | int best_lev1[CDEF_MAX_STRENGTHS] = { 0 }; |
619 | 0 | const int nb_strengths = 1 << i; |
620 | 0 | uint64_t tot_mse; |
621 | 0 | if (num_planes > 1) { |
622 | 0 | tot_mse = joint_strength_search_dual(best_lev0, best_lev1, nb_strengths, |
623 | 0 | mse, sb_count, pick_method); |
624 | 0 | } else { |
625 | 0 | tot_mse = joint_strength_search(best_lev0, nb_strengths, mse[0], sb_count, |
626 | 0 | pick_method); |
627 | 0 | } |
628 | |
|
629 | 0 | const int total_bits = sb_count * i + nb_strengths * CDEF_STRENGTH_BITS * |
630 | 0 | (num_planes > 1 ? 2 : 1); |
631 | 0 | const int rate_cost = av1_cost_literal(total_bits); |
632 | 0 | const uint64_t dist = tot_mse * 16; |
633 | 0 | const uint64_t rd = RDCOST(rdmult, rate_cost, dist); |
634 | 0 | if (rd < best_rd) { |
635 | 0 | best_rd = rd; |
636 | 0 | nb_strength_bits = i; |
637 | 0 | memcpy(cdef_info->cdef_strengths, best_lev0, |
638 | 0 | nb_strengths * sizeof(best_lev0[0])); |
639 | 0 | if (num_planes > 1) { |
640 | 0 | memcpy(cdef_info->cdef_uv_strengths, best_lev1, |
641 | 0 | nb_strengths * sizeof(best_lev1[0])); |
642 | 0 | } |
643 | 0 | } |
644 | 0 | } |
645 | |
|
646 | 0 | cdef_info->cdef_bits = nb_strength_bits; |
647 | 0 | cdef_info->nb_cdef_strengths = 1 << nb_strength_bits; |
648 | 0 | for (int i = 0; i < sb_count; i++) { |
649 | 0 | uint64_t best_mse = UINT64_MAX; |
650 | 0 | int best_gi = 0; |
651 | 0 | for (int gi = 0; gi < cdef_info->nb_cdef_strengths; gi++) { |
652 | 0 | uint64_t curr = mse[0][i][cdef_info->cdef_strengths[gi]]; |
653 | 0 | if (num_planes > 1) curr += mse[1][i][cdef_info->cdef_uv_strengths[gi]]; |
654 | 0 | if (curr < best_mse) { |
655 | 0 | best_gi = gi; |
656 | 0 | best_mse = curr; |
657 | 0 | } |
658 | 0 | } |
659 | 0 | mi_params->mi_grid_base[cdef_search_ctx.sb_index[i]]->cdef_strength = |
660 | 0 | best_gi; |
661 | 0 | } |
662 | 0 | if (fast) { |
663 | 0 | for (int j = 0; j < cdef_info->nb_cdef_strengths; j++) { |
664 | 0 | const int luma_strength = cdef_info->cdef_strengths[j]; |
665 | 0 | const int chroma_strength = cdef_info->cdef_uv_strengths[j]; |
666 | 0 | int pri_strength, sec_strength; |
667 | |
|
668 | 0 | STORE_CDEF_FILTER_STRENGTH(cdef_info->cdef_strengths[j], pick_method, |
669 | 0 | luma_strength); |
670 | 0 | STORE_CDEF_FILTER_STRENGTH(cdef_info->cdef_uv_strengths[j], pick_method, |
671 | 0 | chroma_strength); |
672 | 0 | } |
673 | 0 | } |
674 | |
|
675 | 0 | cdef_info->cdef_damping = damping; |
676 | | // Deallocate CDEF search context buffers. |
677 | 0 | cdef_dealloc_data(&cdef_search_ctx); |
678 | 0 | } |