/work/svt-av1/Source/Lib/Codec/enc_cdef.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2016, Alliance for Open Media. All rights reserved |
3 | | * |
4 | | * This source code is subject to the terms of the BSD 3-Clause Clear License and |
5 | | * the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear License |
6 | | * was not distributed with this source code in the LICENSE file, you can |
7 | | * obtain it at https://www.aomedia.org/license. If the Alliance for Open |
8 | | * Media Patent License 1.0 was not distributed with this source code in the |
9 | | * PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license. |
10 | | */ |
11 | | #include <stdio.h> |
12 | | #include <stdlib.h> |
13 | | #include <math.h> |
14 | | #include <string.h> |
15 | | |
16 | | #include "enc_cdef.h" |
17 | | #include <stdint.h> |
18 | | #include "aom_dsp_rtcd.h" |
19 | | #include "svt_log.h" |
20 | | #include "rd_cost.h" |
21 | | #include "rc_process.h" |
22 | | |
/*
 * Sum of squared differences between an 8-sample-wide block of 16-bit samples
 * in `dst` (row stride `dstride`) and the matching rows of `src`, which is read
 * with a fixed row stride of 8.
 *
 * Only rows 0, subsampling_factor, 2 * subsampling_factor, ... below `height`
 * are visited. `subsampling_factor` must be non-zero, otherwise the row loop
 * never advances.
 *
 * Returns the un-normalised sum (no division by the sample count).
 */
static inline uint64_t mse_8xn_16bit_c(const uint16_t* src, const uint16_t* dst, const int32_t dstride,
                                       const int32_t height, uint8_t subsampling_factor) {
    uint64_t sum = 0;
    for (int32_t i = 0; i < height; i += subsampling_factor) {
        for (int32_t j = 0; j < 8; j++) {
            const int64_t e = (int64_t)dst[i * dstride + j] - (int64_t)src[8 * i + j];
            // Square in 64 bits: a full-range 16-bit difference squared does not fit in int32_t.
            sum += (uint64_t)(e * e);
        }
    }
    return sum;
}
35 | | |
/*
 * Sum of squared differences between a 4-sample-wide block of 16-bit samples
 * in `dst` (row stride `dstride`) and the matching rows of `src`, which is read
 * with a fixed row stride of 4.
 *
 * Only rows 0, subsampling_factor, 2 * subsampling_factor, ... below `height`
 * are visited. `subsampling_factor` must be non-zero, otherwise the row loop
 * never advances.
 *
 * Returns the un-normalised sum (no division by the sample count).
 */
static inline uint64_t mse_4xn_16bit_c(const uint16_t* src, const uint16_t* dst, const int32_t dstride,
                                       const int32_t height, uint8_t subsampling_factor) {
    uint64_t sum = 0;
    for (int32_t i = 0; i < height; i += subsampling_factor) {
        for (int32_t j = 0; j < 4; j++) {
            const int64_t e = (int64_t)dst[i * dstride + j] - (int64_t)src[4 * i + j];
            // Square in 64 bits: a full-range 16-bit difference squared does not fit in int32_t.
            sum += (uint64_t)(e * e);
        }
    }
    return sum;
}
48 | | |
/*
 * 8-bit counterpart of the 8-wide squared-error sum: accumulates
 * (dst - src)^2 over an 8-sample-wide block. `dst` rows are `dstride` samples
 * apart, `src` rows are 8 samples apart. Rows are visited in steps of
 * `subsampling_factor` (which must be non-zero) while below `height`.
 */
static inline uint64_t mse_8xn_8bit_c(const uint8_t* src, const uint8_t* dst, const int32_t dstride,
                                      const int32_t height, uint8_t subsampling_factor) {
    uint64_t acc = 0;
    for (int32_t row = 0; row < height; row += subsampling_factor) {
        const int32_t dst_base = row * dstride;
        const int32_t src_base = 8 * row;
        for (int32_t col = 0; col < 8; col++) {
            const int32_t diff = dst[dst_base + col] - src[src_base + col];
            acc += diff * diff;
        }
    }
    return acc;
}
61 | | |
/*
 * 8-bit counterpart of the 4-wide squared-error sum: accumulates
 * (dst - src)^2 over a 4-sample-wide block. `dst` rows are `dstride` samples
 * apart, `src` rows are 4 samples apart. Rows are visited in steps of
 * `subsampling_factor` (which must be non-zero) while below `height`.
 */
static inline uint64_t mse_4xn_8bit_c(const uint8_t* src, const uint8_t* dst, const int32_t dstride,
                                      const int32_t height, uint8_t subsampling_factor) {
    uint64_t acc = 0;
    for (int32_t row = 0; row < height; row += subsampling_factor) {
        const int32_t dst_base = row * dstride;
        const int32_t src_base = 4 * row;
        for (int32_t col = 0; col < 4; col++) {
            const int32_t diff = dst[dst_base + col] - src[src_base + col];
            acc += diff * diff;
        }
    }
    return acc;
}
74 | | |
75 | | /* Compute MSE only on the blocks we filtered. */ |
76 | | uint64_t svt_aom_compute_cdef_dist_16bit_c(const uint16_t* dst, int32_t dstride, const uint16_t* src, |
77 | | const CdefList* dlist, int32_t cdef_count, BlockSize bsize, |
78 | 0 | int32_t coeff_shift, uint8_t subsampling_factor) { |
79 | 0 | uint64_t sum = 0; |
80 | 0 | int32_t bi, bx, by; |
81 | 0 | if (bsize == BLOCK_8X8) { |
82 | 0 | for (bi = 0; bi < cdef_count; bi++) { |
83 | 0 | by = dlist[bi].by; |
84 | 0 | bx = dlist[bi].bx; |
85 | 0 | sum += mse_8xn_16bit_c( |
86 | 0 | &src[bi << (3 + 3)], &dst[(by << 3) * dstride + (bx << 3)], dstride, 8, subsampling_factor); |
87 | 0 | } |
88 | 0 | } else if (bsize == BLOCK_4X8) { |
89 | 0 | for (bi = 0; bi < cdef_count; bi++) { |
90 | 0 | by = dlist[bi].by; |
91 | 0 | bx = dlist[bi].bx; |
92 | 0 | sum += mse_4xn_16bit_c( |
93 | 0 | &src[bi << (3 + 2)], &dst[(by << 3) * dstride + (bx << 2)], dstride, 8, subsampling_factor); |
94 | 0 | } |
95 | 0 | } else if (bsize == BLOCK_8X4) { |
96 | 0 | for (bi = 0; bi < cdef_count; bi++) { |
97 | 0 | by = dlist[bi].by; |
98 | 0 | bx = dlist[bi].bx; |
99 | 0 | sum += mse_8xn_16bit_c( |
100 | 0 | &src[bi << (2 + 3)], &dst[(by << 2) * dstride + (bx << 3)], dstride, 4, subsampling_factor); |
101 | 0 | } |
102 | 0 | } else { |
103 | 0 | assert(bsize == BLOCK_4X4); |
104 | 0 | for (bi = 0; bi < cdef_count; bi++) { |
105 | 0 | by = dlist[bi].by; |
106 | 0 | bx = dlist[bi].bx; |
107 | 0 | sum += mse_4xn_16bit_c( |
108 | 0 | &src[bi << (2 + 2)], &dst[(by << 2) * dstride + (bx << 2)], dstride, 4, subsampling_factor); |
109 | 0 | } |
110 | 0 | } |
111 | 0 | return sum >> 2 * coeff_shift; |
112 | 0 | } |
113 | | |
114 | | uint64_t svt_aom_compute_cdef_dist_8bit_c(const uint8_t* dst8, int32_t dstride, const uint8_t* src8, |
115 | | const CdefList* dlist, int32_t cdef_count, BlockSize bsize, |
116 | 0 | int32_t coeff_shift, uint8_t subsampling_factor) { |
117 | 0 | uint64_t sum = 0; |
118 | 0 | int32_t bi, bx, by; |
119 | 0 | if (bsize == BLOCK_8X8) { |
120 | 0 | for (bi = 0; bi < cdef_count; bi++) { |
121 | 0 | by = dlist[bi].by; |
122 | 0 | bx = dlist[bi].bx; |
123 | 0 | sum += mse_8xn_8bit_c( |
124 | 0 | &src8[bi << (3 + 3)], &dst8[(by << 3) * dstride + (bx << 3)], dstride, 8, subsampling_factor); |
125 | 0 | } |
126 | 0 | } else if (bsize == BLOCK_4X8) { |
127 | 0 | for (bi = 0; bi < cdef_count; bi++) { |
128 | 0 | by = dlist[bi].by; |
129 | 0 | bx = dlist[bi].bx; |
130 | 0 | sum += mse_4xn_8bit_c( |
131 | 0 | &src8[bi << (3 + 2)], &dst8[(by << 3) * dstride + (bx << 2)], dstride, 8, subsampling_factor); |
132 | 0 | } |
133 | 0 | } else if (bsize == BLOCK_8X4) { |
134 | 0 | for (bi = 0; bi < cdef_count; bi++) { |
135 | 0 | by = dlist[bi].by; |
136 | 0 | bx = dlist[bi].bx; |
137 | 0 | sum += mse_8xn_8bit_c( |
138 | 0 | &src8[bi << (2 + 3)], &dst8[(by << 2) * dstride + (bx << 3)], dstride, 4, subsampling_factor); |
139 | 0 | } |
140 | 0 | } else { |
141 | 0 | assert(bsize == BLOCK_4X4); |
142 | 0 | for (bi = 0; bi < cdef_count; bi++) { |
143 | 0 | by = dlist[bi].by; |
144 | 0 | bx = dlist[bi].bx; |
145 | 0 | sum += mse_4xn_8bit_c( |
146 | 0 | &src8[bi << (2 + 2)], &dst8[(by << 2) * dstride + (bx << 2)], dstride, 4, subsampling_factor); |
147 | 0 | } |
148 | 0 | } |
149 | 0 | return sum >> 2 * coeff_shift; |
150 | 0 | } |
151 | | |
152 | 3.82k | static int32_t svt_sb_all_skip(PictureControlSet* pcs, const Av1Common* const cm, int32_t mi_row, int32_t mi_col) { |
153 | 3.82k | int32_t maxc, maxr; |
154 | 3.82k | maxc = cm->mi_cols - mi_col; |
155 | 3.82k | maxr = cm->mi_rows - mi_row; |
156 | | |
157 | 3.82k | maxr = AOMMIN(maxr, MI_SIZE_64X64); |
158 | 3.82k | maxc = AOMMIN(maxc, MI_SIZE_64X64); |
159 | | |
160 | 10.0k | for (int32_t r = 0; r < maxr; r++) { |
161 | 96.1k | for (int32_t c = 0; c < maxc; c++) { |
162 | 89.9k | if (!(pcs->mi_grid_base[(mi_row + r) * pcs->mi_stride + mi_col + c]->block_mi.skip)) { |
163 | 3.38k | return 0; |
164 | 3.38k | } |
165 | 89.9k | } |
166 | 9.58k | } |
167 | 438 | return 1; |
168 | 3.82k | } |
169 | | |
170 | | int32_t svt_sb_compute_cdef_list(PictureControlSet* pcs, const Av1Common* const cm, int32_t mi_row, int32_t mi_col, |
171 | 0 | CdefList* dlist, BlockSize bs) { |
172 | 0 | MbModeInfo** grid = pcs->mi_grid_base; |
173 | 0 | int32_t mi_stride = pcs->mi_stride; |
174 | |
|
175 | 0 | int32_t maxc = cm->mi_cols - mi_col; |
176 | 0 | int32_t maxr = cm->mi_rows - mi_row; |
177 | |
|
178 | 0 | if (bs == BLOCK_128X128 || bs == BLOCK_128X64) { |
179 | 0 | maxc = AOMMIN(maxc, MI_SIZE_128X128); |
180 | 0 | } else { |
181 | 0 | maxc = AOMMIN(maxc, MI_SIZE_64X64); |
182 | 0 | } |
183 | 0 | if (bs == BLOCK_128X128 || bs == BLOCK_64X128) { |
184 | 0 | maxr = AOMMIN(maxr, MI_SIZE_128X128); |
185 | 0 | } else { |
186 | 0 | maxr = AOMMIN(maxr, MI_SIZE_64X64); |
187 | 0 | } |
188 | |
|
189 | 0 | const int32_t r_step = mi_size_high[BLOCK_8X8]; |
190 | 0 | const int32_t c_step = mi_size_wide[BLOCK_8X8]; |
191 | 0 | const int32_t r_shift = (r_step == 2); |
192 | 0 | const int32_t c_shift = (c_step == 2); |
193 | |
|
194 | 0 | assert(r_step == 1 || r_step == 2); |
195 | 0 | assert(c_step == 1 || c_step == 2); |
196 | |
|
197 | 0 | int32_t count = 0; |
198 | 0 | for (int32_t r = 0; r < maxr; r += r_step) { |
199 | 0 | for (int32_t c = 0; c < maxc; c += c_step) { |
200 | 0 | if (!grid[(mi_row + r) * mi_stride + (mi_col + c)]->block_mi.skip || |
201 | 0 | !grid[(mi_row + r) * mi_stride + (mi_col + c + 1)]->block_mi.skip || |
202 | 0 | !grid[(mi_row + r + 1) * mi_stride + (mi_col + c)]->block_mi.skip || |
203 | 0 | !grid[(mi_row + r + 1) * mi_stride + (mi_col + c + 1)]->block_mi.skip) { |
204 | 0 | dlist[count].by = (uint8_t)(r >> r_shift); |
205 | 0 | dlist[count].bx = (uint8_t)(c >> c_shift); |
206 | 0 | count++; |
207 | 0 | } |
208 | 0 | } |
209 | 0 | } |
210 | 0 | return count; |
211 | 0 | } |
212 | | |
/*
 * Writes the value `x` into a rectangle of `v` rows by `h` columns starting at
 * `dst`, whose rows are `dstride` samples apart. Nothing is written when `v`
 * or `h` is not positive.
 */
static inline void svt_aom_fill_rect(uint16_t* dst, int32_t dstride, int32_t v, int32_t h, uint16_t x) {
    for (int32_t row = 0; row < v; row++) {
        const int32_t base = row * dstride;
        for (int32_t col = 0; col < h; col++) {
            dst[base + col] = x;
        }
    }
}
220 | | |
/*
 * Copies a rectangle of `v` rows by `h` 16-bit samples from `src` (row stride
 * `sstride`) to `dst` (row stride `dstride`), one row per svt_memcpy() call.
 */
static inline void svt_aom_copy_rect(uint16_t* dst, int32_t dstride, const uint16_t* src, int32_t sstride, int32_t v,
                                     int32_t h) {
    int32_t rows_left = v;
    while (rows_left > 0) {
        svt_memcpy(dst, src, sizeof(dst[0]) * h);
        dst += dstride;
        src += sstride;
        rows_left--;
    }
}
229 | | |
230 | | /* |
231 | | Loop over all 64x64 filter blocks and perform the CDEF filtering for each block, using |
232 | | the filter strength pairs chosen in finish_cdef_search(). |
233 | | */ |
234 | 0 | void svt_av1_cdef_frame(SequenceControlSet* scs, PictureControlSet* pcs) { |
235 | 0 | PictureParentControlSet* ppcs = pcs->ppcs; |
236 | 0 | Av1Common* cm = ppcs->av1_cm; |
237 | 0 | FrameHeader* frm_hdr = &ppcs->frm_hdr; |
238 | 0 | bool is_16bit = scs->is_16bit_pipeline; |
239 | |
|
240 | 0 | EbPictureBufferDesc* recon_pic; |
241 | 0 | svt_aom_get_recon_pic(pcs, &recon_pic, is_16bit); |
242 | |
|
243 | 0 | const int32_t num_planes = av1_num_planes(&scs->seq_header.color_config); |
244 | 0 | DECLARE_ALIGNED(16, uint16_t, src[CDEF_INBUF_SIZE]); |
245 | 0 | uint16_t* linebuf[3]; |
246 | 0 | uint16_t* colbuf[3]; |
247 | 0 | CdefList dlist[MI_SIZE_64X64 * MI_SIZE_64X64]; |
248 | 0 | uint8_t * row_cdef, *prev_row_cdef, *curr_row_cdef; |
249 | 0 | int32_t cdef_count; |
250 | 0 | const uint32_t sb_size = scs->super_block_size; |
251 | 0 | int32_t mi_wide_l2[3]; |
252 | 0 | int32_t mi_high_l2[3]; |
253 | 0 | int32_t xdec[3]; |
254 | 0 | int32_t ydec[3]; |
255 | 0 | int32_t coeff_shift = AOMMAX(scs->static_config.encoder_bit_depth - 8, 0); |
256 | 0 | const int32_t nvfb = (cm->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64; |
257 | 0 | const int32_t nhfb = (cm->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64; |
258 | 0 | const uint32_t cdef_size = sizeof(*row_cdef) * (nhfb + 2) * 2; |
259 | |
|
260 | 0 | row_cdef = (uint8_t*)svt_aom_malloc(cdef_size); |
261 | 0 | assert(row_cdef != NULL); |
262 | 0 | memset(row_cdef, 1, cdef_size); |
263 | 0 | prev_row_cdef = row_cdef + 1; |
264 | 0 | curr_row_cdef = prev_row_cdef + nhfb + 2; |
265 | 0 | for (int32_t pli = 0; pli < num_planes; pli++) { |
266 | 0 | int32_t subsampling_x = (pli == 0) ? 0 : 1; |
267 | 0 | int32_t subsampling_y = (pli == 0) ? 0 : 1; |
268 | 0 | xdec[pli] = subsampling_x; //CHKN xd->plane[pli].subsampling_x; |
269 | 0 | ydec[pli] = subsampling_y; //CHKN xd->plane[pli].subsampling_y; |
270 | 0 | mi_wide_l2[pli] = MI_SIZE_LOG2 - subsampling_x; //CHKN xd->plane[pli].subsampling_x; |
271 | 0 | mi_high_l2[pli] = MI_SIZE_LOG2 - subsampling_y; //CHKN xd->plane[pli].subsampling_y; |
272 | 0 | } |
273 | |
|
274 | 0 | const int32_t stride = (cm->mi_cols << MI_SIZE_LOG2) + 2 * CDEF_HBORDER; |
275 | 0 | for (int32_t pli = 0; pli < num_planes; pli++) { |
276 | 0 | linebuf[pli] = (uint16_t*)svt_aom_malloc(sizeof(*linebuf) * CDEF_VBORDER * stride); |
277 | 0 | colbuf[pli] = (uint16_t*)svt_aom_malloc( |
278 | 0 | sizeof(*colbuf) * ((CDEF_BLOCKSIZE << mi_high_l2[pli]) + 2 * CDEF_VBORDER) * CDEF_HBORDER); |
279 | 0 | } |
280 | 0 | #if OPT_CDEF_SKIP_CHROMA_BORDER |
281 | | // Frame-level check: if every UV strength entry is 0, no chroma block |
282 | | // will ever be filtered. In that case skip all chroma border copies |
283 | | // (including linebuf/colbuf saves) for the entire frame |
284 | 0 | bool chroma_filter_off = (num_planes > 1); |
285 | 0 | if (chroma_filter_off) { |
286 | 0 | for (int32_t i = 0; i < ppcs->nb_cdef_strengths; i++) { |
287 | 0 | if (frm_hdr->cdef_params.cdef_uv_strength[i] != 0) { |
288 | 0 | chroma_filter_off = false; |
289 | 0 | break; |
290 | 0 | } |
291 | 0 | } |
292 | 0 | } |
293 | 0 | const int32_t active_planes = chroma_filter_off ? 1 : num_planes; |
294 | 0 | #endif |
295 | 0 | for (int32_t fbr = 0; fbr < nvfb; fbr++) { |
296 | 0 | int32_t cdef_left = 1; |
297 | 0 | for (int32_t fbc = 0; fbc < nhfb; fbc++) { |
298 | 0 | int32_t level, sec_strength; |
299 | 0 | int32_t uv_level, uv_sec_strength; |
300 | 0 | int32_t nhb, nvb; |
301 | 0 | int32_t cstart = 0; |
302 | 0 | curr_row_cdef[fbc] = 0; |
303 | 0 | assert(pcs->mi_grid_base[MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc] != NULL && |
304 | 0 | "CDEF ERROR: Skipping Current FB"); |
305 | 0 | assert(pcs->mi_grid_base[MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc]->cdef_strength != -1 && |
306 | 0 | "CDEF ERROR: Skipping Current FB"); |
307 | 0 | if (!cdef_left) { |
308 | 0 | cstart = |
309 | 0 | -CDEF_HBORDER; //CHKN if the left block has not been filtered, then we can use samples on the left as input. |
310 | 0 | } |
311 | |
|
312 | 0 | nhb = AOMMIN(MI_SIZE_64X64, cm->mi_cols - MI_SIZE_64X64 * fbc); |
313 | 0 | nvb = AOMMIN(MI_SIZE_64X64, cm->mi_rows - MI_SIZE_64X64 * fbr); |
314 | 0 | int32_t frame_top, frame_left, frame_bottom, frame_right; |
315 | |
|
316 | 0 | int32_t mi_row = MI_SIZE_64X64 * fbr; |
317 | 0 | int32_t mi_col = MI_SIZE_64X64 * fbc; |
318 | | // for the current filter block, it's top left corner mi structure (mi_tl) |
319 | | // is first accessed to check whether the top and left boundaries are |
320 | | // frame boundaries. Then bottom-left and top-right mi structures are |
321 | | // accessed to check whether the bottom and right boundaries |
322 | | // (respectively) are frame boundaries. |
323 | | // |
324 | | // Note that we can't just check the bottom-right mi structure - eg. if |
325 | | // we're at the right-hand edge of the frame but not the bottom, then |
326 | | // the bottom-right mi is NULL but the bottom-left is not. |
327 | 0 | frame_top = (mi_row == 0) ? 1 : 0; |
328 | 0 | frame_left = (mi_col == 0) ? 1 : 0; |
329 | |
|
330 | 0 | if (fbr != nvfb - 1) { |
331 | 0 | frame_bottom = (mi_row + MI_SIZE_64X64 == cm->mi_rows) ? 1 : 0; |
332 | 0 | } else { |
333 | 0 | frame_bottom = 1; |
334 | 0 | } |
335 | |
|
336 | 0 | if (fbc != nhfb - 1) { |
337 | 0 | frame_right = (mi_col + MI_SIZE_64X64 == cm->mi_cols) ? 1 : 0; |
338 | 0 | } else { |
339 | 0 | frame_right = 1; |
340 | 0 | } |
341 | | |
342 | | // Find the index of the CDEF strength for the filter block |
343 | 0 | const int32_t mbmi_cdef_strength = |
344 | 0 | pcs->mi_grid_base[MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc]->cdef_strength; |
345 | 0 | level = frm_hdr->cdef_params.cdef_y_strength[mbmi_cdef_strength] / CDEF_SEC_STRENGTHS; |
346 | 0 | sec_strength = frm_hdr->cdef_params.cdef_y_strength[mbmi_cdef_strength] % CDEF_SEC_STRENGTHS; |
347 | | // Secondary luma strength takes values in {0, 1, 2, 4}. If sec_strength is equal to 3 from the step above, change it to 4. |
348 | 0 | sec_strength += sec_strength == 3; |
349 | | // Set primary and secondary chroma strengths. |
350 | 0 | uv_level = frm_hdr->cdef_params.cdef_uv_strength[mbmi_cdef_strength] / CDEF_SEC_STRENGTHS; |
351 | 0 | uv_sec_strength = frm_hdr->cdef_params.cdef_uv_strength[mbmi_cdef_strength] % CDEF_SEC_STRENGTHS; |
352 | | // Secondary chroma strength takes values in {0, 1, 2, 4}. If sec_strength is equal to 3 from the step above, change it to 4. |
353 | 0 | uv_sec_strength += uv_sec_strength == 3; |
354 | 0 | if ((level == 0 && sec_strength == 0 && uv_level == 0 && uv_sec_strength == 0) || |
355 | 0 | (cdef_count = svt_sb_compute_cdef_list( |
356 | 0 | pcs, cm, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64, dlist, BLOCK_64X64)) == 0) { |
357 | 0 | cdef_left = 0; |
358 | 0 | continue; |
359 | 0 | } |
360 | | |
361 | 0 | int dirinit = !(ppcs->cdef_search_ctrls.use_reference_cdef_fs || ppcs->cdef_search_ctrls.use_qp_strength); |
362 | | // When SB 128 is used, the search for certain blocks is skipped, so dir/var info is not generated |
363 | | // In those cases, must generate info here |
364 | 0 | if (sb_size == 128) { |
365 | 0 | const uint32_t lc = MI_SIZE_64X64 * fbc; |
366 | 0 | const uint32_t lr = MI_SIZE_64X64 * fbr; |
367 | 0 | const MbModeInfo* mbmi = pcs->mi_grid_base[lr * cm->mi_stride + lc]; |
368 | 0 | const BlockSize bsize = mbmi->bsize; |
369 | 0 | if (((fbc & 1) && (bsize == BLOCK_128X128 || bsize == BLOCK_128X64)) || |
370 | 0 | ((fbr & 1) && (bsize == BLOCK_128X128 || bsize == BLOCK_64X128))) { |
371 | 0 | dirinit = 0; |
372 | 0 | } |
373 | 0 | } |
374 | 0 | uint8_t (*dir)[CDEF_NBLOCKS][CDEF_NBLOCKS] = &pcs->cdef_dir_data[fbr * nhfb + fbc].dir; |
375 | 0 | int32_t (*var)[CDEF_NBLOCKS][CDEF_NBLOCKS] = &pcs->cdef_dir_data[fbr * nhfb + fbc].var; |
376 | 0 | curr_row_cdef[fbc] = 1; |
377 | 0 | #if OPT_CDEF_SKIP_CHROMA_BORDER |
378 | 0 | for (int32_t pli = 0; pli < active_planes; pli++) { |
379 | | #else |
380 | | for (int32_t pli = 0; pli < num_planes; pli++) { |
381 | | #endif |
382 | 0 | int32_t coffset; |
383 | 0 | int32_t rend, cend; |
384 | 0 | int32_t pri_damping = frm_hdr->cdef_params.cdef_damping; |
385 | 0 | int32_t sec_damping = pri_damping; |
386 | 0 | int32_t hsize = nhb << mi_wide_l2[pli]; |
387 | 0 | int32_t vsize = nvb << mi_high_l2[pli]; |
388 | 0 | if (fbc == nhfb - 1) { |
389 | 0 | cend = hsize; |
390 | 0 | } else { |
391 | 0 | cend = hsize + CDEF_HBORDER; |
392 | 0 | } |
393 | |
|
394 | 0 | if (fbr == nvfb - 1) { |
395 | 0 | rend = vsize; |
396 | 0 | } else { |
397 | 0 | rend = vsize + CDEF_VBORDER; |
398 | 0 | } |
399 | |
|
400 | 0 | coffset = fbc * MI_SIZE_64X64 << mi_wide_l2[pli]; |
401 | 0 | EbByte rec_buff = recon_pic->buffer[pli]; |
402 | 0 | uint32_t rec_stride = recon_pic->stride[pli]; |
403 | 0 | if (pli) { |
404 | 0 | level = uv_level; |
405 | 0 | sec_strength = uv_sec_strength; |
406 | 0 | } |
407 | | |
408 | | /* Copy in the pixels we need from the current superblock for |
409 | | deringing.*/ |
410 | 0 | svt_aom_copy_sb8_16(&src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER + cstart], |
411 | 0 | CDEF_BSTRIDE, |
412 | 0 | rec_buff, |
413 | 0 | (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr, |
414 | 0 | coffset + cstart, |
415 | 0 | rec_stride, |
416 | 0 | rend, |
417 | 0 | cend - cstart, |
418 | 0 | is_16bit); |
419 | 0 | if (!prev_row_cdef[fbc]) { |
420 | 0 | svt_aom_copy_sb8_16(&src[CDEF_HBORDER], |
421 | 0 | CDEF_BSTRIDE, |
422 | 0 | rec_buff, |
423 | 0 | (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr - CDEF_VBORDER, |
424 | 0 | coffset, |
425 | 0 | rec_stride, |
426 | 0 | CDEF_VBORDER, |
427 | 0 | hsize, |
428 | 0 | is_16bit); |
429 | 0 | } else if (fbr > 0) { |
430 | 0 | svt_aom_copy_rect( |
431 | 0 | &src[CDEF_HBORDER], CDEF_BSTRIDE, &linebuf[pli][coffset], stride, CDEF_VBORDER, hsize); |
432 | 0 | } else { |
433 | 0 | svt_aom_fill_rect(&src[CDEF_HBORDER], CDEF_BSTRIDE, CDEF_VBORDER, hsize, CDEF_VERY_LARGE); |
434 | 0 | } |
435 | |
|
436 | 0 | if (!prev_row_cdef[fbc - 1]) { |
437 | 0 | svt_aom_copy_sb8_16(src, |
438 | 0 | CDEF_BSTRIDE, |
439 | 0 | rec_buff, |
440 | 0 | (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr - CDEF_VBORDER, |
441 | 0 | coffset - CDEF_HBORDER, |
442 | 0 | rec_stride, |
443 | 0 | CDEF_VBORDER, |
444 | 0 | CDEF_HBORDER, |
445 | 0 | is_16bit); |
446 | 0 | } else if (fbr > 0 && fbc > 0) { |
447 | 0 | svt_aom_copy_rect( |
448 | 0 | src, CDEF_BSTRIDE, &linebuf[pli][coffset - CDEF_HBORDER], stride, CDEF_VBORDER, CDEF_HBORDER); |
449 | 0 | } else { |
450 | 0 | svt_aom_fill_rect(src, CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER, CDEF_VERY_LARGE); |
451 | 0 | } |
452 | |
|
453 | 0 | if (!prev_row_cdef[fbc + 1]) { |
454 | 0 | svt_aom_copy_sb8_16(&src[CDEF_HBORDER + (nhb << mi_wide_l2[pli])], |
455 | 0 | CDEF_BSTRIDE, |
456 | 0 | rec_buff, |
457 | 0 | (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr - CDEF_VBORDER, |
458 | 0 | coffset + hsize, |
459 | 0 | rec_stride, |
460 | 0 | CDEF_VBORDER, |
461 | 0 | CDEF_HBORDER, |
462 | 0 | is_16bit); |
463 | 0 | } else if (fbr > 0 && fbc < nhfb - 1) { |
464 | 0 | svt_aom_copy_rect(&src[hsize + CDEF_HBORDER], |
465 | 0 | CDEF_BSTRIDE, |
466 | 0 | &linebuf[pli][coffset + hsize], |
467 | 0 | stride, |
468 | 0 | CDEF_VBORDER, |
469 | 0 | CDEF_HBORDER); |
470 | 0 | } else { |
471 | 0 | svt_aom_fill_rect( |
472 | 0 | &src[hsize + CDEF_HBORDER], CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER, CDEF_VERY_LARGE); |
473 | 0 | } |
474 | |
|
475 | 0 | if (cdef_left) { |
476 | | /* If we deringed the superblock on the left then we need to copy in |
477 | | saved pixels. */ |
478 | 0 | svt_aom_copy_rect(src, CDEF_BSTRIDE, colbuf[pli], CDEF_HBORDER, rend + CDEF_VBORDER, CDEF_HBORDER); |
479 | 0 | } |
480 | | |
481 | | /* Saving pixels in case we need to dering the superblock on the |
482 | | right. */ |
483 | 0 | if (fbc < nhfb - 1) { |
484 | 0 | svt_aom_copy_rect( |
485 | 0 | colbuf[pli], CDEF_HBORDER, src + hsize, CDEF_BSTRIDE, rend + CDEF_VBORDER, CDEF_HBORDER); |
486 | 0 | } |
487 | |
|
488 | 0 | if (fbr < nvfb - 1) { |
489 | 0 | svt_aom_copy_sb8_16(&linebuf[pli][coffset], |
490 | 0 | stride, |
491 | 0 | rec_buff, |
492 | 0 | (MI_SIZE_64X64 << mi_high_l2[pli]) * (fbr + 1) - CDEF_VBORDER, |
493 | 0 | coffset, |
494 | 0 | rec_stride, |
495 | 0 | CDEF_VBORDER, |
496 | 0 | hsize, |
497 | 0 | is_16bit); |
498 | 0 | } |
499 | |
|
500 | 0 | if (frame_top) { |
501 | 0 | svt_aom_fill_rect(src, CDEF_BSTRIDE, CDEF_VBORDER, hsize + 2 * CDEF_HBORDER, CDEF_VERY_LARGE); |
502 | 0 | } |
503 | 0 | if (frame_left) { |
504 | 0 | svt_aom_fill_rect(src, CDEF_BSTRIDE, vsize + 2 * CDEF_VBORDER, CDEF_HBORDER, CDEF_VERY_LARGE); |
505 | 0 | } |
506 | 0 | if (frame_bottom) { |
507 | 0 | svt_aom_fill_rect(&src[(vsize + CDEF_VBORDER) * CDEF_BSTRIDE], |
508 | 0 | CDEF_BSTRIDE, |
509 | 0 | CDEF_VBORDER, |
510 | 0 | hsize + 2 * CDEF_HBORDER, |
511 | 0 | CDEF_VERY_LARGE); |
512 | 0 | } |
513 | 0 | if (frame_right) { |
514 | 0 | svt_aom_fill_rect(&src[hsize + CDEF_HBORDER], |
515 | 0 | CDEF_BSTRIDE, |
516 | 0 | vsize + 2 * CDEF_VBORDER, |
517 | 0 | CDEF_HBORDER, |
518 | 0 | CDEF_VERY_LARGE); |
519 | 0 | } |
520 | | // if ppcs->cdef_ctrls.use_reference_cdef_fs is true, then search was not performed |
521 | | // Therefore, need to make sure dir and var are initialized |
522 | 0 | if (level || sec_strength || !dirinit) { |
523 | 0 | svt_cdef_filter_fb( |
524 | 0 | is_16bit ? NULL |
525 | 0 | : &rec_buff[rec_stride * (MI_SIZE_64X64 * fbr << mi_high_l2[pli]) + |
526 | 0 | (fbc * MI_SIZE_64X64 << mi_wide_l2[pli])], |
527 | 0 | is_16bit ? &((uint16_t*)rec_buff)[rec_stride * (MI_SIZE_64X64 * fbr << mi_high_l2[pli]) + |
528 | 0 | (fbc * MI_SIZE_64X64 << mi_wide_l2[pli])] |
529 | 0 | : NULL, |
530 | 0 | rec_stride, |
531 | 0 | &src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER], |
532 | 0 | xdec[pli], |
533 | 0 | ydec[pli], |
534 | 0 | *dir, |
535 | 0 | &dirinit, |
536 | 0 | *var, |
537 | 0 | pli, |
538 | 0 | dlist, |
539 | 0 | cdef_count, |
540 | 0 | level, |
541 | 0 | sec_strength, |
542 | 0 | pri_damping, |
543 | 0 | sec_damping, |
544 | 0 | coeff_shift, |
545 | 0 | 1); // no subsampling |
546 | 0 | } |
547 | 0 | } |
548 | 0 | cdef_left = 1; //CHKN filtered data is written back directy to recFrame. |
549 | 0 | } |
550 | 0 | { |
551 | 0 | uint8_t* tmp = prev_row_cdef; |
552 | 0 | prev_row_cdef = curr_row_cdef; |
553 | 0 | curr_row_cdef = tmp; |
554 | 0 | } |
555 | 0 | } |
556 | 0 | svt_aom_free(row_cdef); |
557 | 0 | for (int32_t pli = 0; pli < num_planes; pli++) { |
558 | 0 | svt_aom_free(linebuf[pli]); |
559 | 0 | svt_aom_free(colbuf[pli]); |
560 | 0 | } |
561 | 0 | } |
562 | | |
563 | | ///-------search |
564 | | /* |
565 | | * Search for the best luma+chroma strength to add as an option, knowing we |
566 | | * already selected nb_strengths options |
567 | | * |
568 | | * Params: |
569 | | * |
570 | | * lev0 : Array of indices of selected luma strengths. |
571 | | * lev1 : Array of indices of selected chroma strengths. |
572 | | * nb_strengths : Number of selected (Luma_strength, Chroma_strength) pairs. |
573 | | * mse : Array of luma and chroma filtering mse values. |
574 | | * sb_count : Number of filter blocks in the frame. |
575 | | * start_gi : starting strength index for the search of the additional strengths. |
576 | | * end_gi : End index for the for the search of the additional strengths. |
577 | | */ |
578 | | uint64_t svt_search_one_dual_c(int* lev0, int* lev1, int nb_strengths, uint64_t** mse[2], int sb_count, int start_gi, |
579 | 0 | int end_gi) { |
580 | 0 | uint64_t tot_mse[TOTAL_STRENGTHS][TOTAL_STRENGTHS]; |
581 | 0 | int32_t i, j; |
582 | 0 | uint64_t best_tot_mse = (uint64_t)1 << 63; |
583 | 0 | int32_t best_id0 = 0; |
584 | 0 | int32_t best_id1 = 0; |
585 | 0 | const int32_t total_strengths = end_gi; |
586 | 0 | memset(tot_mse, 0, sizeof(tot_mse)); |
587 | | /* Loop over the filter blocks in the frame */ |
588 | 0 | for (i = 0; i < sb_count; i++) { |
589 | 0 | int32_t gi; |
590 | 0 | uint64_t best_mse = (uint64_t)1 << 63; |
591 | | /* Loop over the already selected nb_strengths (Luma_strength, |
592 | | Chroma_strength) pairs, and find the pair that has the smallest mse |
593 | | (best_mse) for the current filter block.*/ |
594 | | /* Find best mse among already selected options. */ |
595 | 0 | for (gi = 0; gi < nb_strengths; gi++) { |
596 | 0 | uint64_t curr = mse[0][i][lev0[gi]]; |
597 | 0 | curr += mse[1][i][lev1[gi]]; |
598 | 0 | if (curr < best_mse) { |
599 | 0 | best_mse = curr; |
600 | 0 | } |
601 | 0 | } |
602 | | /* Loop over the set of available (Luma_strength, Chroma_strength) |
603 | | pairs, identify any that provide an mse better than best_mse from the |
604 | | step above for the current filter block, and update any corresponding |
605 | | total mse (tot_mse[j][k]). */ |
606 | | /* Find best mse when adding each possible new option. */ |
607 | 0 | for (j = start_gi; j < total_strengths; j++) { |
608 | 0 | int32_t k; |
609 | 0 | for (k = start_gi; k < total_strengths; k++) { |
610 | 0 | uint64_t best = best_mse; |
611 | 0 | uint64_t curr = mse[0][i][j]; |
612 | 0 | curr += mse[1][i][k]; |
613 | 0 | if (curr < best) { |
614 | 0 | best = curr; |
615 | 0 | } |
616 | 0 | tot_mse[j][k] += best; |
617 | 0 | } |
618 | 0 | } |
619 | 0 | } |
620 | | /* Loop over the additionally searched (Luma_strength, Chroma_strength) pairs |
621 | | from the step above, and identify any such pair that provided the best mse for |
622 | | the whole frame. The identified pair would be added to the set of already selected pairs. */ |
623 | 0 | for (j = start_gi; j < total_strengths; j++) { // Loop over the additionally searched luma strengths |
624 | 0 | int32_t k; |
625 | 0 | for (k = start_gi; k < total_strengths; k++) { // Loop over the additionally searched chroma strengths |
626 | 0 | if (tot_mse[j][k] < best_tot_mse) { |
627 | 0 | best_tot_mse = tot_mse[j][k]; |
628 | 0 | best_id0 = j; // index for the best luma strength |
629 | 0 | best_id1 = k; // index for the best chroma strength |
630 | 0 | } |
631 | 0 | } |
632 | 0 | } |
633 | 0 | lev0[nb_strengths] = best_id0; // Add the identified luma strength to the list of selected luma strengths |
634 | 0 | lev1[nb_strengths] = best_id1; // Add the identified chroma strength to the list of selected chroma strengths |
635 | 0 | return best_tot_mse; |
636 | 0 | } |
637 | | |
/*
 * Search for the set of luma+chroma strengths that minimizes mse.
 *
 * Params:
 *
 * best_lev0    : Array receiving the indices of the selected luma strengths.
 * best_lev1    : Array receiving the indices of the selected chroma strengths.
 * nb_strengths : Number of (Luma_strength, Chroma_strength) pairs to select.
 * mse          : Array of luma and chroma filtering mse values.
 * sb_count     : Number of filter blocks in the frame.
 * start_gi     : Starting strength index for the search.
 * end_gi       : End strength index for the search.
 *
 * Returns the total mse reported by the last svt_search_one_dual() call, or
 * (uint64_t)1 << 63 when nb_strengths is not positive.
 */
static uint64_t joint_strength_search_dual(int32_t* best_lev0, int32_t* best_lev1, int32_t nb_strengths,
                                           uint64_t** mse[2], int32_t sb_count, int32_t start_gi, int32_t end_gi) {
    uint64_t best_tot_mse = (uint64_t)1 << 63;

    /* Greedy pass: grow the list one pair at a time. Each call sees the pairs
       chosen by the previous calls and appends the pair that lowers the frame
       mse the most. */
    for (int32_t chosen = 0; chosen < nb_strengths; chosen++) {
        best_tot_mse = svt_search_one_dual(best_lev0, best_lev1, chosen, mse, sb_count, start_gi, end_gi);
    }

    /* Refinement passes: drop the oldest pair by shifting the lists down one
       slot, then search again for the pair to put in the last slot. */
    const int32_t last_slot = nb_strengths - 1;
    for (int32_t pass = 0; pass < 4 * nb_strengths; pass++) {
        for (int32_t slot = 0; slot < last_slot; slot++) {
            best_lev0[slot] = best_lev0[slot + 1];
            best_lev1[slot] = best_lev1[slot + 1];
        }
        best_tot_mse = svt_search_one_dual(best_lev0, best_lev1, last_slot, mse, sb_count, start_gi, end_gi);
    }
    return best_tot_mse;
}
683 | | |
684 | | // This kernel is ported/adapted from libaom (AV1 reference implementation). |
685 | | // Original logic inspired by aom_pick_cdef_from_qp(). |
686 | | // Adjusted to match SVT-AV1 data structures and pipeline integration. |
687 | | static void svt_pick_cdef_from_qp(PictureParentControlSet* ppcs, int32_t is_screen_content, int32_t* pred_y_strength, |
688 | 274 | int32_t* pred_uv_strength) { |
689 | 274 | FrameHeader* frm_hdr = &ppcs->frm_hdr; |
690 | 274 | const uint8_t bit_depth = ppcs->enhanced_pic->bit_depth; |
691 | 274 | const int32_t base_q_idx = frm_hdr->quantization_params.base_q_idx; |
692 | | |
693 | 274 | int32_t q = svt_aom_ac_quant_qtx(base_q_idx, 0, bit_depth); |
694 | 274 | q >>= (bit_depth - 8); |
695 | | |
696 | 274 | int32_t y_f1 = 0, y_f2 = 0; |
697 | 274 | int32_t uv_f1 = 0, uv_f2 = 0; |
698 | | |
699 | 274 | const int32_t is_intra = (frm_hdr->frame_type == KEY_FRAME || frm_hdr->frame_type == INTRA_ONLY_FRAME); |
700 | | |
701 | 274 | if (is_screen_content) { |
702 | 0 | y_f1 = (int32_t)(5.88217781e-06 * q * q + 6.10391455e-03 * q + 9.95043102e-02); |
703 | |
|
704 | 0 | y_f2 = (int32_t)(-7.79934857e-06 * q * q + 6.58957830e-03 * q + 8.81045025e-01); |
705 | |
|
706 | 0 | uv_f1 = (int32_t)(-6.79500136e-06 * q * q + 1.02695586e-02 * q + 1.36126802e-01); |
707 | |
|
708 | 0 | uv_f2 = (int32_t)(-9.99613695e-08 * q * q - 1.79361339e-05 * q + 1.17022324e+0); |
709 | 274 | } else if (!is_intra) { |
710 | 0 | y_f1 = (int32_t)roundf(q * q * -0.0000023593946f + q * 0.0068615186f + 0.02709886f); |
711 | |
|
712 | 0 | y_f2 = (int32_t)roundf(q * q * -0.00000057629734f + q * 0.0013993345f + 0.03831067f); |
713 | |
|
714 | 0 | uv_f1 = (int32_t)roundf(q * q * -0.0000007095069f + q * 0.0034628846f + 0.00887099f); |
715 | |
|
716 | 0 | uv_f2 = (int32_t)roundf(q * q * 0.00000023874085f + q * 0.00028223585f + 0.05576307f); |
717 | 274 | } else { // Intra |
718 | 274 | y_f1 = (int32_t)roundf(q * q * 0.0000033731974f + q * 0.008070594f + 0.0187634f); |
719 | | |
720 | 274 | y_f2 = (int32_t)roundf(q * q * 0.0000029167343f + q * 0.0027798624f + 0.0079405f); |
721 | | |
722 | 274 | uv_f1 = (int32_t)roundf(q * q * -0.0000130790995f + q * 0.012892405f - 0.00748388f); |
723 | | |
724 | 274 | uv_f2 = (int32_t)roundf(q * q * 0.0000032651783f + q * 0.00035520183f + 0.00228092f); |
725 | 274 | } |
726 | | |
727 | | // Clamp to AV1 limits |
728 | 274 | y_f1 = clamp(y_f1, 0, 15); |
729 | 274 | y_f2 = clamp(y_f2, 0, 3); |
730 | 274 | uv_f1 = clamp(uv_f1, 0, 15); |
731 | 274 | uv_f2 = clamp(uv_f2, 0, 3); |
732 | | |
733 | | // Pack primary + secondary |
734 | 274 | *pred_y_strength = y_f1 * CDEF_SEC_STRENGTHS + y_f2; |
735 | 274 | *pred_uv_strength = uv_f1 * CDEF_SEC_STRENGTHS + uv_f2; |
736 | 274 | } |
737 | | |
738 | | #if CLN_FINISH_CDEF |
739 | | |
740 | | // Propagate cdef_strength to all 64x64 mi |
741 | 3.38k | static INLINE void propagate_cdef_strength(PictureControlSet* pcs, int32_t sb_index, int8_t strength) { |
742 | 3.38k | MbModeInfo* mbmi = pcs->mi_grid_base[sb_index]; |
743 | 3.38k | mbmi->cdef_strength = strength; |
744 | 3.38k | switch (mbmi->bsize) { |
745 | 0 | case BLOCK_128X128: |
746 | 0 | pcs->mi_grid_base[sb_index + MI_SIZE_64X64]->cdef_strength = strength; |
747 | 0 | pcs->mi_grid_base[sb_index + MI_SIZE_64X64 * pcs->mi_stride]->cdef_strength = strength; |
748 | 0 | pcs->mi_grid_base[sb_index + MI_SIZE_64X64 * pcs->mi_stride + MI_SIZE_64X64]->cdef_strength = strength; |
749 | 0 | break; |
750 | 0 | case BLOCK_128X64: |
751 | 0 | pcs->mi_grid_base[sb_index + MI_SIZE_64X64]->cdef_strength = strength; |
752 | 0 | break; |
753 | 0 | case BLOCK_64X128: |
754 | 0 | pcs->mi_grid_base[sb_index + MI_SIZE_64X64 * pcs->mi_stride]->cdef_strength = strength; |
755 | 0 | break; |
756 | 3.38k | default: |
757 | 3.38k | break; |
758 | 3.38k | } |
759 | 3.38k | } |
760 | | |
761 | 274 | #define CDEF_DAMPING_FROM_QP(base_q_idx) (3 + ((base_q_idx) >> 6)) |
762 | | |
763 | 274 | void finish_cdef_search(PictureControlSet* pcs) { |
764 | 274 | PictureParentControlSet* ppcs = pcs->ppcs; |
765 | 274 | FrameHeader* frm_hdr = &ppcs->frm_hdr; |
766 | 274 | Av1Common* cm = ppcs->av1_cm; |
767 | 274 | int32_t mi_rows = ppcs->av1_cm->mi_rows; |
768 | 274 | int32_t mi_cols = ppcs->av1_cm->mi_cols; |
769 | | |
770 | 274 | int32_t fbr, fbc; |
771 | 274 | uint64_t best_tot_mse = (uint64_t)1 << 63; |
772 | 274 | int32_t sb_count; |
773 | 274 | int32_t nvfb = (mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64; |
774 | 274 | int32_t nhfb = (mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64; |
775 | | |
776 | 274 | CdefSearchControls* cdef_search_ctrls = &pcs->ppcs->cdef_search_ctrls; |
777 | | |
778 | 274 | if (cdef_search_ctrls->use_qp_strength) { |
779 | 274 | #if OPT_SC_CDEF_QP |
780 | 274 | const bool allintra = ppcs->scs->allintra; |
781 | 274 | const uint8_t sc_class1 = ppcs->sc_class1; |
782 | 274 | const uint8_t sc_class5 = ppcs->sc_class5; |
783 | 274 | const uint8_t sc = allintra ? sc_class5 : sc_class1; |
784 | 274 | int pred_y, pred_uv; |
785 | 274 | svt_pick_cdef_from_qp(ppcs, sc, &pred_y, &pred_uv); |
786 | | #else |
787 | | int pred_y, pred_uv; |
788 | | svt_pick_cdef_from_qp(ppcs, 0, &pred_y, &pred_uv); |
789 | | #endif |
790 | 274 | frm_hdr->cdef_params.cdef_bits = 0; |
791 | 274 | ppcs->nb_cdef_strengths = 1; |
792 | 274 | frm_hdr->cdef_params.cdef_y_strength[0] = pred_y; |
793 | 274 | frm_hdr->cdef_params.cdef_uv_strength[0] = pred_uv; |
794 | 274 | frm_hdr->cdef_params.cdef_damping = CDEF_DAMPING_FROM_QP(frm_hdr->quantization_params.base_q_idx); |
795 | | |
796 | 1.26k | for (fbr = 0; fbr < nvfb; ++fbr) { |
797 | 4.81k | for (fbc = 0; fbc < nhfb; ++fbc) { |
798 | 3.82k | const int32_t sb_idx = MI_SIZE_64X64 * fbr * pcs->mi_stride + MI_SIZE_64X64 * fbc; |
799 | 3.82k | const MbModeInfo* mbmi = pcs->mi_grid_base[sb_idx]; |
800 | | |
801 | 3.82k | if (((fbc & 1) && (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_128X64)) || |
802 | 3.82k | ((fbr & 1) && (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_64X128))) { |
803 | 0 | continue; |
804 | 0 | } |
805 | 3.82k | if (svt_sb_all_skip(pcs, cm, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64)) { |
806 | 438 | continue; |
807 | 438 | } |
808 | | |
809 | 3.38k | propagate_cdef_strength(pcs, sb_idx, 0); |
810 | 3.38k | } |
811 | 988 | } |
812 | 274 | return; |
813 | 274 | } |
814 | | |
815 | 0 | CdefReconControls* cdef_recon_ctrls = &pcs->ppcs->cdef_recon_ctrls; |
816 | 0 | const int first_pass_fs_num = cdef_search_ctrls->first_pass_fs_num; |
817 | 0 | const int default_second_pass_fs_num = cdef_search_ctrls->default_second_pass_fs_num; |
818 | |
|
819 | 0 | frm_hdr->cdef_params.cdef_bits = 0; |
820 | 0 | ppcs->nb_cdef_strengths = 1; |
821 | 0 | frm_hdr->cdef_params.cdef_y_strength[0] = cdef_search_ctrls->pred_y_f; |
822 | 0 | frm_hdr->cdef_params.cdef_uv_strength[0] = cdef_search_ctrls->pred_uv_f; |
823 | 0 | frm_hdr->cdef_params.cdef_damping = CDEF_DAMPING_FROM_QP(frm_hdr->quantization_params.base_q_idx); |
824 | |
|
825 | 0 | if (cdef_search_ctrls->use_reference_cdef_fs) { |
826 | 0 | for (fbr = 0; fbr < nvfb; ++fbr) { |
827 | 0 | for (fbc = 0; fbc < nhfb; ++fbc) { |
828 | 0 | const int32_t sb_idx = MI_SIZE_64X64 * fbr * pcs->mi_stride + MI_SIZE_64X64 * fbc; |
829 | 0 | const MbModeInfo* mbmi = pcs->mi_grid_base[sb_idx]; |
830 | |
|
831 | 0 | if (((fbc & 1) && (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_128X64)) || |
832 | 0 | ((fbr & 1) && (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_64X128))) { |
833 | 0 | continue; |
834 | 0 | } |
835 | 0 | if (svt_sb_all_skip(pcs, cm, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64)) { |
836 | 0 | continue; |
837 | 0 | } |
838 | | |
839 | 0 | propagate_cdef_strength(pcs, sb_idx, 0); |
840 | 0 | } |
841 | 0 | } |
842 | 0 | return; |
843 | 0 | } |
844 | | |
845 | 0 | int32_t* sb_index; |
846 | 0 | EB_MALLOC_ARRAY_NO_CHECK(sb_index, nvfb * nhfb); |
847 | 0 | assert(sb_index != NULL); |
848 | |
|
849 | 0 | uint64_t** mse[2]; |
850 | 0 | EB_MALLOC_ARRAY_NO_CHECK(mse[0], nvfb * nhfb); |
851 | 0 | EB_MALLOC_ARRAY_NO_CHECK(mse[1], nvfb * nhfb); |
852 | 0 | assert(mse[0] != NULL); |
853 | 0 | assert(mse[1] != NULL); |
854 | |
|
855 | 0 | const int32_t start_gi = 0; |
856 | 0 | const int32_t end_gi = first_pass_fs_num + default_second_pass_fs_num; |
857 | 0 | int32_t i; |
858 | 0 | int32_t nb_strengths; |
859 | 0 | int32_t nb_strength_bits = 0; |
860 | 0 | uint64_t lambda; |
861 | 0 | uint32_t fast_lambda, full_lambda = 0; |
862 | |
|
863 | 0 | svt_aom_lambda_assign(pcs, |
864 | 0 | &fast_lambda, |
865 | 0 | &full_lambda, |
866 | 0 | pcs->ppcs->enhanced_pic->bit_depth, |
867 | 0 | pcs->ppcs->frm_hdr.quantization_params.base_q_idx, |
868 | 0 | false); |
869 | 0 | lambda = full_lambda; |
870 | 0 | sb_count = 0; |
871 | |
|
872 | 0 | for (fbr = 0; fbr < nvfb; ++fbr) { |
873 | 0 | for (fbc = 0; fbc < nhfb; ++fbc) { |
874 | 0 | const MbModeInfo* mbmi = pcs->mi_grid_base[MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc]; |
875 | 0 | if (((fbc & 1) && (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_128X64)) || |
876 | 0 | ((fbr & 1) && (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_64X128))) { |
877 | 0 | continue; |
878 | 0 | } |
879 | 0 | if (pcs->skip_cdef_seg[fbr * nhfb + fbc]) { |
880 | 0 | continue; |
881 | 0 | } |
882 | | |
883 | 0 | mse[0][sb_count] = pcs->mse_seg[0][fbr * nhfb + fbc]; |
884 | 0 | mse[1][sb_count] = pcs->mse_seg[1][fbr * nhfb + fbc]; |
885 | 0 | sb_index[sb_count] = MI_SIZE_64X64 * fbr * pcs->mi_stride + MI_SIZE_64X64 * fbc; |
886 | 0 | sb_count++; |
887 | 0 | } |
888 | 0 | } |
889 | | |
890 | | // Scale down the cost of the (0,0) filter strength to bias selection towards off. When off, we can save the cost of the application |
891 | 0 | if (cdef_recon_ctrls->zero_fs_cost_bias) { |
892 | 0 | const bool is_16bit = (pcs->scs->static_config.encoder_bit_depth > EB_EIGHT_BIT); |
893 | 0 | uint16_t factor; |
894 | 0 | for (i = 0; i < sb_count; i++) { |
895 | 0 | if (is_16bit) { |
896 | 0 | factor = cdef_recon_ctrls->zero_fs_cost_bias; |
897 | 0 | if (mse[0][i][0] < 5000) { |
898 | 0 | factor = MIN(factor - 10, 64); |
899 | 0 | } else if (mse[0][i][0] < 10000) { |
900 | 0 | factor = MIN(factor - 5, 64); |
901 | 0 | } else if (mse[0][i][0] > 25000) { |
902 | 0 | factor = MIN(factor + 1, 64); |
903 | 0 | } |
904 | 0 | mse[0][i][0] = (factor * mse[0][i][0]) >> 6; |
905 | |
|
906 | 0 | factor = cdef_recon_ctrls->zero_fs_cost_bias; |
907 | 0 | if (mse[1][i][0] < 5000) { |
908 | 0 | factor = MIN(factor - 10, 64); |
909 | 0 | } else if (mse[1][i][0] < 10000) { |
910 | 0 | factor = MIN(factor - 5, 64); |
911 | 0 | } else if (mse[1][i][0] > 25000) { |
912 | 0 | factor = MIN(factor + 1, 64); |
913 | 0 | } |
914 | 0 | mse[1][i][0] = (factor * mse[1][i][0]) >> 6; |
915 | 0 | } else { |
916 | 0 | factor = cdef_recon_ctrls->zero_fs_cost_bias; |
917 | 0 | if (mse[0][i][0] > 25000) { |
918 | 0 | factor = MIN(factor + 2, 64); |
919 | 0 | } else if (mse[0][i][0] > 10000) { |
920 | 0 | factor = MIN(factor + 1, 64); |
921 | 0 | } |
922 | 0 | mse[0][i][0] = (factor * mse[0][i][0]) >> 6; |
923 | |
|
924 | 0 | factor = cdef_recon_ctrls->zero_fs_cost_bias; |
925 | 0 | if (mse[1][i][0] > 25000) { |
926 | 0 | factor = MIN(factor + 2, 64); |
927 | 0 | } else if (mse[1][i][0] > 10000) { |
928 | 0 | factor = MIN(factor + 1, 64); |
929 | 0 | } |
930 | 0 | mse[1][i][0] = (factor * mse[1][i][0]) >> 6; |
931 | 0 | } |
932 | 0 | } |
933 | 0 | } |
934 | | |
935 | | // Compute cost of (strength=0) to derive pcs->cdef_dist_dev |
936 | 0 | int64_t zero_dist = 0; |
937 | 0 | for (i = 0; i < sb_count; i++) { |
938 | 0 | zero_dist += mse[0][i][0] + mse[1][i][0]; |
939 | 0 | } |
940 | 0 | uint64_t zero_cost = RDCOST(lambda, av1_cost_literal(CDEF_STRENGTH_BITS * 2), zero_dist << 4); |
941 | | |
942 | | // Search for different numbers of signalling bits |
943 | 0 | for (i = 0; i <= 3; i++) { |
944 | 0 | int32_t best_lev0[CDEF_MAX_STRENGTHS] = {0}; |
945 | 0 | int32_t best_lev1[CDEF_MAX_STRENGTHS] = {0}; |
946 | 0 | nb_strengths = 1 << i; |
947 | 0 | uint64_t tot_mse = joint_strength_search_dual( |
948 | 0 | best_lev0, best_lev1, nb_strengths, mse, sb_count, start_gi, end_gi); |
949 | |
|
950 | 0 | const int total_bits = sb_count * i + nb_strengths * CDEF_STRENGTH_BITS * 2; |
951 | 0 | const uint64_t cost = RDCOST(lambda, av1_cost_literal(total_bits), tot_mse * 16); |
952 | 0 | if (cost < best_tot_mse) { |
953 | 0 | best_tot_mse = cost; |
954 | 0 | nb_strength_bits = i; |
955 | 0 | for (int32_t j = 0; j < 1 << nb_strength_bits; j++) { |
956 | 0 | frm_hdr->cdef_params.cdef_y_strength[j] = best_lev0[j]; |
957 | 0 | frm_hdr->cdef_params.cdef_uv_strength[j] = cdef_search_ctrls->uv_from_y ? best_lev0[j] : best_lev1[j]; |
958 | 0 | } |
959 | 0 | } |
960 | 0 | } |
961 | |
|
962 | 0 | pcs->cdef_dist_dev = zero_cost == 0 ? 0 : (int32_t)(1000 - ((1000 * best_tot_mse) / zero_cost)); |
963 | 0 | nb_strengths = 1 << nb_strength_bits; |
964 | |
|
965 | 0 | frm_hdr->cdef_params.cdef_bits = nb_strength_bits; |
966 | 0 | ppcs->nb_cdef_strengths = nb_strengths; |
967 | | |
968 | | // Assign each filter block its best strength index |
969 | 0 | for (i = 0; i < sb_count; i++) { |
970 | 0 | int32_t gi; |
971 | 0 | int32_t best_gi = 0; |
972 | 0 | uint64_t best_mse = (uint64_t)1 << 63; |
973 | 0 | for (gi = 0; gi < ppcs->nb_cdef_strengths; gi++) { |
974 | 0 | uint64_t curr = mse[0][i][frm_hdr->cdef_params.cdef_y_strength[gi]] + |
975 | 0 | mse[1][i][frm_hdr->cdef_params.cdef_uv_strength[gi]]; |
976 | 0 | if (curr < best_mse) { |
977 | 0 | best_gi = gi; |
978 | 0 | best_mse = curr; |
979 | 0 | } |
980 | 0 | } |
981 | 0 | propagate_cdef_strength(pcs, sb_index[i], (int8_t)best_gi); |
982 | 0 | } |
983 | | |
984 | | // Map search indices back to actual filter strengths |
985 | 0 | int filter_map[TOTAL_STRENGTHS] = {0}; |
986 | 0 | for (i = 0; i < first_pass_fs_num; i++) { |
987 | 0 | filter_map[i] = cdef_search_ctrls->default_first_pass_fs[i]; |
988 | 0 | } |
989 | 0 | for (i = 0; i < default_second_pass_fs_num; i++) { |
990 | 0 | filter_map[first_pass_fs_num + i] = cdef_search_ctrls->default_second_pass_fs[i]; |
991 | 0 | } |
992 | |
|
993 | 0 | for (i = 0; i < ppcs->nb_cdef_strengths; i++) { |
994 | 0 | frm_hdr->cdef_params.cdef_y_strength[i] = filter_map[frm_hdr->cdef_params.cdef_y_strength[i]]; |
995 | 0 | frm_hdr->cdef_params.cdef_uv_strength[i] = filter_map[frm_hdr->cdef_params.cdef_uv_strength[i]]; |
996 | 0 | } |
997 | |
|
998 | 0 | frm_hdr->cdef_params.cdef_damping = CDEF_DAMPING_FROM_QP(frm_hdr->quantization_params.base_q_idx); |
999 | |
|
1000 | 0 | EB_FREE_ARRAY(mse[0]); |
1001 | 0 | EB_FREE_ARRAY(mse[1]); |
1002 | | EB_FREE_ARRAY(sb_index); |
1003 | 0 | } |
1004 | | #else |
1005 | | void finish_cdef_search(PictureControlSet* pcs) { |
1006 | | PictureParentControlSet* ppcs = pcs->ppcs; |
1007 | | FrameHeader* frm_hdr = &ppcs->frm_hdr; |
1008 | | Av1Common* cm = ppcs->av1_cm; |
1009 | | int32_t mi_rows = ppcs->av1_cm->mi_rows; |
1010 | | int32_t mi_cols = ppcs->av1_cm->mi_cols; |
1011 | | |
1012 | | int32_t fbr, fbc; |
1013 | | uint64_t best_tot_mse = (uint64_t)1 << 63; |
1014 | | int32_t sb_count; |
1015 | | int32_t nvfb = (mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64; |
1016 | | int32_t nhfb = (mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64; |
1017 | | //CDEF Settings |
1018 | | CdefSearchControls* cdef_search_ctrls = &pcs->ppcs->cdef_search_ctrls; |
1019 | | if (cdef_search_ctrls->use_qp_strength) { |
1020 | | int pred_y, pred_uv; |
1021 | | |
1022 | | // Predict Y/UV strengths from QP |
1023 | | svt_pick_cdef_from_qp(ppcs, 0, &pred_y, &pred_uv); |
1024 | | |
1025 | | // Frame-level parameters |
1026 | | frm_hdr->cdef_params.cdef_bits = 0; // only one strength index |
1027 | | ppcs->nb_cdef_strengths = 1; |
1028 | | frm_hdr->cdef_params.cdef_y_strength[0] = pred_y; |
1029 | | frm_hdr->cdef_params.cdef_uv_strength[0] = pred_uv; |
1030 | | frm_hdr->cdef_params.cdef_damping = 3 + (frm_hdr->quantization_params.base_q_idx >> 6); |
1031 | | |
1032 | | // Assign strength index 0 to all valid 64x64 blocks |
1033 | | for (fbr = 0; fbr < nvfb; ++fbr) { |
1034 | | for (fbc = 0; fbc < nhfb; ++fbc) { |
1035 | | MbModeInfo* mbmi = pcs->mi_grid_base[MI_SIZE_64X64 * fbr * pcs->mi_stride + MI_SIZE_64X64 * fbc]; |
1036 | | |
1037 | | // Skip duplicated 64x64 blocks inside larger 128x128/128x64/64x128 |
1038 | | if (((fbc & 1) && (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_128X64)) || |
1039 | | ((fbr & 1) && (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_64X128))) { |
1040 | | continue; |
1041 | | } |
1042 | | |
1043 | | // No filtering if the entire filter block is skipped |
1044 | | if (svt_sb_all_skip(pcs, cm, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64)) { |
1045 | | continue; |
1046 | | } |
1047 | | |
1048 | | mbmi->cdef_strength = 0; |
1049 | | |
1050 | | // Duplicate for large blocks in SVT MI map |
1051 | | switch (mbmi->bsize) { |
1052 | | case BLOCK_128X128: |
1053 | | pcs->mi_grid_base[MI_SIZE_64X64 * fbr * pcs->mi_stride + MI_SIZE_64X64 * fbc + MI_SIZE_64X64] |
1054 | | ->cdef_strength = 0; |
1055 | | |
1056 | | pcs->mi_grid_base[(MI_SIZE_64X64 * fbr + MI_SIZE_64X64) * pcs->mi_stride + MI_SIZE_64X64 * fbc] |
1057 | | ->cdef_strength = 0; |
1058 | | |
1059 | | pcs->mi_grid_base[(MI_SIZE_64X64 * fbr + MI_SIZE_64X64) * pcs->mi_stride + MI_SIZE_64X64 * fbc + |
1060 | | MI_SIZE_64X64] |
1061 | | ->cdef_strength = 0; |
1062 | | break; |
1063 | | |
1064 | | case BLOCK_128X64: |
1065 | | pcs->mi_grid_base[MI_SIZE_64X64 * fbr * pcs->mi_stride + MI_SIZE_64X64 * fbc + MI_SIZE_64X64] |
1066 | | ->cdef_strength = 0; |
1067 | | break; |
1068 | | |
1069 | | case BLOCK_64X128: |
1070 | | pcs->mi_grid_base[(MI_SIZE_64X64 * fbr + MI_SIZE_64X64) * pcs->mi_stride + MI_SIZE_64X64 * fbc] |
1071 | | ->cdef_strength = 0; |
1072 | | break; |
1073 | | |
1074 | | default: |
1075 | | break; |
1076 | | } |
1077 | | } |
1078 | | } |
1079 | | return; |
1080 | | } |
1081 | | |
1082 | | CdefReconControls* cdef_recon_ctrls = &pcs->ppcs->cdef_recon_ctrls; |
1083 | | const int first_pass_fs_num = cdef_search_ctrls->first_pass_fs_num; |
1084 | | const int default_second_pass_fs_num = cdef_search_ctrls->default_second_pass_fs_num; |
1085 | | |
1086 | | if (cdef_search_ctrls->use_reference_cdef_fs) { |
1087 | | int32_t* sb_index; |
1088 | | EB_MALLOC_ARRAY_NO_CHECK(sb_index, nvfb * nhfb); |
1089 | | int32_t best_gi = 0; |
1090 | | sb_count = 0; |
1091 | | assert(sb_index != NULL); |
1092 | | for (fbr = 0; fbr < nvfb; ++fbr) { |
1093 | | for (fbc = 0; fbc < nhfb; ++fbc) { |
1094 | | const MbModeInfo* mbmi = pcs->mi_grid_base[MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc]; |
1095 | | if (((fbc & 1) && (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_128X64)) || |
1096 | | ((fbr & 1) && (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_64X128))) { |
1097 | | continue; |
1098 | | } |
1099 | | // No filtering if the entire filter block is skipped |
1100 | | if (svt_sb_all_skip(pcs, cm, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64)) { |
1101 | | continue; |
1102 | | } |
1103 | | sb_index[sb_count] = MI_SIZE_64X64 * fbr * pcs->mi_stride + MI_SIZE_64X64 * fbc; |
1104 | | sb_count++; |
1105 | | } |
1106 | | } |
1107 | | for (int32_t i = 0; i < sb_count; i++) { |
1108 | | pcs->mi_grid_base[sb_index[i]]->cdef_strength = (int8_t)best_gi; |
1109 | | //in case the fb is within a block=128x128 or 128x64, or 64x128, then we genrate param only for the first 64x64. |
1110 | | //since our mi map deos not have the multi pointer single data assignment, we need to duplicate data. |
1111 | | BlockSize bsize = pcs->mi_grid_base[sb_index[i]]->bsize; |
1112 | | switch (bsize) { |
1113 | | case BLOCK_128X128: |
1114 | | pcs->mi_grid_base[sb_index[i] + MI_SIZE_64X64]->cdef_strength = (int8_t)best_gi; |
1115 | | pcs->mi_grid_base[sb_index[i] + MI_SIZE_64X64 * pcs->mi_stride]->cdef_strength = (int8_t)best_gi; |
1116 | | pcs->mi_grid_base[sb_index[i] + MI_SIZE_64X64 * pcs->mi_stride + MI_SIZE_64X64]->cdef_strength = |
1117 | | (int8_t)best_gi; |
1118 | | break; |
1119 | | case BLOCK_128X64: |
1120 | | pcs->mi_grid_base[sb_index[i] + MI_SIZE_64X64]->cdef_strength = (int8_t)best_gi; |
1121 | | break; |
1122 | | case BLOCK_64X128: |
1123 | | pcs->mi_grid_base[sb_index[i] + MI_SIZE_64X64 * pcs->mi_stride]->cdef_strength = (int8_t)best_gi; |
1124 | | break; |
1125 | | default: |
1126 | | break; |
1127 | | } |
1128 | | } |
1129 | | frm_hdr->cdef_params.cdef_bits = 0; |
1130 | | ppcs->nb_cdef_strengths = 1; |
1131 | | //cdef_pri_damping & cdef_sec_damping consolidated to cdef_damping |
1132 | | int32_t pri_damping = 3 + (frm_hdr->quantization_params.base_q_idx >> 6); |
1133 | | frm_hdr->cdef_params.cdef_damping = pri_damping; |
1134 | | frm_hdr->cdef_params.cdef_y_strength[0] = cdef_search_ctrls->pred_y_f; |
1135 | | frm_hdr->cdef_params.cdef_uv_strength[0] = cdef_search_ctrls->pred_uv_f; |
1136 | | EB_FREE_ARRAY(sb_index); |
1137 | | return; |
1138 | | } |
1139 | | int32_t* sb_index; |
1140 | | // to keep track of the sb_address in units of SBs (not mi_size) |
1141 | | int32_t* sb_addr; |
1142 | | EB_MALLOC_ARRAY_NO_CHECK(sb_index, nvfb * nhfb); |
1143 | | EB_MALLOC_ARRAY_NO_CHECK(sb_addr, nvfb * nhfb); |
1144 | | assert(sb_index != NULL); |
1145 | | assert(sb_addr != NULL); |
1146 | | |
1147 | | uint64_t** mse[2]; |
1148 | | EB_MALLOC_ARRAY_NO_CHECK(mse[0], nvfb * nhfb); |
1149 | | EB_MALLOC_ARRAY_NO_CHECK(mse[1], nvfb * nhfb); |
1150 | | assert(mse[0] != NULL); |
1151 | | assert(mse[1] != NULL); |
1152 | | |
1153 | | int32_t start_gi = 0; |
1154 | | int32_t end_gi = first_pass_fs_num + default_second_pass_fs_num; |
1155 | | int32_t i; |
1156 | | int32_t nb_strengths; |
1157 | | int32_t nb_strength_bits; |
1158 | | uint64_t lambda; |
1159 | | uint32_t fast_lambda, full_lambda = 0; |
1160 | | |
1161 | | svt_aom_lambda_assign(pcs, |
1162 | | &fast_lambda, |
1163 | | &full_lambda, |
1164 | | pcs->ppcs->enhanced_pic->bit_depth, |
1165 | | pcs->ppcs->frm_hdr.quantization_params.base_q_idx, |
1166 | | false); |
1167 | | lambda = full_lambda; |
1168 | | sb_count = 0; |
1169 | | for (fbr = 0; fbr < nvfb; ++fbr) { |
1170 | | for (fbc = 0; fbc < nhfb; ++fbc) { |
1171 | | const MbModeInfo* mbmi = pcs->mi_grid_base[MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc]; |
1172 | | if (((fbc & 1) && (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_128X64)) || |
1173 | | ((fbr & 1) && (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_64X128))) { |
1174 | | continue; |
1175 | | } |
1176 | | |
1177 | | // No filtering if the entire filter block is skipped |
1178 | | if (pcs->skip_cdef_seg[fbr * nhfb + fbc]) { |
1179 | | continue; |
1180 | | } |
1181 | | // point to the MSE data |
1182 | | mse[0][sb_count] = pcs->mse_seg[0][fbr * nhfb + fbc]; |
1183 | | mse[1][sb_count] = pcs->mse_seg[1][fbr * nhfb + fbc]; |
1184 | | |
1185 | | sb_index[sb_count] = MI_SIZE_64X64 * fbr * pcs->mi_stride + MI_SIZE_64X64 * fbc; |
1186 | | sb_addr[sb_count] = fbr * nhfb + fbc; |
1187 | | sb_count++; |
1188 | | } |
1189 | | } |
1190 | | |
1191 | | nb_strength_bits = 0; |
1192 | | // Scale down the cost of the (0,0) filter strength to bias selection towards off. |
1193 | | // When off, can save the cost of the application. |
1194 | | if (cdef_recon_ctrls->zero_fs_cost_bias) { |
1195 | | const bool is_16bit = (pcs->scs->static_config.encoder_bit_depth > EB_EIGHT_BIT); |
1196 | | uint16_t factor; |
1197 | | for (i = 0; i < sb_count; i++) { |
1198 | | if (is_16bit) { |
1199 | | factor = cdef_recon_ctrls->zero_fs_cost_bias; |
1200 | | if (mse[0][i][0] < 5000) { |
1201 | | factor = MIN(factor - 10, 64); |
1202 | | } else if (mse[0][i][0] < 10000) { |
1203 | | factor = MIN(factor - 5, 64); |
1204 | | } else if (mse[0][i][0] > 25000) { |
1205 | | factor = MIN(factor + 1, 64); |
1206 | | } |
1207 | | mse[0][i][0] = (factor * mse[0][i][0]) >> 6; |
1208 | | |
1209 | | factor = cdef_recon_ctrls->zero_fs_cost_bias; |
1210 | | if (mse[1][i][0] < 5000) { |
1211 | | factor = MIN(factor - 10, 64); |
1212 | | } else if (mse[1][i][0] < 10000) { |
1213 | | factor = MIN(factor - 5, 64); |
1214 | | } else if (mse[1][i][0] > 25000) { |
1215 | | factor = MIN(factor + 1, 64); |
1216 | | } |
1217 | | mse[1][i][0] = (factor * mse[1][i][0]) >> 6; |
1218 | | } else { |
1219 | | factor = cdef_recon_ctrls->zero_fs_cost_bias; |
1220 | | if (mse[0][i][0] > 25000) { |
1221 | | factor = MIN(factor + 2, 64); |
1222 | | } else if (mse[0][i][0] > 10000) { |
1223 | | factor = MIN(factor + 1, 64); |
1224 | | } |
1225 | | mse[0][i][0] = (factor * mse[0][i][0]) >> 6; |
1226 | | |
1227 | | factor = cdef_recon_ctrls->zero_fs_cost_bias; |
1228 | | if (mse[1][i][0] > 25000) { |
1229 | | factor = MIN(factor + 2, 64); |
1230 | | } else if (mse[1][i][0] > 10000) { |
1231 | | factor = MIN(factor + 1, 64); |
1232 | | } |
1233 | | |
1234 | | mse[1][i][0] = (factor * mse[1][i][0]) >> 6; |
1235 | | } |
1236 | | } |
1237 | | } |
1238 | | // Compute cost of off to use in deriving pcs->cdef_dist_dev |
1239 | | int64_t zero_dist = 0; |
1240 | | for (i = 0; i < sb_count; i++) { |
1241 | | zero_dist += mse[0][i][0] + mse[1][i][0]; |
1242 | | } |
1243 | | uint64_t zero_cost = RDCOST(lambda, av1_cost_literal(CDEF_STRENGTH_BITS * 2), zero_dist << 4); |
1244 | | /* Search for different number of signalling bits. */ |
1245 | | for (i = 0; i <= 3; i++) { |
1246 | | int32_t best_lev0[CDEF_MAX_STRENGTHS] = {0}; |
1247 | | int32_t best_lev1[CDEF_MAX_STRENGTHS] = {0}; |
1248 | | nb_strengths = 1 << i; |
1249 | | uint64_t tot_mse = joint_strength_search_dual( |
1250 | | best_lev0, best_lev1, nb_strengths, mse, sb_count, start_gi, end_gi); |
1251 | | /* Count superblock signalling cost. */ |
1252 | | const int total_bits = sb_count * i + nb_strengths * CDEF_STRENGTH_BITS * 2; |
1253 | | const int rate_cost = av1_cost_literal(total_bits); |
1254 | | const uint64_t dist = tot_mse * 16; |
1255 | | tot_mse = RDCOST(lambda, rate_cost, dist); |
1256 | | if (tot_mse < best_tot_mse) { |
1257 | | best_tot_mse = tot_mse; |
1258 | | nb_strength_bits = i; |
1259 | | for (int32_t j = 0; j < 1 << nb_strength_bits; j++) { |
1260 | | frm_hdr->cdef_params.cdef_y_strength[j] = best_lev0[j]; |
1261 | | frm_hdr->cdef_params.cdef_uv_strength[j] = cdef_search_ctrls->uv_from_y ? best_lev0[j] : best_lev1[j]; |
1262 | | } |
1263 | | } |
1264 | | } |
1265 | | pcs->cdef_dist_dev = zero_cost == 0 ? 0 : (int32_t)(1000 - ((1000 * best_tot_mse) / zero_cost)); |
1266 | | nb_strengths = 1 << nb_strength_bits; |
1267 | | |
1268 | | frm_hdr->cdef_params.cdef_bits = nb_strength_bits; |
1269 | | ppcs->nb_cdef_strengths = nb_strengths; |
1270 | | for (i = 0; i < sb_count; i++) { |
1271 | | int32_t gi; |
1272 | | int32_t best_gi; |
1273 | | uint64_t best_mse = (uint64_t)1 << 63; |
1274 | | best_gi = 0; |
1275 | | // skip this loop for SBs that are skipped in the search |
1276 | | for (gi = 0; gi < ppcs->nb_cdef_strengths; gi++) { |
1277 | | uint64_t curr = mse[0][i][frm_hdr->cdef_params.cdef_y_strength[gi]]; |
1278 | | curr += mse[1][i][frm_hdr->cdef_params.cdef_uv_strength[gi]]; |
1279 | | if (curr < best_mse) { |
1280 | | best_gi = gi; |
1281 | | best_mse = curr; |
1282 | | } |
1283 | | } |
1284 | | |
1285 | | pcs->mi_grid_base[sb_index[i]]->cdef_strength = (int8_t)best_gi; |
1286 | | //in case the fb is within a block=128x128 or 128x64, or 64x128, then we genrate param only for the first 64x64. |
1287 | | //since our mi map deos not have the multi pointer single data assignment, we need to duplicate data. |
1288 | | BlockSize bsize = pcs->mi_grid_base[sb_index[i]]->bsize; |
1289 | | |
1290 | | switch (bsize) { |
1291 | | case BLOCK_128X128: |
1292 | | pcs->mi_grid_base[sb_index[i] + MI_SIZE_64X64]->cdef_strength = (int8_t)best_gi; |
1293 | | pcs->mi_grid_base[sb_index[i] + MI_SIZE_64X64 * pcs->mi_stride]->cdef_strength = (int8_t)best_gi; |
1294 | | pcs->mi_grid_base[sb_index[i] + MI_SIZE_64X64 * pcs->mi_stride + MI_SIZE_64X64]->cdef_strength = (int8_t) |
1295 | | best_gi; |
1296 | | break; |
1297 | | case BLOCK_128X64: |
1298 | | pcs->mi_grid_base[sb_index[i] + MI_SIZE_64X64]->cdef_strength = (int8_t)best_gi; |
1299 | | break; |
1300 | | case BLOCK_64X128: |
1301 | | pcs->mi_grid_base[sb_index[i] + MI_SIZE_64X64 * pcs->mi_stride]->cdef_strength = (int8_t)best_gi; |
1302 | | break; |
1303 | | default: |
1304 | | break; |
1305 | | } |
1306 | | } |
1307 | | int filter_map[TOTAL_STRENGTHS] = {0}; |
1308 | | for (i = 0; i < first_pass_fs_num; i++) { |
1309 | | filter_map[i] = cdef_search_ctrls->default_first_pass_fs[i]; |
1310 | | } |
1311 | | for (i = first_pass_fs_num; i < (first_pass_fs_num + default_second_pass_fs_num); i++) { |
1312 | | filter_map[i] = cdef_search_ctrls->default_second_pass_fs[i - first_pass_fs_num]; |
1313 | | } |
1314 | | |
1315 | | for (i = 0; i < ppcs->nb_cdef_strengths; i++) { |
1316 | | frm_hdr->cdef_params.cdef_y_strength[i] = filter_map[frm_hdr->cdef_params.cdef_y_strength[i]]; |
1317 | | frm_hdr->cdef_params.cdef_uv_strength[i] = filter_map[frm_hdr->cdef_params.cdef_uv_strength[i]]; |
1318 | | } |
1319 | | //cdef_pri_damping & cdef_sec_damping consolidated to cdef_damping |
1320 | | frm_hdr->cdef_params.cdef_damping = 3 + (frm_hdr->quantization_params.base_q_idx >> 6); |
1321 | | EB_FREE_ARRAY(mse[0]); |
1322 | | EB_FREE_ARRAY(mse[1]); |
1323 | | EB_FREE_ARRAY(sb_index); |
1324 | | EB_FREE_ARRAY(sb_addr); |
1325 | | } |
1326 | | #endif |