/work/svt-av1/Source/Lib/Codec/cdef_process.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright(c) 2019 Intel Corporation |
3 | | * Copyright (c) 2016, Alliance for Open Media. All rights reserved |
4 | | * |
5 | | * This source code is subject to the terms of the BSD 2 Clause License and |
6 | | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
7 | | * was not distributed with this source code in the LICENSE file, you can |
8 | | * obtain it at https://www.aomedia.org/license/software-license. If the Alliance for Open |
9 | | * Media Patent License 1.0 was not distributed with this source code in the |
10 | | * PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license. |
11 | | */ |
12 | | |
13 | | #include <stdlib.h> |
14 | | #include "aom_dsp_rtcd.h" |
15 | | #include "definitions.h" |
16 | | #include "enc_handle.h" |
17 | | #include "cdef_process.h" |
18 | | #include "enc_dec_results.h" |
19 | | #include "svt_threads.h" |
20 | | #include "reference_object.h" |
21 | | #include "enc_cdef.h" |
22 | | #include "enc_dec_process.h" |
23 | | #include "pic_buffer_desc.h" |
24 | | #include "sequence_control_set.h" |
25 | | #include "utility.h" |
26 | | #include "pcs.h" |
27 | | #include "resize.h" |
28 | | #include "super_res.h" |
29 | | |
30 | 0 | static void set_unscaled_input_16bit(PictureControlSet* pcs) { |
31 | 0 | EbPictureBufferDesc* input_pic = pcs->ppcs->enhanced_unscaled_pic; |
32 | 0 | EbPictureBufferDesc* output_pic = pcs->input_frame16bit; |
33 | 0 | uint16_t ss_x = pcs->ppcs->scs->subsampling_x; |
34 | 0 | uint16_t ss_y = pcs->ppcs->scs->subsampling_y; |
35 | 0 | svt_aom_copy_buffer_info(input_pic, pcs->input_frame16bit); |
36 | 0 | if (input_pic->bit_depth == EB_EIGHT_BIT) { |
37 | 0 | svt_aom_convert_pic_8bit_to_16bit(input_pic, output_pic, ss_x, ss_y); |
38 | 0 | } else { |
39 | 0 | uint16_t* planes[3] = { |
40 | 0 | (uint16_t*)output_pic->y_buffer, (uint16_t*)output_pic->u_buffer, (uint16_t*)output_pic->v_buffer}; |
41 | 0 | svt_aom_pack_2d_pic(input_pic, planes); |
42 | 0 | } |
43 | 0 | } |
44 | | |
45 | | static EbErrorType copy_recon_enc(SequenceControlSet* scs, EbPictureBufferDesc* recon_picture_src, |
46 | 0 | EbPictureBufferDesc* recon_picture_dst, int num_planes, int skip_copy) { |
47 | 0 | recon_picture_dst->border = recon_picture_src->border; |
48 | 0 | recon_picture_dst->width = recon_picture_src->width; |
49 | 0 | recon_picture_dst->height = recon_picture_src->height; |
50 | 0 | recon_picture_dst->max_width = recon_picture_src->max_width; |
51 | 0 | recon_picture_dst->max_height = recon_picture_src->max_height; |
52 | 0 | recon_picture_dst->bit_depth = recon_picture_src->bit_depth; |
53 | 0 | recon_picture_dst->color_format = recon_picture_src->color_format; |
54 | |
|
55 | 0 | recon_picture_dst->y_stride = recon_picture_src->y_stride; |
56 | 0 | recon_picture_dst->u_stride = recon_picture_src->u_stride; |
57 | 0 | recon_picture_dst->v_stride = recon_picture_src->v_stride; |
58 | |
|
59 | 0 | recon_picture_dst->luma_size = recon_picture_src->luma_size; |
60 | 0 | recon_picture_dst->chroma_size = recon_picture_src->chroma_size; |
61 | 0 | recon_picture_dst->packed_flag = recon_picture_src->packed_flag; |
62 | |
|
63 | 0 | recon_picture_dst->y_stride_bit_inc = recon_picture_src->y_stride_bit_inc; |
64 | 0 | recon_picture_dst->u_stride_bit_inc = recon_picture_src->u_stride_bit_inc; |
65 | 0 | recon_picture_dst->v_stride_bit_inc = recon_picture_src->v_stride_bit_inc; |
66 | |
|
67 | 0 | recon_picture_dst->buffer_enable_mask = scs->seq_header.color_config.mono_chrome ? PICTURE_BUFFER_DESC_LUMA_MASK |
68 | 0 | : PICTURE_BUFFER_DESC_FULL_MASK; |
69 | |
|
70 | 0 | int ss_x = scs->subsampling_x; |
71 | 0 | int ss_y = scs->subsampling_y; |
72 | 0 | uint32_t bytes_per_pixel = scs->is_16bit_pipeline ? 2 : 1; |
73 | | |
74 | | // Get frame size to alloc |
75 | 0 | uint32_t alloc_sz = 0; |
76 | 0 | uint32_t buffer_size[3] = {0}; |
77 | 0 | if (recon_picture_dst->buffer_enable_mask & PICTURE_BUFFER_DESC_Y_FLAG) { |
78 | 0 | alloc_sz += buffer_size[0] = recon_picture_dst->luma_size * bytes_per_pixel; |
79 | 0 | } |
80 | |
|
81 | 0 | if (recon_picture_dst->buffer_enable_mask & PICTURE_BUFFER_DESC_Cb_FLAG) { |
82 | 0 | alloc_sz += buffer_size[1] = recon_picture_dst->chroma_size * bytes_per_pixel; |
83 | 0 | } |
84 | |
|
85 | 0 | if (recon_picture_dst->buffer_enable_mask & PICTURE_BUFFER_DESC_Cr_FLAG) { |
86 | 0 | alloc_sz += buffer_size[2] = recon_picture_dst->chroma_size * bytes_per_pixel; |
87 | 0 | } |
88 | | |
89 | | // Allocate the Picture Buffers (luma & chroma) |
90 | 0 | EB_CALLOC_ALIGNED_ARRAY(recon_picture_dst->buffer_alloc, alloc_sz); |
91 | 0 | recon_picture_dst->buffer_alloc_sz = alloc_sz; |
92 | 0 | uint32_t assigned_space = 0; |
93 | 0 | if (recon_picture_dst->buffer_enable_mask & PICTURE_BUFFER_DESC_Y_FLAG) { |
94 | 0 | recon_picture_dst->y_buffer = recon_picture_dst->buffer_alloc + |
95 | 0 | (recon_picture_dst->border + (recon_picture_dst->y_stride * recon_picture_dst->border)) * bytes_per_pixel; |
96 | 0 | assigned_space += buffer_size[0]; |
97 | 0 | } else { |
98 | 0 | recon_picture_dst->y_buffer = NULL; |
99 | 0 | } |
100 | |
|
101 | 0 | if (recon_picture_dst->buffer_enable_mask & PICTURE_BUFFER_DESC_Cb_FLAG) { |
102 | 0 | recon_picture_dst->u_buffer = recon_picture_dst->buffer_alloc + assigned_space + |
103 | 0 | ((recon_picture_dst->border >> ss_x) + |
104 | 0 | (recon_picture_dst->u_stride * (recon_picture_dst->border >> ss_y))) * |
105 | 0 | bytes_per_pixel; |
106 | 0 | assigned_space += buffer_size[1]; |
107 | 0 | } else { |
108 | 0 | recon_picture_dst->u_buffer = NULL; |
109 | 0 | } |
110 | |
|
111 | 0 | if (recon_picture_dst->buffer_enable_mask & PICTURE_BUFFER_DESC_Cr_FLAG) { |
112 | 0 | recon_picture_dst->v_buffer = recon_picture_dst->buffer_alloc + assigned_space + |
113 | 0 | ((recon_picture_dst->border >> ss_x) + |
114 | 0 | (recon_picture_dst->v_stride * (recon_picture_dst->border >> ss_y))) * |
115 | 0 | bytes_per_pixel; |
116 | 0 | assigned_space += buffer_size[2]; |
117 | 0 | } else { |
118 | 0 | recon_picture_dst->v_buffer = NULL; |
119 | 0 | } |
120 | 0 | assert(assigned_space == alloc_sz); |
121 | |
|
122 | 0 | int use_highbd = scs->is_16bit_pipeline; |
123 | |
|
124 | 0 | if (!skip_copy) { |
125 | 0 | assert(num_planes <= MAX_PLANES); |
126 | 0 | for (int plane = 0; plane < num_planes; ++plane) { |
127 | 0 | uint8_t *src_buf, *dst_buf; |
128 | 0 | int32_t src_stride, dst_stride; |
129 | |
|
130 | 0 | int sub_x = plane ? scs->subsampling_x : 0; |
131 | 0 | int sub_y = plane ? scs->subsampling_y : 0; |
132 | |
|
133 | 0 | src_buf = recon_picture_src->buffer[plane]; |
134 | 0 | src_stride = recon_picture_src->stride[plane]; |
135 | 0 | dst_buf = recon_picture_dst->buffer[plane]; |
136 | 0 | dst_stride = recon_picture_dst->stride[plane]; |
137 | |
|
138 | 0 | int height = ((recon_picture_src->height + sub_y) >> sub_y); |
139 | 0 | for (int row = 0; row < height; ++row) { |
140 | 0 | svt_memcpy( |
141 | 0 | dst_buf, src_buf, ((recon_picture_src->width + sub_x) >> sub_x) * sizeof(*src_buf) << use_highbd); |
142 | 0 | src_buf += src_stride << use_highbd; |
143 | 0 | dst_buf += dst_stride << use_highbd; |
144 | 0 | } |
145 | 0 | } |
146 | 0 | } |
147 | |
|
148 | 0 | return EB_ErrorNone; |
149 | 0 | } |
150 | | |
151 | 0 | static void svt_av1_superres_upscale_frame(struct Av1Common* cm, PictureControlSet* pcs, SequenceControlSet* scs) { |
152 | | // Set these parameters for testing since they are not correctly populated yet |
153 | 0 | EbPictureBufferDesc* recon_ptr; |
154 | |
|
155 | 0 | bool is_16bit = scs->is_16bit_pipeline; |
156 | |
|
157 | 0 | svt_aom_get_recon_pic(pcs, &recon_ptr, is_16bit); |
158 | |
|
159 | 0 | uint16_t ss_x = scs->subsampling_x; |
160 | 0 | uint16_t ss_y = scs->subsampling_y; |
161 | 0 | const int num_planes = scs->seq_header.color_config.mono_chrome ? 1 : MAX_PLANES; |
162 | |
|
163 | 0 | EbPictureBufferDesc recon_pic_temp; |
164 | 0 | EbPictureBufferDesc* ps_recon_pic_temp; |
165 | 0 | ps_recon_pic_temp = &recon_pic_temp; |
166 | |
|
167 | 0 | EbErrorType return_error = copy_recon_enc(scs, recon_ptr, ps_recon_pic_temp, num_planes, 0); |
168 | |
|
169 | 0 | if (return_error != EB_ErrorNone) { |
170 | 0 | ps_recon_pic_temp = NULL; |
171 | 0 | assert(0); |
172 | 0 | } |
173 | |
|
174 | 0 | EbPictureBufferDesc* src = ps_recon_pic_temp; |
175 | 0 | EbPictureBufferDesc* dst = recon_ptr; |
176 | | |
177 | | // get the bit-depth from the encoder config instead of from the recon ptr |
178 | 0 | int bit_depth = scs->static_config.encoder_bit_depth; |
179 | |
|
180 | 0 | assert(num_planes <= MAX_PLANES); |
181 | 0 | for (int plane = 0; plane < num_planes; ++plane) { |
182 | 0 | uint8_t *src_buf, *dst_buf; |
183 | 0 | int32_t src_stride, dst_stride; |
184 | |
|
185 | 0 | int sub_x = plane ? ss_x : 0; |
186 | 0 | int sub_y = plane ? ss_y : 0; |
187 | 0 | src_buf = src->buffer[plane]; |
188 | 0 | src_stride = src->stride[plane]; |
189 | 0 | dst_buf = dst->buffer[plane]; |
190 | 0 | dst_stride = dst->stride[plane]; |
191 | |
|
192 | 0 | svt_av1_upscale_normative_rows(cm, |
193 | 0 | (const uint8_t*)src_buf, |
194 | 0 | src_stride, |
195 | 0 | dst_buf, |
196 | 0 | dst_stride, |
197 | 0 | (src->height + sub_y) >> sub_y, |
198 | 0 | sub_x, |
199 | 0 | bit_depth, |
200 | 0 | is_16bit); |
201 | 0 | } |
202 | | |
203 | | // free the memory |
204 | 0 | EB_FREE_ALIGNED_ARRAY(ps_recon_pic_temp->buffer_alloc); |
205 | 0 | } |
206 | | |
207 | | /************************************** |
208 | | * Cdef Context |
209 | | **************************************/ |
210 | | typedef struct CdefContext { |
211 | | EbFifo* cdef_input_fifo_ptr; |
212 | | EbFifo* cdef_output_fifo_ptr; |
213 | | } CdefContext; |
214 | | |
215 | 474 | static void cdef_context_dctor(EbPtr p) { |
216 | 474 | EbThreadContext* thread_ctx = (EbThreadContext*)p; |
217 | 474 | CdefContext* obj = (CdefContext*)thread_ctx->priv; |
218 | 474 | EB_FREE_ARRAY(obj); |
219 | 474 | } |
220 | | |
221 | | /****************************************************** |
222 | | * Cdef Context Constructor |
223 | | ******************************************************/ |
224 | 474 | EbErrorType svt_aom_cdef_context_ctor(EbThreadContext* thread_ctx, const EbEncHandle* enc_handle_ptr, int index) { |
225 | 474 | CdefContext* cdef_ctx; |
226 | 474 | EB_CALLOC_ARRAY(cdef_ctx, 1); |
227 | 474 | thread_ctx->priv = cdef_ctx; |
228 | 474 | thread_ctx->dctor = cdef_context_dctor; |
229 | | |
230 | | // Input/Output System Resource Manager FIFOs |
231 | 474 | cdef_ctx->cdef_input_fifo_ptr = svt_system_resource_get_consumer_fifo(enc_handle_ptr->dlf_results_resource_ptr, |
232 | 474 | index); |
233 | 474 | cdef_ctx->cdef_output_fifo_ptr = svt_system_resource_get_producer_fifo(enc_handle_ptr->cdef_results_resource_ptr, |
234 | 474 | index); |
235 | | |
236 | 474 | return EB_ErrorNone; |
237 | 474 | } |
238 | | |
239 | 0 | #define default_mse_uv 1040400 |
240 | | |
241 | | static uint64_t compute_cdef_dist(const EbByte dst, int32_t doffset, int32_t dstride, const uint8_t* src, |
242 | | const CdefList* dlist, int32_t cdef_count, BlockSize bsize, int32_t coeff_shift, |
243 | 0 | uint8_t subsampling_factor, bool is_16bit) { |
244 | 0 | uint64_t curr_mse = 0; |
245 | 0 | if (is_16bit) { |
246 | 0 | curr_mse = svt_compute_cdef_dist_16bit(((uint16_t*)dst) + doffset, |
247 | 0 | dstride, |
248 | 0 | (uint16_t*)src, |
249 | 0 | dlist, |
250 | 0 | cdef_count, |
251 | 0 | bsize, |
252 | 0 | coeff_shift, |
253 | 0 | subsampling_factor); |
254 | |
|
255 | 0 | } else { |
256 | 0 | curr_mse = svt_compute_cdef_dist_8bit( |
257 | 0 | dst + doffset, dstride, src, dlist, cdef_count, bsize, coeff_shift, subsampling_factor); |
258 | 0 | } |
259 | 0 | return curr_mse; |
260 | 0 | } |
261 | | |
262 | | /* Search for the best filter strength pair for each 64x64 filter block. |
263 | | * |
264 | | * For each 64x64 filter block and each plane, search the allowable filter strength pairs. |
265 | | * Call cdef_filter_fb() to perform filtering, then compute the MSE for each pair. |
266 | | */ |
267 | 0 | static void cdef_seg_search(PictureControlSet* pcs, SequenceControlSet* scs, uint32_t segment_index) { |
268 | 0 | PictureParentControlSet* ppcs = pcs->ppcs; |
269 | 0 | FrameHeader* frm_hdr = &ppcs->frm_hdr; |
270 | 0 | Av1Common* cm = ppcs->av1_cm; |
271 | 0 | const bool is_16bit = scs->is_16bit_pipeline; |
272 | 0 | uint32_t x_seg_idx; |
273 | 0 | uint32_t y_seg_idx; |
274 | 0 | const uint32_t b64_pic_width = (ppcs->aligned_width + 64 - 1) / 64; |
275 | 0 | const uint32_t b64_pic_height = (ppcs->aligned_height + 64 - 1) / 64; |
276 | 0 | SEGMENT_CONVERT_IDX_TO_XY(segment_index, x_seg_idx, y_seg_idx, pcs->cdef_segments_column_count); |
277 | 0 | const uint32_t x_b64_start_idx = SEGMENT_START_IDX(x_seg_idx, b64_pic_width, pcs->cdef_segments_column_count); |
278 | 0 | const uint32_t x_b64_end_idx = SEGMENT_END_IDX(x_seg_idx, b64_pic_width, pcs->cdef_segments_column_count); |
279 | 0 | const uint32_t y_b64_start_idx = SEGMENT_START_IDX(y_seg_idx, b64_pic_height, pcs->cdef_segments_row_count); |
280 | 0 | const uint32_t y_b64_end_idx = SEGMENT_END_IDX(y_seg_idx, b64_pic_height, pcs->cdef_segments_row_count); |
281 | |
|
282 | 0 | const int32_t mi_rows = cm->mi_rows; |
283 | 0 | const int32_t mi_cols = cm->mi_cols; |
284 | 0 | CdefSearchControls* cdef_ctrls = &ppcs->cdef_search_ctrls; |
285 | 0 | const int first_pass_fs_num = cdef_ctrls->first_pass_fs_num; |
286 | 0 | const int default_second_pass_fs_num = cdef_ctrls->default_second_pass_fs_num; |
287 | 0 | EbByte src[3]; |
288 | 0 | EbByte ref[3]; |
289 | 0 | int32_t stride_src[3]; |
290 | 0 | int32_t stride_ref[3]; |
291 | 0 | int32_t plane_bsize[3]; |
292 | 0 | int32_t mi_wide_l2[3]; |
293 | 0 | int32_t mi_high_l2[3]; |
294 | 0 | int32_t xdec[3]; |
295 | 0 | int32_t ydec[3]; |
296 | 0 | int32_t cdef_count; |
297 | 0 | const int32_t coeff_shift = AOMMAX(scs->static_config.encoder_bit_depth - 8, 0); |
298 | 0 | const int32_t nvfb = (mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64; |
299 | 0 | const int32_t nhfb = (mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64; |
300 | 0 | const int32_t pri_damping = 3 + (frm_hdr->quantization_params.base_q_idx >> 6); |
301 | 0 | const int32_t sec_damping = pri_damping; |
302 | 0 | const int32_t num_planes = 3; |
303 | 0 | CdefList dlist[MI_SIZE_128X128 * MI_SIZE_128X128]; |
304 | |
|
305 | 0 | int32_t toff_prev = CDEF_VBORDER; |
306 | 0 | int32_t loff_prev = CDEF_HBORDER; |
307 | 0 | int32_t ysize_prev = (1 << MAX_SB_SIZE_LOG2) + 2 * CDEF_VBORDER; |
308 | 0 | int32_t xsize_prev = (1 << MAX_SB_SIZE_LOG2) + 2 * CDEF_HBORDER; |
309 | 0 | DECLARE_ALIGNED(32, uint16_t, inbuf[CDEF_INBUF_SIZE]); |
310 | 0 | uint16_t* in = inbuf + CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER; |
311 | | // tmp_dst is uint16_t to accommodate high bit depth content; 8bit will treat it as a uint8_t |
312 | | // buffer and will not use half of the buffer |
313 | 0 | DECLARE_ALIGNED(32, uint16_t, tmp_dst[1 << (MAX_SB_SIZE_LOG2 * 2)]); |
314 | |
|
315 | 0 | EbPictureBufferDesc* input_pic = is_16bit ? pcs->input_frame16bit : ppcs->enhanced_pic; |
316 | 0 | EbPictureBufferDesc* recon_pic; |
317 | 0 | svt_aom_get_recon_pic(pcs, &recon_pic, is_16bit); |
318 | |
|
319 | 0 | for (int pli = 0; pli < num_planes; pli++) { |
320 | 0 | const int subsampling_x = (pli == 0) ? 0 : 1; |
321 | 0 | const int subsampling_y = (pli == 0) ? 0 : 1; |
322 | 0 | xdec[pli] = subsampling_x; |
323 | 0 | ydec[pli] = subsampling_y; |
324 | | // The checks are stubs for 4:2:2 and 4:4:4 support |
325 | | // cppcheck-suppress knownConditionTrueFalse |
326 | 0 | plane_bsize[pli] = subsampling_y ? (subsampling_x ? BLOCK_4X4 : BLOCK_8X4) |
327 | 0 | : (subsampling_x ? BLOCK_4X8 : BLOCK_8X8); |
328 | 0 | mi_wide_l2[pli] = MI_SIZE_LOG2 - subsampling_x; |
329 | 0 | mi_high_l2[pli] = MI_SIZE_LOG2 - subsampling_y; |
330 | 0 | src[pli] = pcs->cdef_input_recon[pli]; |
331 | 0 | ref[pli] = pcs->cdef_input_source[pli]; |
332 | 0 | stride_src[pli] = pli == 0 ? recon_pic->y_stride : (pli == 1 ? recon_pic->u_stride : recon_pic->v_stride); |
333 | 0 | stride_ref[pli] = pli == 0 ? input_pic->y_stride : (pli == 1 ? input_pic->u_stride : input_pic->v_stride); |
334 | 0 | } |
335 | | |
336 | | // Loop over all filter blocks (64x64) |
337 | 0 | for (uint32_t fbr = y_b64_start_idx; fbr < y_b64_end_idx; ++fbr) { |
338 | 0 | for (uint32_t fbc = x_b64_start_idx; fbc < x_b64_end_idx; ++fbc) { |
339 | 0 | int32_t dirinit = 0; |
340 | 0 | const uint32_t lc = MI_SIZE_64X64 * fbc; |
341 | 0 | const uint32_t lr = MI_SIZE_64X64 * fbr; |
342 | 0 | int nhb = AOMMIN(MI_SIZE_64X64, mi_cols - lc); |
343 | 0 | int nvb = AOMMIN(MI_SIZE_64X64, mi_rows - lr); |
344 | 0 | int hb_step = 1; //these should be all time with 64x64 SBs |
345 | 0 | int vb_step = 1; |
346 | 0 | BlockSize bs = BLOCK_64X64; |
347 | 0 | const MbModeInfo* mbmi = pcs->mi_grid_base[lr * cm->mi_stride + lc]; |
348 | 0 | const BlockSize bsize = mbmi->bsize; |
349 | 0 | if (((fbc & 1) && (bsize == BLOCK_128X128 || bsize == BLOCK_128X64)) || |
350 | 0 | ((fbr & 1) && (bsize == BLOCK_128X128 || bsize == BLOCK_64X128))) { |
351 | 0 | continue; |
352 | 0 | } |
353 | 0 | if (bsize == BLOCK_128X128 || bsize == BLOCK_128X64 || bsize == BLOCK_64X128) { |
354 | 0 | bs = bsize; |
355 | 0 | } |
356 | |
|
357 | 0 | if (bs == BLOCK_128X128 || bs == BLOCK_128X64) { |
358 | 0 | nhb = AOMMIN(MI_SIZE_128X128, cm->mi_cols - lc); |
359 | 0 | hb_step = 2; |
360 | 0 | } |
361 | 0 | if (bs == BLOCK_128X128 || bs == BLOCK_64X128) { |
362 | 0 | nvb = AOMMIN(MI_SIZE_128X128, cm->mi_rows - lr); |
363 | 0 | vb_step = 2; |
364 | 0 | } |
365 | 0 | const uint32_t fb_idx = fbr * nhfb + fbc; |
366 | | // No filtering if the entire filter block is skipped |
367 | 0 | cdef_count = svt_sb_compute_cdef_list(pcs, cm, lr, lc, dlist, bs); |
368 | 0 | if (cdef_count == 0) { |
369 | 0 | pcs->skip_cdef_seg[fb_idx] = 1; |
370 | 0 | continue; |
371 | 0 | } |
372 | 0 | pcs->skip_cdef_seg[fb_idx] = 0; |
373 | |
|
374 | 0 | int32_t toff = CDEF_VBORDER * (fbr != 0); |
375 | 0 | int32_t loff = CDEF_HBORDER * (fbc != 0); |
376 | 0 | int32_t boff = CDEF_VBORDER * ((int32_t)fbr + vb_step < nvfb); |
377 | 0 | int32_t roff = CDEF_HBORDER * ((int32_t)fbc + hb_step < nhfb); |
378 | |
|
379 | 0 | uint8_t (*dir)[CDEF_NBLOCKS][CDEF_NBLOCKS] = &pcs->cdef_dir_data[fb_idx].dir; |
380 | 0 | int32_t (*var)[CDEF_NBLOCKS][CDEF_NBLOCKS] = &pcs->cdef_dir_data[fb_idx].var; |
381 | 0 | for (int pli = 0; pli < num_planes; pli++) { |
382 | 0 | int32_t ysize = (nvb << mi_high_l2[pli]) + boff + toff; |
383 | 0 | int32_t xsize = (nhb << mi_wide_l2[pli]) + roff + loff; |
384 | | /* We avoid filtering the pixels for which some of the pixels to |
385 | | average are outside the frame. We could change the filter instead, |
386 | | but it would add special cases for any future vectorization. |
387 | | Avoid memset'ting when dirty rect is inside the new one. |
388 | | TODO: this could be further optimized - fill out only borders, separate buffers for Y & UV */ |
389 | 0 | bool need_to_reset = toff_prev > toff || loff_prev > loff || ysize < ysize_prev || xsize < xsize_prev; |
390 | 0 | if (need_to_reset) { |
391 | 0 | uint16_t* p = &in[(-toff_prev * CDEF_BSTRIDE - loff_prev)]; |
392 | 0 | for (int r = 0; r < ysize_prev; r++) { |
393 | 0 | svt_memset(p, (uint8_t)CDEF_VERY_LARGE, sizeof(p[0]) * xsize_prev); |
394 | 0 | p += CDEF_BSTRIDE; |
395 | 0 | } |
396 | 0 | } |
397 | 0 | toff_prev = toff; |
398 | 0 | loff_prev = loff; |
399 | 0 | ysize_prev = ysize; |
400 | 0 | xsize_prev = xsize; |
401 | |
|
402 | 0 | svt_aom_copy_sb8_16(&in[(-toff * CDEF_BSTRIDE - loff)], |
403 | 0 | CDEF_BSTRIDE, |
404 | 0 | src[pli], |
405 | 0 | (lr << mi_high_l2[pli]) - toff, |
406 | 0 | (lc << mi_wide_l2[pli]) - loff, |
407 | 0 | stride_src[pli], |
408 | 0 | ysize, |
409 | 0 | xsize, |
410 | 0 | is_16bit); |
411 | |
|
412 | 0 | uint8_t subsampling_factor = cdef_ctrls->subsampling_factor; |
413 | | /* |
414 | | Cap the subsampling for certain block sizes. |
415 | | |
416 | | The intrinsics process several lines simultaneously, so blocks can only be subsampled |
417 | | a finite amount before there is no more speed gain. If the space between processed lines |
418 | | is too large, the intrinsics will begin accessing memory outside the block. |
419 | | */ |
420 | 0 | switch (plane_bsize[pli]) { |
421 | 0 | case BLOCK_8X8: |
422 | 0 | subsampling_factor = MIN(subsampling_factor, 4); |
423 | 0 | break; |
424 | 0 | case BLOCK_8X4: |
425 | 0 | case BLOCK_4X8: |
426 | 0 | subsampling_factor = MIN(subsampling_factor, 2); |
427 | 0 | break; |
428 | 0 | case BLOCK_4X4: |
429 | 0 | subsampling_factor = MIN(subsampling_factor, 1); |
430 | 0 | break; |
431 | 0 | } |
432 | | |
433 | | /* first cdef stage |
434 | | * Perform the pri_filter strength search for the current sub_block |
435 | | */ |
436 | 0 | for (int gi = 0; gi < first_pass_fs_num; gi++) { |
437 | | // Check if chroma filter is set to be tested |
438 | 0 | if (pli && (cdef_ctrls->default_first_pass_fs_uv[gi] == -1)) { |
439 | 0 | pcs->mse_seg[1][fb_idx][gi] = default_mse_uv * 64; |
440 | 0 | continue; |
441 | 0 | } |
442 | | |
443 | 0 | int32_t pri_strength = cdef_ctrls->default_first_pass_fs[gi] / CDEF_SEC_STRENGTHS; |
444 | 0 | int32_t sec_strength = cdef_ctrls->default_first_pass_fs[gi] % CDEF_SEC_STRENGTHS; |
445 | |
|
446 | 0 | svt_cdef_filter_fb(is_16bit ? NULL : (uint8_t*)tmp_dst, |
447 | 0 | is_16bit ? tmp_dst : NULL, |
448 | 0 | 0, |
449 | 0 | in, |
450 | 0 | xdec[pli], |
451 | 0 | ydec[pli], |
452 | 0 | *dir, |
453 | 0 | &dirinit, |
454 | 0 | *var, |
455 | 0 | pli, |
456 | 0 | dlist, |
457 | 0 | cdef_count, |
458 | 0 | pri_strength, |
459 | 0 | sec_strength + (sec_strength == 3), |
460 | 0 | pri_damping, |
461 | 0 | sec_damping, |
462 | 0 | coeff_shift, |
463 | 0 | subsampling_factor); |
464 | 0 | uint64_t curr_mse = compute_cdef_dist( |
465 | 0 | ref[pli], |
466 | 0 | (lr << mi_high_l2[pli]) * stride_ref[pli] + (lc << mi_wide_l2[pli]), |
467 | 0 | stride_ref[pli], |
468 | 0 | (uint8_t*)tmp_dst, |
469 | 0 | dlist, |
470 | 0 | cdef_count, |
471 | 0 | (BlockSize)plane_bsize[pli], |
472 | 0 | coeff_shift, |
473 | 0 | subsampling_factor, |
474 | 0 | is_16bit); |
475 | |
|
476 | 0 | if (pli < 2) { |
477 | 0 | pcs->mse_seg[pli][fb_idx][gi] = curr_mse * subsampling_factor; |
478 | 0 | } else { |
479 | 0 | pcs->mse_seg[1][fb_idx][gi] += (curr_mse * subsampling_factor); |
480 | 0 | } |
481 | 0 | } |
482 | | |
483 | | /* second cdef stage |
484 | | * Perform the sec_filter strength search for the current sub_block |
485 | | */ |
486 | 0 | for (int gi = first_pass_fs_num; gi < first_pass_fs_num + default_second_pass_fs_num; gi++) { |
487 | | // Check if chroma filter is set to be tested |
488 | 0 | if (pli && (cdef_ctrls->default_second_pass_fs_uv[gi - first_pass_fs_num] == -1)) { |
489 | 0 | pcs->mse_seg[1][fb_idx][gi] = default_mse_uv * 64; |
490 | 0 | continue; |
491 | 0 | } |
492 | | |
493 | 0 | int32_t pri_strength = cdef_ctrls->default_second_pass_fs[gi - first_pass_fs_num] / |
494 | 0 | CDEF_SEC_STRENGTHS; |
495 | 0 | int32_t sec_strength = cdef_ctrls->default_second_pass_fs[gi - first_pass_fs_num] % |
496 | 0 | CDEF_SEC_STRENGTHS; |
497 | |
|
498 | 0 | svt_cdef_filter_fb(is_16bit ? NULL : (uint8_t*)tmp_dst, |
499 | 0 | is_16bit ? tmp_dst : NULL, |
500 | 0 | 0, |
501 | 0 | in, |
502 | 0 | xdec[pli], |
503 | 0 | ydec[pli], |
504 | 0 | *dir, |
505 | 0 | &dirinit, |
506 | 0 | *var, |
507 | 0 | pli, |
508 | 0 | dlist, |
509 | 0 | cdef_count, |
510 | 0 | pri_strength, |
511 | 0 | sec_strength + (sec_strength == 3), |
512 | 0 | pri_damping, |
513 | 0 | sec_damping, |
514 | 0 | coeff_shift, |
515 | 0 | subsampling_factor); |
516 | 0 | uint64_t curr_mse = compute_cdef_dist( |
517 | 0 | ref[pli], |
518 | 0 | (lr << mi_high_l2[pli]) * stride_ref[pli] + (lc << mi_wide_l2[pli]), |
519 | 0 | stride_ref[pli], |
520 | 0 | (uint8_t*)tmp_dst, |
521 | 0 | dlist, |
522 | 0 | cdef_count, |
523 | 0 | (BlockSize)plane_bsize[pli], |
524 | 0 | coeff_shift, |
525 | 0 | subsampling_factor, |
526 | 0 | is_16bit); |
527 | |
|
528 | 0 | if (pli < 2) { |
529 | 0 | pcs->mse_seg[pli][fb_idx][gi] = curr_mse * subsampling_factor; |
530 | 0 | } else { |
531 | 0 | pcs->mse_seg[1][fb_idx][gi] += (curr_mse * subsampling_factor); |
532 | 0 | } |
533 | 0 | } |
534 | 0 | } |
535 | 0 | } |
536 | 0 | } |
537 | 0 | } |
538 | | |
539 | | /****************************************************** |
540 | | * CDEF Kernel |
541 | | ******************************************************/ |
542 | 948 | EbErrorType svt_aom_cdef_kernel_iter(void* context) { |
543 | | // Context & SCS & PCS |
544 | 948 | CdefContext* context_ptr = (CdefContext*)context; |
545 | 948 | PictureControlSet* pcs; |
546 | 948 | SequenceControlSet* scs; |
547 | | |
548 | | //// Input |
549 | 948 | EbObjectWrapper* dlf_results_wrapper; |
550 | 948 | DlfResults* dlf_results; |
551 | | |
552 | | //// Output |
553 | 948 | EbObjectWrapper* cdef_results_wrapper; |
554 | | |
555 | 948 | FrameHeader* frm_hdr; |
556 | | |
557 | | // Get DLF Results |
558 | 948 | EB_GET_FULL_OBJECT(context_ptr->cdef_input_fifo_ptr, &dlf_results_wrapper); |
559 | | |
560 | 474 | dlf_results = (DlfResults*)dlf_results_wrapper->object_ptr; |
561 | 474 | pcs = (PictureControlSet*)dlf_results->pcs_wrapper->object_ptr; |
562 | 474 | PictureParentControlSet* ppcs = pcs->ppcs; |
563 | 474 | scs = pcs->scs; |
564 | | |
565 | 474 | bool is_16bit = scs->is_16bit_pipeline; |
566 | 474 | Av1Common* cm = pcs->ppcs->av1_cm; |
567 | 474 | frm_hdr = &pcs->ppcs->frm_hdr; |
568 | 474 | CdefSearchControls* cdef_search_ctrls = &pcs->ppcs->cdef_search_ctrls; |
569 | 474 | if (!cdef_search_ctrls->use_reference_cdef_fs && !cdef_search_ctrls->use_qp_strength) { |
570 | 0 | if (scs->seq_header.cdef_level && pcs->ppcs->cdef_level) { |
571 | 0 | cdef_seg_search(pcs, scs, dlf_results->segment_index); |
572 | 0 | } |
573 | 0 | } |
574 | | //all seg based search is done. update total processed segments. if all done, finish the search and perfrom application. |
575 | 474 | svt_block_on_mutex(pcs->cdef_search_mutex); |
576 | | |
577 | 474 | pcs->tot_seg_searched_cdef++; |
578 | 474 | if (pcs->tot_seg_searched_cdef == pcs->cdef_segments_total_count) { |
579 | 474 | pcs->cdef_dist_dev = -1; |
580 | 474 | if (scs->seq_header.cdef_level && pcs->ppcs->cdef_level) { |
581 | 274 | finish_cdef_search(pcs); |
582 | 274 | if (ppcs->enable_restoration || pcs->ppcs->is_ref || scs->static_config.recon_enabled) { |
583 | | // Do application iff there are non-zero filters |
584 | 0 | if (frm_hdr->cdef_params.cdef_y_strength[0] != 0 || frm_hdr->cdef_params.cdef_uv_strength[0] != 0 || |
585 | 0 | pcs->ppcs->nb_cdef_strengths != 1) { |
586 | 0 | svt_av1_cdef_frame(scs, pcs); |
587 | 0 | } |
588 | 0 | } |
589 | 274 | } else { |
590 | 200 | frm_hdr->cdef_params.cdef_bits = 0; |
591 | 200 | frm_hdr->cdef_params.cdef_y_strength[0] = 0; |
592 | 200 | pcs->ppcs->nb_cdef_strengths = 1; |
593 | 200 | frm_hdr->cdef_params.cdef_uv_strength[0] = 0; |
594 | 200 | } |
595 | | |
596 | 474 | if (pcs->ppcs->nb_cdef_strengths == 1 && frm_hdr->cdef_params.cdef_y_strength[0] == 0 && |
597 | 233 | frm_hdr->cdef_params.cdef_uv_strength[0] == 0) { |
598 | 224 | pcs->cdef_dist_dev = 0; |
599 | 224 | } |
600 | | |
601 | | //restoration prep |
602 | 474 | bool is_lr = ppcs->enable_restoration && frm_hdr->allow_intrabc == 0; |
603 | 474 | if (is_lr) { |
604 | 0 | svt_av1_loop_restoration_save_boundary_lines(cm->frame_to_show, cm, 1); |
605 | 0 | if (is_16bit) { |
606 | 0 | set_unscaled_input_16bit(pcs); |
607 | 0 | } |
608 | 0 | } |
609 | | |
610 | | // ------- start: Normative upscaling - super-resolution tool |
611 | 474 | if (frm_hdr->allow_intrabc == 0 && pcs->ppcs->frame_superres_enabled) { |
612 | 0 | svt_av1_superres_upscale_frame(cm, pcs, scs); |
613 | 0 | } |
614 | 474 | if (scs->static_config.resize_mode != RESIZE_NONE) { |
615 | 0 | EbPictureBufferDesc* recon = NULL; |
616 | 0 | svt_aom_get_recon_pic(pcs, &recon, is_16bit); |
617 | 0 | recon->width = pcs->ppcs->render_width; |
618 | 0 | recon->height = pcs->ppcs->render_height; |
619 | 0 | if (is_lr) { |
620 | 0 | EbPictureBufferDesc* input_pic = is_16bit ? pcs->input_frame16bit : pcs->ppcs->enhanced_unscaled_pic; |
621 | |
|
622 | 0 | svt_aom_assert_err(pcs->scaled_input_pic == NULL, "pcs_ptr->scaled_input_pic is not desctoried!"); |
623 | 0 | EbPictureBufferDesc* scaled_input_pic = NULL; |
624 | | // downscale input picture if recon is resized |
625 | 0 | bool is_resized = recon->width != input_pic->width || recon->height != input_pic->height; |
626 | 0 | if (is_resized) { |
627 | 0 | superres_params_type spr_params = {recon->width, recon->height, 0}; |
628 | 0 | svt_aom_downscaled_source_buffer_desc_ctor(&scaled_input_pic, input_pic, spr_params); |
629 | 0 | svt_aom_resize_frame(input_pic, |
630 | 0 | scaled_input_pic, |
631 | 0 | scs->static_config.encoder_bit_depth, |
632 | 0 | av1_num_planes(&scs->seq_header.color_config), |
633 | 0 | scs->subsampling_x, |
634 | 0 | scs->subsampling_y, |
635 | 0 | input_pic->packed_flag, |
636 | 0 | PICTURE_BUFFER_DESC_FULL_MASK, |
637 | 0 | 0); // is_2bcompress |
638 | 0 | pcs->scaled_input_pic = scaled_input_pic; |
639 | 0 | } |
640 | 0 | } |
641 | 0 | } |
642 | | // ------- end: Normative upscaling - super-resolution tool |
643 | | |
644 | 474 | pcs->rest_segments_column_count = scs->rest_segment_column_count; |
645 | 474 | pcs->rest_segments_row_count = scs->rest_segment_row_count; |
646 | 474 | pcs->rest_segments_total_count = (uint16_t)(pcs->rest_segments_column_count * pcs->rest_segments_row_count); |
647 | 474 | pcs->tot_seg_searched_rest = 0; |
648 | 474 | pcs->ppcs->av1_cm->use_boundaries_in_rest_search = scs->use_boundaries_in_rest_search; |
649 | 474 | pcs->rest_extend_flag[0] = false; |
650 | 474 | pcs->rest_extend_flag[1] = false; |
651 | 474 | pcs->rest_extend_flag[2] = false; |
652 | | |
653 | 474 | uint32_t segment_index; |
654 | 948 | for (segment_index = 0; segment_index < pcs->rest_segments_total_count; ++segment_index) { |
655 | | // Get Empty Cdef Results to Rest |
656 | 474 | svt_get_empty_object(context_ptr->cdef_output_fifo_ptr, &cdef_results_wrapper); |
657 | 474 | CdefResults* cdef_results = (struct CdefResults*)cdef_results_wrapper->object_ptr; |
658 | 474 | cdef_results->pcs_wrapper = dlf_results->pcs_wrapper; |
659 | 474 | cdef_results->segment_index = segment_index; |
660 | | // Post Cdef Results |
661 | 474 | svt_post_full_object(cdef_results_wrapper); |
662 | 474 | } |
663 | 474 | } |
664 | 474 | svt_release_mutex(pcs->cdef_search_mutex); |
665 | | |
666 | | // Release Dlf Results |
667 | 474 | svt_release_object(dlf_results_wrapper); |
668 | | |
669 | 474 | return EB_ErrorNone; |
670 | 948 | } |
671 | | |
672 | 474 | void* svt_aom_cdef_kernel(void* input_ptr) { |
673 | 474 | EbThreadContext* thread_ctx = (EbThreadContext*)input_ptr; |
674 | 948 | for (;;) { |
675 | 948 | EbErrorType err = svt_aom_cdef_kernel_iter(thread_ctx->priv); |
676 | 948 | if (err == EB_NoErrorFifoShutdown) { |
677 | 474 | return NULL; |
678 | 474 | } |
679 | 948 | } |
680 | 0 | return NULL; |
681 | 474 | } |