/work/svt-av1/Source/Lib/Codec/me_process.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright(c) 2019 Intel Corporation |
3 | | * |
4 | | * This source code is subject to the terms of the BSD 2 Clause License and |
5 | | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
6 | | * was not distributed with this source code in the LICENSE file, you can |
7 | | * obtain it at https://www.aomedia.org/license/software-license. If the Alliance for Open |
8 | | * Media Patent License 1.0 was not distributed with this source code in the |
9 | | * PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license. |
10 | | */ |
11 | | |
12 | | #include <stdlib.h> |
13 | | |
14 | | #include "enc_handle.h" |
15 | | #include "utility.h" |
16 | | #include "pcs.h" |
17 | | #include "pd_results.h" |
18 | | #include "me_process.h" |
19 | | #include "me_results.h" |
20 | | #include "reference_object.h" |
21 | | #include "motion_estimation.h" |
22 | | #include "lambda_rate_tables.h" |
23 | | #include "compute_sad.h" |
24 | | #ifdef ARCH_X86_64 |
25 | | #include <emmintrin.h> |
26 | | #endif |
27 | | #include "temporal_filtering.h" |
28 | | #include "global_me.h" |
29 | | |
30 | | #include "resize.h" |
31 | | #include "pic_demux_results.h" |
32 | | #include "rc_tasks.h" |
33 | | #include "firstpass.h" |
34 | | #include "initial_rc_process.h" |
35 | | #include "enc_mode_config.h" |
36 | | |
37 | | /* --32x32- |
38 | | |00||01| |
39 | | |02||03| |
40 | | --------*/ |
41 | | /* ------16x16----- |
42 | | |00||01||04||05| |
43 | | |02||03||06||07| |
44 | | |08||09||12||13| |
45 | | |10||11||14||15| |
46 | | ----------------*/ |
47 | | /* ------8x8---------------------------- |
48 | | |00||01||04||05| |16||17||20||21| |
49 | | |02||03||06||07| |18||19||22||23| |
50 | | |08||09||12||13| |24||25||28||29| |
51 | | |10||11||14||15| |26||27||30||31| |
52 | | |
53 | | |32||33||36||37| |48||49||52||53| |
54 | | |34||35||38||39| |50||51||54||55| |
55 | | |40||41||44||45| |56||57||60||61| |
56 | | |42||43||46||47| |58||59||62||63| |
57 | | -------------------------------------*/ |
58 | | |
59 | | void dg_detector_hme_level0(PictureParentControlSet* ppcs, uint32_t seg_idx); |
60 | | |
61 | 2.84k | static void motion_estimation_context_dctor(EbPtr p) { |
62 | 2.84k | EbThreadContext* thread_ctx = (EbThreadContext*)p; |
63 | 2.84k | MotionEstimationContext_t* obj = (MotionEstimationContext_t*)thread_ctx->priv; |
64 | 2.84k | EB_DELETE(obj->me_ctx); |
65 | 2.84k | EB_FREE_ARRAY(obj); |
66 | 2.84k | } |
67 | | |
68 | | /************************************************ |
69 | | * Motion Analysis Context Constructor |
70 | | ************************************************/ |
71 | | EbErrorType svt_aom_motion_estimation_context_ctor(EbThreadContext* thread_ctx, const EbEncHandle* enc_handle_ptr, |
72 | 2.84k | int index) { |
73 | 2.84k | MotionEstimationContext_t* me_context_ptr; |
74 | | |
75 | 2.84k | EB_CALLOC_ARRAY(me_context_ptr, 1); |
76 | 2.84k | thread_ctx->priv = me_context_ptr; |
77 | 2.84k | thread_ctx->dctor = motion_estimation_context_dctor; |
78 | 2.84k | me_context_ptr->picture_decision_results_input_fifo_ptr = svt_system_resource_get_consumer_fifo( |
79 | 2.84k | enc_handle_ptr->picture_decision_results_resource_ptr, index); |
80 | 2.84k | me_context_ptr->motion_estimation_results_output_fifo_ptr = svt_system_resource_get_producer_fifo( |
81 | 2.84k | enc_handle_ptr->motion_estimation_results_resource_ptr, index); |
82 | 2.84k | EB_NEW(me_context_ptr->me_ctx, svt_aom_me_context_ctor); |
83 | 2.84k | return EB_ErrorNone; |
84 | 2.84k | } |
85 | | |
86 | | /************************************************ |
87 | | * Motion Analysis Kernel |
88 | | * The Motion Analysis performs Motion Estimation |
89 | | * This process has access to the current input picture as well as |
90 | | * the input pictures, which the current picture references according |
91 | | * to the prediction structure pattern. The Motion Analysis process is multithreaded, |
92 | | * so pictures can be processed out of order as long as all inputs are available. |
93 | | ************************************************/ |
94 | 3.31k | EbErrorType svt_aom_motion_estimation_kernel_iter(void* context) { |
95 | 3.31k | MotionEstimationContext_t* me_context_ptr = (MotionEstimationContext_t*)context; |
96 | | |
97 | 3.31k | EbObjectWrapper* in_results_wrapper_ptr; |
98 | 3.31k | EbObjectWrapper* out_results_wrapper; |
99 | | |
100 | | // Get Input Full Object |
101 | 3.31k | EB_GET_FULL_OBJECT(me_context_ptr->picture_decision_results_input_fifo_ptr, &in_results_wrapper_ptr); |
102 | 476 | PictureDecisionResults* in_results_ptr = (PictureDecisionResults*)in_results_wrapper_ptr->object_ptr; |
103 | 476 | PictureParentControlSet* pcs = (PictureParentControlSet*)in_results_ptr->pcs_wrapper->object_ptr; |
104 | 476 | SequenceControlSet* scs = pcs->scs; |
105 | 476 | if (in_results_ptr->task_type == TASK_TFME) { |
106 | 0 | me_context_ptr->me_ctx->me_type = ME_MCTF; |
107 | 476 | } else if (in_results_ptr->task_type == TASK_PAME || in_results_ptr->task_type == TASK_SUPERRES_RE_ME) { |
108 | 474 | me_context_ptr->me_ctx->me_type = ME_OPEN_LOOP; |
109 | 474 | } else if (in_results_ptr->task_type == TASK_DG_DETECTOR_HME) { |
110 | 0 | me_context_ptr->me_ctx->me_type = ME_DG_DETECTOR; |
111 | 0 | } |
112 | | |
113 | | // ME Kernel Signal(s) derivation |
114 | 476 | if ((in_results_ptr->task_type == TASK_PAME) || (in_results_ptr->task_type == TASK_SUPERRES_RE_ME)) { |
115 | 474 | svt_aom_sig_deriv_me(scs, pcs, me_context_ptr->me_ctx); |
116 | 474 | } |
117 | | |
118 | 2 | else if (in_results_ptr->task_type == TASK_TFME) { |
119 | 0 | svt_aom_sig_deriv_me_tf(pcs, me_context_ptr->me_ctx); |
120 | 0 | } |
121 | | |
122 | 476 | if ((in_results_ptr->task_type == TASK_PAME) || (in_results_ptr->task_type == TASK_SUPERRES_RE_ME)) { |
123 | 474 | EbPictureBufferDesc* sixteenth_picture_ptr; |
124 | 474 | EbPictureBufferDesc* quarter_picture_ptr; |
125 | 474 | EbPictureBufferDesc* input_padded_pic; |
126 | 474 | EbPictureBufferDesc* input_pic; |
127 | 474 | EbPaReferenceObject* pa_ref_obj_; |
128 | | |
129 | | //assert((int)pcs->pa_ref_pic_wrapper->live_count > 0); |
130 | 474 | pa_ref_obj_ = (EbPaReferenceObject*)pcs->pa_ref_pic_wrapper->object_ptr; |
131 | | // Set 1/4 and 1/16 ME input buffer(s); filtered or decimated |
132 | 474 | quarter_picture_ptr = pa_ref_obj_->quarter_downsampled_picture_ptr; |
133 | 474 | sixteenth_picture_ptr = pa_ref_obj_->sixteenth_downsampled_picture_ptr; |
134 | 474 | input_padded_pic = pa_ref_obj_->input_padded_pic; |
135 | | |
136 | 474 | input_pic = pcs->enhanced_pic; |
137 | | |
138 | | // Segments |
139 | 474 | uint32_t segment_index = in_results_ptr->segment_index; |
140 | 474 | uint32_t pic_width_in_b64 = (pcs->aligned_width + scs->b64_size - 1) / scs->b64_size; |
141 | 474 | uint32_t picture_height_in_b64 = (pcs->aligned_height + scs->b64_size - 1) / scs->b64_size; |
142 | 474 | uint32_t y_segment_index; |
143 | 474 | uint32_t x_segment_index; |
144 | | |
145 | 474 | SEGMENT_CONVERT_IDX_TO_XY(segment_index, x_segment_index, y_segment_index, pcs->me_segments_column_count); |
146 | 474 | uint32_t x_b64_start_index = SEGMENT_START_IDX( |
147 | 474 | x_segment_index, pic_width_in_b64, pcs->me_segments_column_count); |
148 | 474 | uint32_t x_b64_end_index = SEGMENT_END_IDX(x_segment_index, pic_width_in_b64, pcs->me_segments_column_count); |
149 | 474 | uint32_t y_b64_start_index = SEGMENT_START_IDX( |
150 | 474 | y_segment_index, picture_height_in_b64, pcs->me_segments_row_count); |
151 | 474 | uint32_t y_b64_end_index = SEGMENT_END_IDX(y_segment_index, picture_height_in_b64, pcs->me_segments_row_count); |
152 | | |
153 | 474 | bool skip_me = false; |
154 | 474 | if (svt_aom_is_pic_skipped(pcs)) { |
155 | 0 | skip_me = true; |
156 | 0 | } |
157 | | // skip me for the first pass. ME is already performed |
158 | 474 | if (!skip_me) { |
159 | 474 | if (pcs->slice_type != I_SLICE) { |
160 | | // Use scaled source references if resolution of the reference is different that of the input |
161 | 0 | svt_aom_use_scaled_source_refs_if_needed( |
162 | 0 | pcs, input_pic, pa_ref_obj_, &input_padded_pic, &quarter_picture_ptr, &sixteenth_picture_ptr); |
163 | | |
164 | | // 64x64 Block Loop |
165 | 0 | for (uint32_t y_b64_index = y_b64_start_index; y_b64_index < y_b64_end_index; ++y_b64_index) { |
166 | 0 | for (uint32_t x_b64_index = x_b64_start_index; x_b64_index < x_b64_end_index; ++x_b64_index) { |
167 | 0 | uint32_t b64_index = (uint16_t)(x_b64_index + y_b64_index * pic_width_in_b64); |
168 | |
|
169 | 0 | uint32_t b64_origin_x = x_b64_index * scs->b64_size; |
170 | 0 | uint32_t b64_origin_y = y_b64_index * scs->b64_size; |
171 | | |
172 | | // Load the 64x64 Block from the input to the intermediate block buffer |
173 | 0 | uint32_t buffer_index = (b64_origin_y)*input_pic->y_stride + b64_origin_x; |
174 | | #ifdef ARCH_X86_64 |
175 | | uint8_t* src_ptr = &input_padded_pic->y_buffer[buffer_index]; |
176 | | uint32_t b64_height = (pcs->aligned_height - b64_origin_y) < BLOCK_SIZE_64 |
177 | | ? pcs->aligned_height - b64_origin_y |
178 | | : BLOCK_SIZE_64; |
179 | | //_MM_HINT_T0 //_MM_HINT_T1 //_MM_HINT_T2//_MM_HINT_NTA |
180 | | for (uint32_t i = 0; i < b64_height; i++) { |
181 | | char const* p = (char const*)(src_ptr + i * input_padded_pic->y_stride); |
182 | | _mm_prefetch(p, _MM_HINT_T2); |
183 | | } |
184 | | #endif |
185 | 0 | me_context_ptr->me_ctx->b64_src_ptr = &input_padded_pic->y_buffer[buffer_index]; |
186 | 0 | me_context_ptr->me_ctx->b64_src_stride = input_padded_pic->y_stride; |
187 | | |
188 | | // Load the 1/4 decimated SB from the 1/4 decimated input to the 1/4 intermediate SB buffer |
189 | 0 | if (me_context_ptr->me_ctx->enable_hme_level1_flag) { |
190 | 0 | buffer_index = ((b64_origin_y >> 1)) * quarter_picture_ptr->y_stride + (b64_origin_x >> 1); |
191 | |
|
192 | 0 | me_context_ptr->me_ctx->quarter_b64_buffer = &quarter_picture_ptr->y_buffer[buffer_index]; |
193 | 0 | me_context_ptr->me_ctx->quarter_b64_buffer_stride = quarter_picture_ptr->y_stride; |
194 | 0 | } |
195 | | |
196 | | // Load the 1/16 decimated SB from the 1/16 decimated input to the 1/16 intermediate SB buffer |
197 | 0 | if (me_context_ptr->me_ctx->enable_hme_level0_flag) { |
198 | 0 | buffer_index = ((b64_origin_y >> 2)) * sixteenth_picture_ptr->y_stride + |
199 | 0 | (b64_origin_x >> 2); |
200 | |
|
201 | 0 | me_context_ptr->me_ctx->sixteenth_b64_buffer = |
202 | 0 | &sixteenth_picture_ptr->y_buffer[buffer_index]; |
203 | 0 | me_context_ptr->me_ctx->sixteenth_b64_buffer_stride = sixteenth_picture_ptr->y_stride; |
204 | 0 | } |
205 | |
|
206 | 0 | me_context_ptr->me_ctx->me_type = ME_OPEN_LOOP; |
207 | |
|
208 | 0 | if ((in_results_ptr->task_type == TASK_PAME) || |
209 | 0 | (in_results_ptr->task_type == TASK_SUPERRES_RE_ME)) { |
210 | 0 | me_context_ptr->me_ctx->num_of_list_to_search = MAX_NUM_OF_REF_PIC_LIST; |
211 | |
|
212 | 0 | me_context_ptr->me_ctx->num_of_ref_pic_to_search[0] = pcs->ref_list0_count_try; |
213 | 0 | me_context_ptr->me_ctx->num_of_ref_pic_to_search[1] = pcs->ref_list1_count_try; |
214 | 0 | me_context_ptr->me_ctx->temporal_layer_index = pcs->temporal_layer_index; |
215 | 0 | me_context_ptr->me_ctx->is_ref = pcs->is_ref; |
216 | |
|
217 | 0 | if (pcs->frame_superres_enabled || pcs->frame_resize_enabled) { |
218 | 0 | for (int i = 0; i < me_context_ptr->me_ctx->num_of_list_to_search; i++) { |
219 | 0 | for (int j = 0; j < me_context_ptr->me_ctx->num_of_ref_pic_to_search[i]; j++) { |
220 | | //assert((int)pcs->ref_pa_pic_ptr_array[i][j]->live_count > 0); |
221 | 0 | uint8_t sr_denom_idx = svt_aom_get_denom_idx(pcs->superres_denom); |
222 | 0 | uint8_t resize_denom_idx = svt_aom_get_denom_idx(pcs->resize_denom); |
223 | 0 | EbPaReferenceObject* ref_object = |
224 | 0 | (EbPaReferenceObject*)pcs->ref_pa_pic_ptr_array[i][j]->object_ptr; |
225 | 0 | me_context_ptr->me_ctx->me_ds_ref_array[i][j].picture_ptr = |
226 | 0 | ref_object |
227 | 0 | ->downscaled_input_padded_picture_ptr[sr_denom_idx][resize_denom_idx]; |
228 | 0 | me_context_ptr->me_ctx->me_ds_ref_array[i][j].quarter_picture_ptr = |
229 | 0 | ref_object->downscaled_quarter_downsampled_picture_ptr[sr_denom_idx] |
230 | 0 | [resize_denom_idx]; |
231 | 0 | me_context_ptr->me_ctx->me_ds_ref_array[i][j].sixteenth_picture_ptr = |
232 | 0 | ref_object->downscaled_sixteenth_downsampled_picture_ptr[sr_denom_idx] |
233 | 0 | [resize_denom_idx]; |
234 | 0 | me_context_ptr->me_ctx->me_ds_ref_array[i][j].picture_number = |
235 | 0 | ref_object->picture_number; |
236 | 0 | } |
237 | 0 | } |
238 | 0 | } else { |
239 | 0 | for (int i = 0; i < me_context_ptr->me_ctx->num_of_list_to_search; i++) { |
240 | 0 | for (int j = 0; j < me_context_ptr->me_ctx->num_of_ref_pic_to_search[i]; j++) { |
241 | | //assert((int)pcs->ref_pa_pic_ptr_array[i][j]->live_count > 0); |
242 | 0 | EbPaReferenceObject* ref_object = |
243 | 0 | (EbPaReferenceObject*)pcs->ref_pa_pic_ptr_array[i][j]->object_ptr; |
244 | 0 | me_context_ptr->me_ctx->me_ds_ref_array[i][j].picture_ptr = |
245 | 0 | ref_object->input_padded_pic; |
246 | 0 | me_context_ptr->me_ctx->me_ds_ref_array[i][j].quarter_picture_ptr = |
247 | 0 | ref_object->quarter_downsampled_picture_ptr; |
248 | 0 | me_context_ptr->me_ctx->me_ds_ref_array[i][j].sixteenth_picture_ptr = |
249 | 0 | ref_object->sixteenth_downsampled_picture_ptr; |
250 | 0 | me_context_ptr->me_ctx->me_ds_ref_array[i][j].picture_number = |
251 | 0 | ref_object->picture_number; |
252 | 0 | } |
253 | 0 | } |
254 | 0 | } |
255 | 0 | } |
256 | |
|
257 | 0 | svt_aom_motion_estimation_b64( |
258 | 0 | pcs, b64_index, b64_origin_x, b64_origin_y, me_context_ptr->me_ctx, input_pic); |
259 | |
|
260 | 0 | if ((in_results_ptr->task_type == TASK_PAME) || |
261 | 0 | (in_results_ptr->task_type == TASK_SUPERRES_RE_ME)) { |
262 | 0 | svt_block_on_mutex(pcs->me_processed_b64_mutex); |
263 | 0 | pcs->me_processed_b64_count++; |
264 | | // We need to finish ME for all SBs to do GM |
265 | 0 | if (pcs->me_processed_b64_count == pcs->b64_total_count) { |
266 | 0 | if (pcs->gm_ctrls.enabled && (!pcs->gm_ctrls.pp_enabled || pcs->gm_pp_detected)) { |
267 | 0 | svt_aom_global_motion_estimation(pcs, input_pic); |
268 | 0 | } else { |
269 | | // Initilize global motion to be OFF when GM is OFF |
270 | 0 | memset(pcs->is_global_motion, false, MAX_NUM_OF_REF_PIC_LIST * REF_LIST_MAX_DEPTH); |
271 | 0 | } |
272 | 0 | } |
273 | |
|
274 | 0 | svt_release_mutex(pcs->me_processed_b64_mutex); |
275 | 0 | } |
276 | 0 | } |
277 | 0 | } |
278 | 0 | } |
279 | 474 | } |
280 | | // Get Empty Results Object |
281 | 474 | svt_get_empty_object(me_context_ptr->motion_estimation_results_output_fifo_ptr, &out_results_wrapper); |
282 | | |
283 | 474 | MotionEstimationResults* out_results = (MotionEstimationResults*)out_results_wrapper->object_ptr; |
284 | 474 | out_results->pcs_wrapper = in_results_ptr->pcs_wrapper; |
285 | 474 | out_results->segment_index = segment_index; |
286 | 474 | out_results->task_type = in_results_ptr->task_type; |
287 | | // Release the Input Results |
288 | 474 | svt_release_object(in_results_wrapper_ptr); |
289 | | |
290 | | // Post the Full Results Object |
291 | 474 | svt_post_full_object(out_results_wrapper); |
292 | 474 | } else if (in_results_ptr->task_type == TASK_TFME) { |
293 | | //gm pre-processing for only base B |
294 | 0 | if (pcs->gm_ctrls.pp_enabled && pcs->gm_pp_enabled && in_results_ptr->segment_index == 0) { |
295 | 0 | svt_aom_gm_pre_processor(pcs, pcs->temp_filt_pcs_list); |
296 | 0 | } |
297 | | // temporal filtering start |
298 | 0 | me_context_ptr->me_ctx->me_type = ME_MCTF; |
299 | 0 | svt_av1_init_temporal_filtering(pcs->temp_filt_pcs_list, pcs, me_context_ptr, in_results_ptr->segment_index); |
300 | | |
301 | | // Release the Input Results |
302 | 0 | svt_release_object(in_results_wrapper_ptr); |
303 | 2 | } else if (in_results_ptr->task_type == TASK_DG_DETECTOR_HME) { |
304 | | // dynamic gop detector |
305 | 0 | dg_detector_hme_level0(pcs, in_results_ptr->segment_index); |
306 | | |
307 | | // Release the Input Results |
308 | 0 | svt_release_object(in_results_wrapper_ptr); |
309 | 0 | } |
310 | 476 | return EB_ErrorNone; |
311 | 3.31k | } |
312 | | |
313 | 2.84k | void* svt_aom_motion_estimation_kernel(void* input_ptr) { |
314 | 2.84k | EbThreadContext* thread_ctx = (EbThreadContext*)input_ptr; |
315 | 3.31k | for (;;) { |
316 | 3.31k | EbErrorType err = svt_aom_motion_estimation_kernel_iter(thread_ctx->priv); |
317 | 3.31k | if (err == EB_NoErrorFifoShutdown) { |
318 | 2.84k | return NULL; |
319 | 2.84k | } |
320 | 3.31k | } |
321 | 2 | return NULL; |
322 | 2.84k | } |