Coverage Report

Created: 2026-05-16 06:41

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/svt-av1/Source/Lib/Codec/me_process.c
Line
Count
Source
1
/*
2
* Copyright(c) 2019 Intel Corporation
3
*
4
* This source code is subject to the terms of the BSD 2 Clause License and
5
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
* was not distributed with this source code in the LICENSE file, you can
7
* obtain it at https://www.aomedia.org/license/software-license. If the Alliance for Open
8
* Media Patent License 1.0 was not distributed with this source code in the
9
* PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license.
10
*/
11
12
#include <stdlib.h>
13
14
#include "enc_handle.h"
15
#include "utility.h"
16
#include "pcs.h"
17
#include "pd_results.h"
18
#include "me_process.h"
19
#include "me_results.h"
20
#include "reference_object.h"
21
#include "motion_estimation.h"
22
#include "lambda_rate_tables.h"
23
#include "compute_sad.h"
24
#ifdef ARCH_X86_64
25
#include <emmintrin.h>
26
#endif
27
#include "temporal_filtering.h"
28
#include "global_me.h"
29
30
#include "resize.h"
31
#include "pic_demux_results.h"
32
#include "rc_tasks.h"
33
#include "firstpass.h"
34
#include "initial_rc_process.h"
35
#include "enc_mode_config.h"
36
37
/* --32x32-
38
|00||01|
39
|02||03|
40
--------*/
41
/* ------16x16-----
42
|00||01||04||05|
43
|02||03||06||07|
44
|08||09||12||13|
45
|10||11||14||15|
46
----------------*/
47
/* ------8x8----------------------------
48
|00||01||04||05|     |16||17||20||21|
49
|02||03||06||07|     |18||19||22||23|
50
|08||09||12||13|     |24||25||28||29|
51
|10||11||14||15|     |26||27||30||31|
52
53
|32||33||36||37|     |48||49||52||53|
54
|34||35||38||39|     |50||51||54||55|
55
|40||41||44||45|     |56||57||60||61|
56
|42||43||46||47|     |58||59||62||63|
57
-------------------------------------*/
58
59
void dg_detector_hme_level0(PictureParentControlSet* ppcs, uint32_t seg_idx);
60
61
2.84k
/*
 * Destructor for the per-thread motion estimation context.
 *
 * p is the EbThreadContext whose `priv` member holds the
 * MotionEstimationContext_t to tear down. The nested me_ctx object is
 * deleted first, then the context itself is freed.
 */
static void motion_estimation_context_dctor(EbPtr p) {
    MotionEstimationContext_t* me_thread_ctx = (MotionEstimationContext_t*)((EbThreadContext*)p)->priv;

    EB_DELETE(me_thread_ctx->me_ctx);
    EB_FREE_ARRAY(me_thread_ctx);
}
67
68
/************************************************
69
 * Motion Analysis Context Constructor
70
 ************************************************/
71
EbErrorType svt_aom_motion_estimation_context_ctor(EbThreadContext* thread_ctx, const EbEncHandle* enc_handle_ptr,
72
2.84k
                                                   int index) {
73
2.84k
    MotionEstimationContext_t* me_context_ptr;
74
75
2.84k
    EB_CALLOC_ARRAY(me_context_ptr, 1);
76
2.84k
    thread_ctx->priv                                        = me_context_ptr;
77
2.84k
    thread_ctx->dctor                                       = motion_estimation_context_dctor;
78
2.84k
    me_context_ptr->picture_decision_results_input_fifo_ptr = svt_system_resource_get_consumer_fifo(
79
2.84k
        enc_handle_ptr->picture_decision_results_resource_ptr, index);
80
2.84k
    me_context_ptr->motion_estimation_results_output_fifo_ptr = svt_system_resource_get_producer_fifo(
81
2.84k
        enc_handle_ptr->motion_estimation_results_resource_ptr, index);
82
2.84k
    EB_NEW(me_context_ptr->me_ctx, svt_aom_me_context_ctor);
83
2.84k
    return EB_ErrorNone;
84
2.84k
}
85
86
/************************************************
87
 * Motion Analysis Kernel
88
 * The Motion Analysis performs  Motion Estimation
89
 * This process has access to the current input picture as well as
90
 * the input pictures, which the current picture references according
91
 * to the prediction structure pattern.  The Motion Analysis process is multithreaded,
92
 * so pictures can be processed out of order as long as all inputs are available.
93
 ************************************************/
94
3.31k
EbErrorType svt_aom_motion_estimation_kernel_iter(void* context) {
95
3.31k
    MotionEstimationContext_t* me_context_ptr = (MotionEstimationContext_t*)context;
96
97
3.31k
    EbObjectWrapper* in_results_wrapper_ptr;
98
3.31k
    EbObjectWrapper* out_results_wrapper;
99
100
    // Get Input Full Object
101
3.31k
    EB_GET_FULL_OBJECT(me_context_ptr->picture_decision_results_input_fifo_ptr, &in_results_wrapper_ptr);
102
476
    PictureDecisionResults*  in_results_ptr = (PictureDecisionResults*)in_results_wrapper_ptr->object_ptr;
103
476
    PictureParentControlSet* pcs            = (PictureParentControlSet*)in_results_ptr->pcs_wrapper->object_ptr;
104
476
    SequenceControlSet*      scs            = pcs->scs;
105
476
    if (in_results_ptr->task_type == TASK_TFME) {
106
0
        me_context_ptr->me_ctx->me_type = ME_MCTF;
107
476
    } else if (in_results_ptr->task_type == TASK_PAME || in_results_ptr->task_type == TASK_SUPERRES_RE_ME) {
108
474
        me_context_ptr->me_ctx->me_type = ME_OPEN_LOOP;
109
474
    } else if (in_results_ptr->task_type == TASK_DG_DETECTOR_HME) {
110
0
        me_context_ptr->me_ctx->me_type = ME_DG_DETECTOR;
111
0
    }
112
113
    // ME Kernel Signal(s) derivation
114
476
    if ((in_results_ptr->task_type == TASK_PAME) || (in_results_ptr->task_type == TASK_SUPERRES_RE_ME)) {
115
474
        svt_aom_sig_deriv_me(scs, pcs, me_context_ptr->me_ctx);
116
474
    }
117
118
2
    else if (in_results_ptr->task_type == TASK_TFME) {
119
0
        svt_aom_sig_deriv_me_tf(pcs, me_context_ptr->me_ctx);
120
0
    }
121
122
476
    if ((in_results_ptr->task_type == TASK_PAME) || (in_results_ptr->task_type == TASK_SUPERRES_RE_ME)) {
123
474
        EbPictureBufferDesc* sixteenth_picture_ptr;
124
474
        EbPictureBufferDesc* quarter_picture_ptr;
125
474
        EbPictureBufferDesc* input_padded_pic;
126
474
        EbPictureBufferDesc* input_pic;
127
474
        EbPaReferenceObject* pa_ref_obj_;
128
129
        //assert((int)pcs->pa_ref_pic_wrapper->live_count > 0);
130
474
        pa_ref_obj_ = (EbPaReferenceObject*)pcs->pa_ref_pic_wrapper->object_ptr;
131
        // Set 1/4 and 1/16 ME input buffer(s); filtered or decimated
132
474
        quarter_picture_ptr   = pa_ref_obj_->quarter_downsampled_picture_ptr;
133
474
        sixteenth_picture_ptr = pa_ref_obj_->sixteenth_downsampled_picture_ptr;
134
474
        input_padded_pic      = pa_ref_obj_->input_padded_pic;
135
136
474
        input_pic = pcs->enhanced_pic;
137
138
        // Segments
139
474
        uint32_t segment_index         = in_results_ptr->segment_index;
140
474
        uint32_t pic_width_in_b64      = (pcs->aligned_width + scs->b64_size - 1) / scs->b64_size;
141
474
        uint32_t picture_height_in_b64 = (pcs->aligned_height + scs->b64_size - 1) / scs->b64_size;
142
474
        uint32_t y_segment_index;
143
474
        uint32_t x_segment_index;
144
145
474
        SEGMENT_CONVERT_IDX_TO_XY(segment_index, x_segment_index, y_segment_index, pcs->me_segments_column_count);
146
474
        uint32_t x_b64_start_index = SEGMENT_START_IDX(
147
474
            x_segment_index, pic_width_in_b64, pcs->me_segments_column_count);
148
474
        uint32_t x_b64_end_index   = SEGMENT_END_IDX(x_segment_index, pic_width_in_b64, pcs->me_segments_column_count);
149
474
        uint32_t y_b64_start_index = SEGMENT_START_IDX(
150
474
            y_segment_index, picture_height_in_b64, pcs->me_segments_row_count);
151
474
        uint32_t y_b64_end_index = SEGMENT_END_IDX(y_segment_index, picture_height_in_b64, pcs->me_segments_row_count);
152
153
474
        bool skip_me = false;
154
474
        if (svt_aom_is_pic_skipped(pcs)) {
155
0
            skip_me = true;
156
0
        }
157
        // skip me for the first pass. ME is already performed
158
474
        if (!skip_me) {
159
474
            if (pcs->slice_type != I_SLICE) {
160
                // Use scaled source references if resolution of the reference is different that of the input
161
0
                svt_aom_use_scaled_source_refs_if_needed(
162
0
                    pcs, input_pic, pa_ref_obj_, &input_padded_pic, &quarter_picture_ptr, &sixteenth_picture_ptr);
163
164
                // 64x64 Block Loop
165
0
                for (uint32_t y_b64_index = y_b64_start_index; y_b64_index < y_b64_end_index; ++y_b64_index) {
166
0
                    for (uint32_t x_b64_index = x_b64_start_index; x_b64_index < x_b64_end_index; ++x_b64_index) {
167
0
                        uint32_t b64_index = (uint16_t)(x_b64_index + y_b64_index * pic_width_in_b64);
168
169
0
                        uint32_t b64_origin_x = x_b64_index * scs->b64_size;
170
0
                        uint32_t b64_origin_y = y_b64_index * scs->b64_size;
171
172
                        // Load the 64x64 Block from the input to the intermediate block buffer
173
0
                        uint32_t buffer_index = (b64_origin_y)*input_pic->y_stride + b64_origin_x;
174
#ifdef ARCH_X86_64
175
                        uint8_t* src_ptr    = &input_padded_pic->y_buffer[buffer_index];
176
                        uint32_t b64_height = (pcs->aligned_height - b64_origin_y) < BLOCK_SIZE_64
177
                            ? pcs->aligned_height - b64_origin_y
178
                            : BLOCK_SIZE_64;
179
                        //_MM_HINT_T0     //_MM_HINT_T1    //_MM_HINT_T2//_MM_HINT_NTA
180
                        for (uint32_t i = 0; i < b64_height; i++) {
181
                            char const* p = (char const*)(src_ptr + i * input_padded_pic->y_stride);
182
                            _mm_prefetch(p, _MM_HINT_T2);
183
                        }
184
#endif
185
0
                        me_context_ptr->me_ctx->b64_src_ptr    = &input_padded_pic->y_buffer[buffer_index];
186
0
                        me_context_ptr->me_ctx->b64_src_stride = input_padded_pic->y_stride;
187
188
                        // Load the 1/4 decimated SB from the 1/4 decimated input to the 1/4 intermediate SB buffer
189
0
                        if (me_context_ptr->me_ctx->enable_hme_level1_flag) {
190
0
                            buffer_index = ((b64_origin_y >> 1)) * quarter_picture_ptr->y_stride + (b64_origin_x >> 1);
191
192
0
                            me_context_ptr->me_ctx->quarter_b64_buffer = &quarter_picture_ptr->y_buffer[buffer_index];
193
0
                            me_context_ptr->me_ctx->quarter_b64_buffer_stride = quarter_picture_ptr->y_stride;
194
0
                        }
195
196
                        // Load the 1/16 decimated SB from the 1/16 decimated input to the 1/16 intermediate SB buffer
197
0
                        if (me_context_ptr->me_ctx->enable_hme_level0_flag) {
198
0
                            buffer_index = ((b64_origin_y >> 2)) * sixteenth_picture_ptr->y_stride +
199
0
                                (b64_origin_x >> 2);
200
201
0
                            me_context_ptr->me_ctx->sixteenth_b64_buffer =
202
0
                                &sixteenth_picture_ptr->y_buffer[buffer_index];
203
0
                            me_context_ptr->me_ctx->sixteenth_b64_buffer_stride = sixteenth_picture_ptr->y_stride;
204
0
                        }
205
206
0
                        me_context_ptr->me_ctx->me_type = ME_OPEN_LOOP;
207
208
0
                        if ((in_results_ptr->task_type == TASK_PAME) ||
209
0
                            (in_results_ptr->task_type == TASK_SUPERRES_RE_ME)) {
210
0
                            me_context_ptr->me_ctx->num_of_list_to_search = MAX_NUM_OF_REF_PIC_LIST;
211
212
0
                            me_context_ptr->me_ctx->num_of_ref_pic_to_search[0] = pcs->ref_list0_count_try;
213
0
                            me_context_ptr->me_ctx->num_of_ref_pic_to_search[1] = pcs->ref_list1_count_try;
214
0
                            me_context_ptr->me_ctx->temporal_layer_index        = pcs->temporal_layer_index;
215
0
                            me_context_ptr->me_ctx->is_ref                      = pcs->is_ref;
216
217
0
                            if (pcs->frame_superres_enabled || pcs->frame_resize_enabled) {
218
0
                                for (int i = 0; i < me_context_ptr->me_ctx->num_of_list_to_search; i++) {
219
0
                                    for (int j = 0; j < me_context_ptr->me_ctx->num_of_ref_pic_to_search[i]; j++) {
220
                                        //assert((int)pcs->ref_pa_pic_ptr_array[i][j]->live_count > 0);
221
0
                                        uint8_t sr_denom_idx     = svt_aom_get_denom_idx(pcs->superres_denom);
222
0
                                        uint8_t resize_denom_idx = svt_aom_get_denom_idx(pcs->resize_denom);
223
0
                                        EbPaReferenceObject* ref_object =
224
0
                                            (EbPaReferenceObject*)pcs->ref_pa_pic_ptr_array[i][j]->object_ptr;
225
0
                                        me_context_ptr->me_ctx->me_ds_ref_array[i][j].picture_ptr =
226
0
                                            ref_object
227
0
                                                ->downscaled_input_padded_picture_ptr[sr_denom_idx][resize_denom_idx];
228
0
                                        me_context_ptr->me_ctx->me_ds_ref_array[i][j].quarter_picture_ptr =
229
0
                                            ref_object->downscaled_quarter_downsampled_picture_ptr[sr_denom_idx]
230
0
                                                                                                  [resize_denom_idx];
231
0
                                        me_context_ptr->me_ctx->me_ds_ref_array[i][j].sixteenth_picture_ptr =
232
0
                                            ref_object->downscaled_sixteenth_downsampled_picture_ptr[sr_denom_idx]
233
0
                                                                                                    [resize_denom_idx];
234
0
                                        me_context_ptr->me_ctx->me_ds_ref_array[i][j].picture_number =
235
0
                                            ref_object->picture_number;
236
0
                                    }
237
0
                                }
238
0
                            } else {
239
0
                                for (int i = 0; i < me_context_ptr->me_ctx->num_of_list_to_search; i++) {
240
0
                                    for (int j = 0; j < me_context_ptr->me_ctx->num_of_ref_pic_to_search[i]; j++) {
241
                                        //assert((int)pcs->ref_pa_pic_ptr_array[i][j]->live_count > 0);
242
0
                                        EbPaReferenceObject* ref_object =
243
0
                                            (EbPaReferenceObject*)pcs->ref_pa_pic_ptr_array[i][j]->object_ptr;
244
0
                                        me_context_ptr->me_ctx->me_ds_ref_array[i][j].picture_ptr =
245
0
                                            ref_object->input_padded_pic;
246
0
                                        me_context_ptr->me_ctx->me_ds_ref_array[i][j].quarter_picture_ptr =
247
0
                                            ref_object->quarter_downsampled_picture_ptr;
248
0
                                        me_context_ptr->me_ctx->me_ds_ref_array[i][j].sixteenth_picture_ptr =
249
0
                                            ref_object->sixteenth_downsampled_picture_ptr;
250
0
                                        me_context_ptr->me_ctx->me_ds_ref_array[i][j].picture_number =
251
0
                                            ref_object->picture_number;
252
0
                                    }
253
0
                                }
254
0
                            }
255
0
                        }
256
257
0
                        svt_aom_motion_estimation_b64(
258
0
                            pcs, b64_index, b64_origin_x, b64_origin_y, me_context_ptr->me_ctx, input_pic);
259
260
0
                        if ((in_results_ptr->task_type == TASK_PAME) ||
261
0
                            (in_results_ptr->task_type == TASK_SUPERRES_RE_ME)) {
262
0
                            svt_block_on_mutex(pcs->me_processed_b64_mutex);
263
0
                            pcs->me_processed_b64_count++;
264
                            // We need to finish ME for all SBs to do GM
265
0
                            if (pcs->me_processed_b64_count == pcs->b64_total_count) {
266
0
                                if (pcs->gm_ctrls.enabled && (!pcs->gm_ctrls.pp_enabled || pcs->gm_pp_detected)) {
267
0
                                    svt_aom_global_motion_estimation(pcs, input_pic);
268
0
                                } else {
269
                                    // Initilize global motion to be OFF when GM is OFF
270
0
                                    memset(pcs->is_global_motion, false, MAX_NUM_OF_REF_PIC_LIST * REF_LIST_MAX_DEPTH);
271
0
                                }
272
0
                            }
273
274
0
                            svt_release_mutex(pcs->me_processed_b64_mutex);
275
0
                        }
276
0
                    }
277
0
                }
278
0
            }
279
474
        }
280
        // Get Empty Results Object
281
474
        svt_get_empty_object(me_context_ptr->motion_estimation_results_output_fifo_ptr, &out_results_wrapper);
282
283
474
        MotionEstimationResults* out_results = (MotionEstimationResults*)out_results_wrapper->object_ptr;
284
474
        out_results->pcs_wrapper             = in_results_ptr->pcs_wrapper;
285
474
        out_results->segment_index           = segment_index;
286
474
        out_results->task_type               = in_results_ptr->task_type;
287
        // Release the Input Results
288
474
        svt_release_object(in_results_wrapper_ptr);
289
290
        // Post the Full Results Object
291
474
        svt_post_full_object(out_results_wrapper);
292
474
    } else if (in_results_ptr->task_type == TASK_TFME) {
293
        //gm pre-processing for only base B
294
0
        if (pcs->gm_ctrls.pp_enabled && pcs->gm_pp_enabled && in_results_ptr->segment_index == 0) {
295
0
            svt_aom_gm_pre_processor(pcs, pcs->temp_filt_pcs_list);
296
0
        }
297
        // temporal filtering start
298
0
        me_context_ptr->me_ctx->me_type = ME_MCTF;
299
0
        svt_av1_init_temporal_filtering(pcs->temp_filt_pcs_list, pcs, me_context_ptr, in_results_ptr->segment_index);
300
301
        // Release the Input Results
302
0
        svt_release_object(in_results_wrapper_ptr);
303
2
    } else if (in_results_ptr->task_type == TASK_DG_DETECTOR_HME) {
304
        // dynamic gop detector
305
0
        dg_detector_hme_level0(pcs, in_results_ptr->segment_index);
306
307
        // Release the Input Results
308
0
        svt_release_object(in_results_wrapper_ptr);
309
0
    }
310
476
    return EB_ErrorNone;
311
3.31k
}
312
313
2.84k
void* svt_aom_motion_estimation_kernel(void* input_ptr) {
314
2.84k
    EbThreadContext* thread_ctx = (EbThreadContext*)input_ptr;
315
3.31k
    for (;;) {
316
3.31k
        EbErrorType err = svt_aom_motion_estimation_kernel_iter(thread_ctx->priv);
317
3.31k
        if (err == EB_NoErrorFifoShutdown) {
318
2.84k
            return NULL;
319
2.84k
        }
320
3.31k
    }
321
2
    return NULL;
322
2.84k
}