Coverage Report

Created: 2026-05-16 06:41

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/svt-av1/Source/Lib/Codec/motion_estimation.c
Line
Count
Source
1
/*
2
* Copyright(c) 2019 Intel Corporation
3
* Copyright(c) 2019 Netflix, Inc.
4
*
5
* This source code is subject to the terms of the BSD 2 Clause License and
6
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
7
* was not distributed with this source code in the LICENSE file, you can
8
* obtain it at https://www.aomedia.org/license/software-license. If the Alliance for Open
9
* Media Patent License 1.0 was not distributed with this source code in the
10
* PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license.
11
*/
12
13
#include <stdio.h>
14
#include <inttypes.h>
15
16
#include "aom_dsp_rtcd.h"
17
#include "pcs.h"
18
#include "sequence_control_set.h"
19
#include "motion_estimation.h"
20
#include "utility.h"
21
22
#include "compute_sad.h"
23
#include "reference_object.h"
24
25
#include "enc_intra_prediction.h"
26
#include "lambda_rate_tables.h"
27
#include "transforms.h"
28
29
#include "svt_log.h"
30
#include "resize.h"
31
32
/********************************************
33
 * Constants
34
 ********************************************/
35
0
// Numeric identifiers for the two reference picture lists.
// NOTE(review): not referenced in the visible part of this file; presumably used as list_index values - confirm.
#define REFERENCE_PIC_LIST_0 0
#define REFERENCE_PIC_LIST_1 1
37
38
/*******************************************
 * svt_aom_compute8x4_sad_kernel_c
 *   Unoptimized (plain C) SAD over a block that is 8 samples wide and 4 rows high.
 *
 *   src        : input, pointer to the first source sample of the block
 *   src_stride : input, number of samples to advance src between two visited rows
 *   ref        : input, pointer to the first reference sample of the block
 *   ref_stride : input, number of samples to advance ref between two visited rows
 *
 *   Returns the sum of EB_ABS_DIFF(src[x], ref[x]) over the 8 x 4 visited samples.
 *******************************************/
uint32_t svt_aom_compute8x4_sad_kernel_c(uint8_t* src, // input parameter, source samples Ptr
                                         uint32_t src_stride, // input parameter, source stride
                                         uint8_t* ref, // input parameter, reference samples Ptr
                                         uint32_t ref_stride) // input parameter, reference stride
{
    uint32_t sad = 0;

    for (uint32_t row = 0; row < 4; ++row) {
        for (uint32_t col = 0; col < 8; ++col) { sad += EB_ABS_DIFF(src[col], ref[col]); }
        // Move both pointers to the next row to be visited.
        src += src_stride;
        ref += ref_stride;
    }

    return sad;
}
65
66
/*******************************************
 * compute8x8_sad_kernel_c
 *   Unoptimized (plain C) SAD over a block that is 8 samples wide and 8 rows high.
 *
 *   src        : input, pointer to the first source sample of the block
 *   src_stride : input, number of samples to advance src between two visited rows
 *   ref        : input, pointer to the first reference sample of the block
 *   ref_stride : input, number of samples to advance ref between two visited rows
 *
 *   Returns the sum of EB_ABS_DIFF(src[x], ref[x]) over the 8 x 8 visited samples.
 *******************************************/
static uint32_t compute8x8_sad_kernel_c(uint8_t* src, // input parameter, source samples Ptr
                                        uint32_t src_stride, // input parameter, source stride
                                        uint8_t* ref, // input parameter, reference samples Ptr
                                        uint32_t ref_stride) // input parameter, reference stride
{
    uint32_t sad = 0;

    for (uint32_t row = 0; row < 8; ++row) {
        for (uint32_t col = 0; col < 8; ++col) { sad += EB_ABS_DIFF(src[col], ref[col]); }
        // Move both pointers to the next row to be visited.
        src += src_stride;
        ref += ref_stride;
    }

    return sad;
}
93
94
/*******************************************
 * svt_ext_sad_calculation_8x8_16x16_c
 *   Calculate the SAD of a 16x16 block and of its four 8x8 sub-blocks for one
 *   candidate MV, and keep the best SAD + MV whenever there is an improvement.
 *
 *   src, src_stride   : input, top-left sample and stride of the 16x16 source block
 *   ref, ref_stride   : input, top-left sample and stride of the 16x16 reference block
 *   p_best_sad_8x8    : in/out, 4 best 8x8 SADs so far (raster order inside the 16x16)
 *   p_best_sad_16x16  : in/out, best 16x16 SAD so far (element 0)
 *   p_best_mv8x8      : in/out, 4 MVs matching p_best_sad_8x8
 *   p_best_mv16x16    : in/out, MV matching p_best_sad_16x16 (element 0)
 *   mv                : input, candidate MV stored as-is on improvement
 *   p_sad16x16        : output, 16x16 SAD of this candidate
 *   p_sad8x8          : output, the 4 8x8 SADs of this candidate
 *   sub_sad           : input, when true each 8x8 SAD is estimated from every
 *                       other row (doubled stride, 4 rows) and then doubled
 *******************************************/
void svt_ext_sad_calculation_8x8_16x16_c(uint8_t* src, uint32_t src_stride, uint8_t* ref, uint32_t ref_stride,
                                         uint32_t* p_best_sad_8x8, uint32_t* p_best_sad_16x16, uint32_t* p_best_mv8x8,
                                         uint32_t* p_best_mv16x16, uint32_t mv, uint32_t* p_sad16x16,
                                         uint32_t* p_sad8x8, bool sub_sad) {
    // First pass: SAD of every 8x8 sub-block (0: top-left, 1: top-right, 2: bottom-left, 3: bottom-right).
    for (uint32_t blk = 0; blk < 4; ++blk) {
        const uint32_t row_off = (blk >> 1) * 8;
        const uint32_t col_off = (blk & 1) * 8;
        uint8_t*       src_blk = src + row_off * src_stride + col_off;
        uint8_t*       ref_blk = ref + row_off * ref_stride + col_off;

        if (sub_sad) {
            p_sad8x8[blk] = (svt_aom_compute8x4_sad_kernel_c(src_blk, 2 * src_stride, ref_blk, 2 * ref_stride)) << 1;
        } else {
            p_sad8x8[blk] = compute8x8_sad_kernel_c(src_blk, src_stride, ref_blk, ref_stride);
        }
    }

    // Second pass: update the per-8x8 best SAD / MV (strict improvement only).
    for (uint32_t blk = 0; blk < 4; ++blk) {
        if (p_sad8x8[blk] < p_best_sad_8x8[blk]) {
            p_best_sad_8x8[blk] = p_sad8x8[blk];
            p_best_mv8x8[blk]   = mv;
        }
    }

    // The 16x16 SAD is the sum of its four 8x8 SADs.
    const uint32_t sad16x16 = p_sad8x8[0] + p_sad8x8[1] + p_sad8x8[2] + p_sad8x8[3];
    if (sad16x16 < p_best_sad_16x16[0]) {
        p_best_sad_16x16[0] = sad16x16;
        p_best_mv16x16[0]   = mv;
    }

    *p_sad16x16 = sad16x16;
}
157
158
/*******************************************
 * svt_ext_sad_calculation_32x32_64x64_c
 *   Calculate the SADs of the four 32x32 blocks and of the 64x64 block from
 *   the sixteen 16x16 SADs, and keep the best SAD + MV whenever there is an
 *   improvement.
 *
 *   p_sad16x16        : input, 16 SADs; entries [4q .. 4q+3] belong to 32x32 block q
 *   p_best_sad_32x32  : in/out, 4 best 32x32 SADs so far
 *   p_best_sad_64x64  : in/out, best 64x64 SAD so far (element 0)
 *   p_best_mv32x32    : in/out, 4 MVs matching p_best_sad_32x32
 *   p_best_mv64x64    : in/out, MV matching p_best_sad_64x64 (element 0)
 *   mv                : input, candidate MV stored as-is on improvement
 *   p_sad32x32        : output, the 4 32x32 SADs of this candidate
 *******************************************/
void svt_ext_sad_calculation_32x32_64x64_c(uint32_t* p_sad16x16, uint32_t* p_best_sad_32x32, uint32_t* p_best_sad_64x64,
                                           uint32_t* p_best_mv32x32, uint32_t* p_best_mv64x64, uint32_t mv,
                                           uint32_t* p_sad32x32) {
    uint32_t sad64x64 = 0;

    for (uint32_t quad = 0; quad < 4; ++quad) {
        const uint32_t* sad16    = p_sad16x16 + 4 * quad;
        const uint32_t  sad32x32 = sad16[0] + sad16[1] + sad16[2] + sad16[3];

        p_sad32x32[quad] = sad32x32;
        // Strict comparison: an equal SAD does not replace the stored best.
        if (sad32x32 < p_best_sad_32x32[quad]) {
            p_best_sad_32x32[quad] = sad32x32;
            p_best_mv32x32[quad]   = mv;
        }

        sad64x64 += sad32x32;
    }

    if (sad64x64 < p_best_sad_64x64[0]) {
        p_best_sad_64x64[0] = sad64x64;
        p_best_mv64x64[0]   = mv;
    }
}
197
198
/*******************************************
 * svt_ext_eight_sad_calculation_8x8_16x16
 *   For one 16x16 block, evaluate 8 consecutive horizontal search positions
 *   (ref + 0 .. ref + 7). For every position the four 8x8 SADs and the 16x16
 *   SAD are computed, and the best SAD + MV are kept on strict improvement.
 *
 *   src, src_stride   : input, top-left sample and stride of the 16x16 source block
 *   ref, ref_stride   : input, reference samples for search position 0 and their stride
 *   mv                : input, packed MV of search position 0
 *   start_16x16_pos   : input, index of this 16x16 block in the 16x16 arrays;
 *                       its 8x8 entries start at 4 * start_16x16_pos
 *   p_best_sad_8x8 / p_best_mv8x8     : in/out, best 8x8 SADs / MVs
 *   p_best_sad_16x16 / p_best_mv16x16 : in/out, best 16x16 SADs / MVs
 *   p_eight_sad16x16  : output, [start_16x16_pos][i] receives the 16x16 SAD of position i
 *   p_eight_sad8x8    : unused
 *   sub_sad           : input, when true each 8x8 SAD is estimated from every
 *                       other row (doubled stride, 4 rows) and then doubled
 *
 *   The stored MV has y in the upper 16 bits and x + position in the lower 16 bits.
 *   NOTE(review): _MVXT / _MVYT are presumably the x / y extractors of a packed MV - confirm.
 *******************************************/
static void svt_ext_eight_sad_calculation_8x8_16x16(uint8_t* src, uint32_t src_stride, uint8_t* ref,
                                                    uint32_t ref_stride, uint32_t mv, uint32_t start_16x16_pos,
                                                    uint32_t* p_best_sad_8x8, uint32_t* p_best_sad_16x16,
                                                    uint32_t* p_best_mv8x8, uint32_t* p_best_mv16x16,
                                                    uint32_t p_eight_sad16x16[16][8], uint32_t p_eight_sad8x8[64][8],
                                                    bool sub_sad) {
    const uint32_t start_8x8_pos = 4 * start_16x16_pos;
    // With sub_sad only every other row is visited, hence the doubled strides.
    const uint32_t kernel_src_stride = sub_sad ? (src_stride << 1) : src_stride;
    const uint32_t kernel_ref_stride = sub_sad ? (ref_stride << 1) : ref_stride;

    (void)p_eight_sad8x8;

    // Re-base the "best" arrays on the entries that belong to this 16x16 block.
    p_best_sad_8x8 += start_8x8_pos;
    p_best_mv8x8 += start_8x8_pos;
    p_best_sad_16x16 += start_16x16_pos;
    p_best_mv16x16 += start_16x16_pos;

    for (int search_index = 0; search_index < 8; search_index++) {
        // MV of this search position: same y, x shifted by the position index.
        const int16_t  x_mv         = _MVXT(mv) + (int16_t)search_index;
        const int16_t  y_mv         = _MVYT(mv);
        const uint32_t candidate_mv = ((uint32_t)y_mv << 16) | ((uint16_t)x_mv);
        uint32_t       sad16x16     = 0;

        // 8x8 sub-blocks: 0 top-left, 1 top-right, 2 bottom-left, 3 bottom-right.
        for (uint32_t blk = 0; blk < 4; ++blk) {
            uint8_t* src_blk = src;
            uint8_t* ref_blk = ref;
            uint32_t sad8x8;

            if (blk >> 1) {
                src_blk += (src_stride << 3);
                ref_blk += (ref_stride << 3);
            }
            if (blk & 1) {
                src_blk += 8;
                ref_blk += 8;
            }
            ref_blk += search_index;

            if (sub_sad) {
                sad8x8 = (svt_aom_compute8x4_sad_kernel_c(src_blk, kernel_src_stride, ref_blk, kernel_ref_stride))
                    << 1;
            } else {
                sad8x8 = compute8x8_sad_kernel_c(src_blk, kernel_src_stride, ref_blk, kernel_ref_stride);
            }

            if (sad8x8 < p_best_sad_8x8[blk]) {
                p_best_sad_8x8[blk] = sad8x8;
                p_best_mv8x8[blk]   = candidate_mv;
            }

            sad16x16 += sad8x8;
        }

        p_eight_sad16x16[start_16x16_pos][search_index] = sad16x16;
        if (sad16x16 < p_best_sad_16x16[0]) {
            p_best_sad_16x16[0] = sad16x16;
            p_best_mv16x16[0]   = candidate_mv;
        }
    }
}
316
317
/*******************************************
 * svt_ext_all_sad_calculation_8x8_16x16_c
 *   Run the eight-search-position 8x8 / 16x16 SAD evaluation on each of the
 *   sixteen 16x16 blocks of a 64x64 area (4 rows x 4 columns, 16 samples apart).
 *
 *   src, src_stride   : input, top-left sample and stride of the 64x64 source area
 *   ref, ref_stride   : input, reference samples for search position 0 and their stride
 *   mv                : input, packed MV of search position 0
 *   p_best_sad_8x8 / p_best_mv8x8     : in/out, best 8x8 SADs / MVs
 *   p_best_sad_16x16 / p_best_mv16x16 : in/out, best 16x16 SADs / MVs
 *   p_eight_sad16x16  : output, per-16x16-block SADs of the 8 search positions
 *   p_eight_sad8x8    : forwarded unchanged to the per-block routine
 *   sub_sad           : input, forwarded unchanged to the per-block routine
 *******************************************/
void svt_ext_all_sad_calculation_8x8_16x16_c(uint8_t* src, uint32_t src_stride, uint8_t* ref, uint32_t ref_stride,
                                             uint32_t mv, uint32_t* p_best_sad_8x8, uint32_t* p_best_sad_16x16,
                                             uint32_t* p_best_mv8x8, uint32_t* p_best_mv16x16,
                                             uint32_t p_eight_sad16x16[16][8], uint32_t p_eight_sad8x8[64][8],
                                             bool sub_sad) {
    // Maps the raster position (4 * y + x) of a 16x16 block to the index used in the 16x16 result arrays.
    static const char offsets[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15};

    for (int y = 0; y < 4; y++) {
        for (int x = 0; x < 4; x++) {
            // Sample offsets of this 16x16 block inside the source and reference areas.
            const uint32_t block_index           = 16 * y * src_stride + 16 * x;
            const uint32_t search_position_index = 16 * y * ref_stride + 16 * x;

            svt_ext_eight_sad_calculation_8x8_16x16(src + block_index,
                                                    src_stride,
                                                    ref + search_position_index,
                                                    ref_stride,
                                                    mv,
                                                    offsets[4 * y + x],
                                                    p_best_sad_8x8,
                                                    p_best_sad_16x16,
                                                    p_best_mv8x8,
                                                    p_best_mv16x16,
                                                    p_eight_sad16x16,
                                                    p_eight_sad8x8,
                                                    sub_sad);
        }
    }
}
344
345
/*******************************************
 * svt_ext_eight_sad_calculation_32x32_64x64_c
 *   For each of 8 search positions, calculate the SADs of the four 32x32
 *   blocks and of the 64x64 block from the 16x16 SADs, and keep the best
 *   SAD + MV whenever there is an improvement.
 *
 *   p_sad16x16        : input, [block][position] 16x16 SADs; blocks [4q .. 4q+3] form 32x32 block q
 *   p_best_sad_32x32  : in/out, 4 best 32x32 SADs so far
 *   p_best_sad_64x64  : in/out, best 64x64 SAD so far (element 0)
 *   p_best_mv32x32    : in/out, 4 MVs matching p_best_sad_32x32
 *   p_best_mv64x64    : in/out, MV matching p_best_sad_64x64 (element 0)
 *   mv                : input, packed MV of search position 0
 *   p_sad32x32        : output, [block][position] 32x32 SADs
 *
 *   The stored MV has y in the upper 16 bits and x + position in the lower 16 bits.
 *   NOTE(review): _MVXT / _MVYT are presumably the x / y extractors of a packed MV - confirm.
 *******************************************/
void svt_ext_eight_sad_calculation_32x32_64x64_c(const uint32_t p_sad16x16[16][8], uint32_t* p_best_sad_32x32,
                                                 uint32_t* p_best_sad_64x64, uint32_t* p_best_mv32x32,
                                                 uint32_t* p_best_mv64x64, uint32_t mv, uint32_t p_sad32x32[4][8]) {
    for (uint32_t search_index = 0; search_index < 8; search_index++) {
        // MV of this search position: same y, x shifted by the position index.
        const int16_t  x_mv         = _MVXT(mv) + (int16_t)search_index;
        const int16_t  y_mv         = _MVYT(mv);
        const uint32_t candidate_mv = ((uint32_t)y_mv << 16) | ((uint16_t)x_mv);
        uint32_t       sad64x64     = 0;

        for (uint32_t quad = 0; quad < 4; ++quad) {
            const uint32_t first    = 4 * quad;
            const uint32_t sad32x32 = p_sad16x16[first][search_index] + p_sad16x16[first + 1][search_index] +
                p_sad16x16[first + 2][search_index] + p_sad16x16[first + 3][search_index];

            p_sad32x32[quad][search_index] = sad32x32;
            if (sad32x32 < p_best_sad_32x32[quad]) {
                p_best_sad_32x32[quad] = sad32x32;
                p_best_mv32x32[quad]   = candidate_mv;
            }

            sad64x64 += sad32x32;
        }

        if (sad64x64 < p_best_sad_64x64[0]) {
            p_best_sad_64x64[0] = sad64x64;
            p_best_mv64x64[0]   = candidate_mv;
        }
    }
}
403
404
/*******************************************
405
 * open_loop_me_get_search_point_results_block
406
 *******************************************/
407
static void open_loop_me_get_eight_search_point_results_block(
408
    MeContext* me_ctx, // input parameter, ME context Ptr, used to get SB Ptr
409
    uint32_t   list_index, // input parameter, reference list index
410
    uint32_t   ref_pic_index,
411
    int32_t    search_region_index, // input parameter, search area origin, used to
412
    // point to reference samples
413
    int32_t x_search_index, // input parameter, search region position in the
414
    // horizontal direction, used to derive xMV
415
    int32_t y_search_index // input parameter, search region position in the
416
    // vertical direction, used to derive yMV
417
0
) {
418
    // uint32_t ref_luma_stride = ref_pic_ptr->y_stride; // NADER
419
    // uint8_t  *ref_ptr = ref_pic_ptr->y_buffer; // NADER
420
0
    const bool sub_sad         = (me_ctx->me_search_method == SUB_SAD_SEARCH);
421
0
    uint32_t   ref_luma_stride = me_ctx->interpolated_full_stride[list_index][ref_pic_index];
422
0
    uint8_t*   ref_ptr         = me_ctx->integer_buffer_ptr[list_index][ref_pic_index] +
423
0
        ((ME_FILTER_TAP >> 1) * me_ctx->interpolated_full_stride[list_index][ref_pic_index]) + (ME_FILTER_TAP >> 1) +
424
0
        search_region_index;
425
426
0
    uint32_t curr_mv_1 = (((uint32_t)y_search_index) << 16);
427
0
    uint16_t curr_mv_2 = ((uint16_t)x_search_index);
428
0
    uint32_t curr_mv   = curr_mv_1 | curr_mv_2;
429
430
0
    svt_ext_all_sad_calculation_8x8_16x16(me_ctx->b64_src_ptr,
431
0
                                          me_ctx->b64_src_stride,
432
0
                                          ref_ptr,
433
0
                                          ref_luma_stride,
434
0
                                          curr_mv,
435
0
                                          me_ctx->p_best_sad_8x8,
436
0
                                          me_ctx->p_best_sad_16x16,
437
0
                                          me_ctx->p_best_mv8x8,
438
0
                                          me_ctx->p_best_mv16x16,
439
0
                                          me_ctx->p_eight_sad16x16,
440
0
                                          me_ctx->p_eight_sad8x8,
441
0
                                          sub_sad);
442
443
0
    svt_ext_eight_sad_calculation_32x32_64x64(me_ctx->p_eight_sad16x16,
444
0
                                              me_ctx->p_best_sad_32x32,
445
0
                                              me_ctx->p_best_sad_64x64,
446
0
                                              me_ctx->p_best_mv32x32,
447
0
                                              me_ctx->p_best_mv64x64,
448
0
                                              curr_mv,
449
0
                                              me_ctx->p_eight_sad32x32);
450
0
}
451
452
/*******************************************
453
 * open_loop_me_get_search_point_results_block
454
 *******************************************/
455
static void open_loop_me_get_search_point_results_block(
456
    MeContext* me_ctx, // input parameter, ME context Ptr, used to get SB Ptr
457
    uint32_t   list_index, // input parameter, reference list index
458
    uint32_t   ref_pic_index,
459
    int32_t    search_region_index, // input parameter, search area origin, used to
460
    // point to reference samples
461
    int32_t x_search_index, // input parameter, search region position in the
462
    // horizontal direction, used to derive xMV
463
    int32_t y_search_index) // input parameter, search region position in the
464
// vertical direction, used to derive yMV
465
0
{
466
0
    const bool sub_sad = (me_ctx->me_search_method == SUB_SAD_SEARCH);
467
0
    uint8_t*   src_ptr = me_ctx->b64_src_ptr;
468
469
    // uint8_t  *ref_ptr = ref_pic_ptr->y_buffer; // NADER
470
0
    uint8_t* ref_ptr = me_ctx->integer_buffer_ptr[list_index][ref_pic_index] + (ME_FILTER_TAP >> 1) +
471
0
        ((ME_FILTER_TAP >> 1) * me_ctx->interpolated_full_stride[list_index][ref_pic_index]);
472
    // uint32_t ref_luma_stride = ref_pic_ptr->y_stride; // NADER
473
0
    uint32_t ref_luma_stride          = me_ctx->interpolated_full_stride[list_index][ref_pic_index];
474
0
    int32_t  search_position_tl_index = search_region_index;
475
0
    int32_t  search_position_index;
476
0
    int32_t  block_index;
477
0
    int32_t  src_next_16x16_offset;
478
    // uint32_t ref_next_16x16_offset = (ref_pic_ptr->y_stride << 4); // NADER
479
0
    uint32_t  ref_next_16x16_offset = (ref_luma_stride << 4);
480
0
    uint32_t  curr_mv_1             = (((uint32_t)y_search_index) << 16);
481
0
    uint16_t  curr_mv_2             = ((uint16_t)x_search_index);
482
0
    uint32_t  curr_mv               = curr_mv_1 | curr_mv_2;
483
0
    uint32_t* p_best_sad_8x8        = me_ctx->p_best_sad_8x8;
484
0
    uint32_t* p_best_sad_16x16      = me_ctx->p_best_sad_16x16;
485
0
    uint32_t* p_best_sad_32x32      = me_ctx->p_best_sad_32x32;
486
0
    uint32_t* p_best_sad_64x64      = me_ctx->p_best_sad_64x64;
487
0
    uint32_t* p_best_mv8x8          = me_ctx->p_best_mv8x8;
488
0
    uint32_t* p_best_mv16x16        = me_ctx->p_best_mv16x16;
489
0
    uint32_t* p_best_mv32x32        = me_ctx->p_best_mv32x32;
490
0
    uint32_t* p_best_mv64x64        = me_ctx->p_best_mv64x64;
491
0
    uint32_t* p_sad32x32            = me_ctx->p_sad32x32;
492
0
    uint32_t* p_sad16x16            = me_ctx->p_sad16x16;
493
0
    uint32_t* p_sad8x8              = me_ctx->p_sad8x8;
494
495
    // TODO: block_index search_position_index could be removed
496
0
    const uint32_t src_stride = me_ctx->b64_src_stride;
497
0
    src_next_16x16_offset     = src_stride << 4;
498
499
    //---- 16x16 : 0
500
0
    block_index           = 0;
501
0
    search_position_index = search_position_tl_index;
502
503
0
    svt_ext_sad_calculation_8x8_16x16(src_ptr + block_index,
504
0
                                      src_stride,
505
0
                                      ref_ptr + search_position_index,
506
0
                                      ref_luma_stride,
507
0
                                      &p_best_sad_8x8[0],
508
0
                                      &p_best_sad_16x16[0],
509
0
                                      &p_best_mv8x8[0],
510
0
                                      &p_best_mv16x16[0],
511
0
                                      curr_mv,
512
0
                                      &p_sad16x16[0],
513
0
                                      &p_sad8x8[0],
514
0
                                      sub_sad);
515
516
    //---- 16x16 : 1
517
0
    block_index           = block_index + 16;
518
0
    search_position_index = search_position_tl_index + 16;
519
0
    svt_ext_sad_calculation_8x8_16x16(src_ptr + block_index,
520
0
                                      src_stride,
521
0
                                      ref_ptr + search_position_index,
522
0
                                      ref_luma_stride,
523
0
                                      &p_best_sad_8x8[4],
524
0
                                      &p_best_sad_16x16[1],
525
0
                                      &p_best_mv8x8[4],
526
0
                                      &p_best_mv16x16[1],
527
0
                                      curr_mv,
528
0
                                      &p_sad16x16[1],
529
0
                                      &p_sad8x8[4],
530
0
                                      sub_sad);
531
    //---- 16x16 : 4
532
0
    block_index           = block_index + 16;
533
0
    search_position_index = search_position_index + 16;
534
535
0
    svt_ext_sad_calculation_8x8_16x16(src_ptr + block_index,
536
0
                                      src_stride,
537
0
                                      ref_ptr + search_position_index,
538
0
                                      ref_luma_stride,
539
0
                                      &p_best_sad_8x8[16],
540
0
                                      &p_best_sad_16x16[4],
541
0
                                      &p_best_mv8x8[16],
542
0
                                      &p_best_mv16x16[4],
543
0
                                      curr_mv,
544
0
                                      &p_sad16x16[4],
545
0
                                      &p_sad8x8[16],
546
0
                                      sub_sad);
547
548
    //---- 16x16 : 5
549
0
    block_index           = block_index + 16;
550
0
    search_position_index = search_position_index + 16;
551
0
    svt_ext_sad_calculation_8x8_16x16(src_ptr + block_index,
552
0
                                      src_stride,
553
0
                                      ref_ptr + search_position_index,
554
0
                                      ref_luma_stride,
555
0
                                      &p_best_sad_8x8[20],
556
0
                                      &p_best_sad_16x16[5],
557
0
                                      &p_best_mv8x8[20],
558
0
                                      &p_best_mv16x16[5],
559
0
                                      curr_mv,
560
0
                                      &p_sad16x16[5],
561
0
                                      &p_sad8x8[20],
562
0
                                      sub_sad);
563
564
    //---- 16x16 : 2
565
0
    block_index           = src_next_16x16_offset;
566
0
    search_position_index = search_position_tl_index + ref_next_16x16_offset;
567
0
    svt_ext_sad_calculation_8x8_16x16(src_ptr + block_index,
568
0
                                      src_stride,
569
0
                                      ref_ptr + search_position_index,
570
0
                                      ref_luma_stride,
571
0
                                      &p_best_sad_8x8[8],
572
0
                                      &p_best_sad_16x16[2],
573
0
                                      &p_best_mv8x8[8],
574
0
                                      &p_best_mv16x16[2],
575
0
                                      curr_mv,
576
0
                                      &p_sad16x16[2],
577
0
                                      &p_sad8x8[8],
578
0
                                      sub_sad);
579
    //---- 16x16 : 3
580
0
    block_index           = block_index + 16;
581
0
    search_position_index = search_position_index + 16;
582
0
    svt_ext_sad_calculation_8x8_16x16(src_ptr + block_index,
583
0
                                      src_stride,
584
0
                                      ref_ptr + search_position_index,
585
0
                                      ref_luma_stride,
586
0
                                      &p_best_sad_8x8[12],
587
0
                                      &p_best_sad_16x16[3],
588
0
                                      &p_best_mv8x8[12],
589
0
                                      &p_best_mv16x16[3],
590
0
                                      curr_mv,
591
0
                                      &p_sad16x16[3],
592
0
                                      &p_sad8x8[12],
593
0
                                      sub_sad);
594
    //---- 16x16 : 6
595
0
    block_index           = block_index + 16;
596
0
    search_position_index = search_position_index + 16;
597
0
    svt_ext_sad_calculation_8x8_16x16(src_ptr + block_index,
598
0
                                      src_stride,
599
0
                                      ref_ptr + search_position_index,
600
0
                                      ref_luma_stride,
601
0
                                      &p_best_sad_8x8[24],
602
0
                                      &p_best_sad_16x16[6],
603
0
                                      &p_best_mv8x8[24],
604
0
                                      &p_best_mv16x16[6],
605
0
                                      curr_mv,
606
0
                                      &p_sad16x16[6],
607
0
                                      &p_sad8x8[24],
608
0
                                      sub_sad);
609
    //---- 16x16 : 7
610
0
    block_index           = block_index + 16;
611
0
    search_position_index = search_position_index + 16;
612
0
    svt_ext_sad_calculation_8x8_16x16(src_ptr + block_index,
613
0
                                      src_stride,
614
0
                                      ref_ptr + search_position_index,
615
0
                                      ref_luma_stride,
616
0
                                      &p_best_sad_8x8[28],
617
0
                                      &p_best_sad_16x16[7],
618
0
                                      &p_best_mv8x8[28],
619
0
                                      &p_best_mv16x16[7],
620
0
                                      curr_mv,
621
0
                                      &p_sad16x16[7],
622
0
                                      &p_sad8x8[28],
623
0
                                      sub_sad);
624
625
    //---- 16x16 : 8
626
0
    block_index           = (src_next_16x16_offset << 1);
627
0
    search_position_index = search_position_tl_index + (ref_next_16x16_offset << 1);
628
0
    svt_ext_sad_calculation_8x8_16x16(src_ptr + block_index,
629
0
                                      src_stride,
630
0
                                      ref_ptr + search_position_index,
631
0
                                      ref_luma_stride,
632
0
                                      &p_best_sad_8x8[32],
633
0
                                      &p_best_sad_16x16[8],
634
0
                                      &p_best_mv8x8[32],
635
0
                                      &p_best_mv16x16[8],
636
0
                                      curr_mv,
637
0
                                      &p_sad16x16[8],
638
0
                                      &p_sad8x8[32],
639
0
                                      sub_sad);
640
    //---- 16x16 : 9
641
0
    block_index           = block_index + 16;
642
0
    search_position_index = search_position_index + 16;
643
0
    svt_ext_sad_calculation_8x8_16x16(src_ptr + block_index,
644
0
                                      src_stride,
645
0
                                      ref_ptr + search_position_index,
646
0
                                      ref_luma_stride,
647
0
                                      &p_best_sad_8x8[36],
648
0
                                      &p_best_sad_16x16[9],
649
0
                                      &p_best_mv8x8[36],
650
0
                                      &p_best_mv16x16[9],
651
0
                                      curr_mv,
652
0
                                      &p_sad16x16[9],
653
0
                                      &p_sad8x8[36],
654
0
                                      sub_sad);
655
    //---- 16x16 : 12
656
0
    block_index           = block_index + 16;
657
0
    search_position_index = search_position_index + 16;
658
0
    svt_ext_sad_calculation_8x8_16x16(src_ptr + block_index,
659
0
                                      src_stride,
660
0
                                      ref_ptr + search_position_index,
661
0
                                      ref_luma_stride,
662
0
                                      &p_best_sad_8x8[48],
663
0
                                      &p_best_sad_16x16[12],
664
0
                                      &p_best_mv8x8[48],
665
0
                                      &p_best_mv16x16[12],
666
0
                                      curr_mv,
667
0
                                      &p_sad16x16[12],
668
0
                                      &p_sad8x8[48],
669
0
                                      sub_sad);
670
    //---- 16x16 : 13
671
0
    block_index           = block_index + 16;
672
0
    search_position_index = search_position_index + 16;
673
0
    svt_ext_sad_calculation_8x8_16x16(src_ptr + block_index,
674
0
                                      src_stride,
675
0
                                      ref_ptr + search_position_index,
676
0
                                      ref_luma_stride,
677
0
                                      &p_best_sad_8x8[52],
678
0
                                      &p_best_sad_16x16[13],
679
0
                                      &p_best_mv8x8[52],
680
0
                                      &p_best_mv16x16[13],
681
0
                                      curr_mv,
682
0
                                      &p_sad16x16[13],
683
0
                                      &p_sad8x8[52],
684
0
                                      sub_sad);
685
686
    //---- 16x16 : 10
687
0
    block_index           = (src_next_16x16_offset * 3);
688
0
    search_position_index = search_position_tl_index + (ref_next_16x16_offset * 3);
689
0
    svt_ext_sad_calculation_8x8_16x16(src_ptr + block_index,
690
0
                                      src_stride,
691
0
                                      ref_ptr + search_position_index,
692
0
                                      ref_luma_stride,
693
0
                                      &p_best_sad_8x8[40],
694
0
                                      &p_best_sad_16x16[10],
695
0
                                      &p_best_mv8x8[40],
696
0
                                      &p_best_mv16x16[10],
697
0
                                      curr_mv,
698
0
                                      &p_sad16x16[10],
699
0
                                      &p_sad8x8[40],
700
0
                                      sub_sad);
701
    //---- 16x16 : 11
702
0
    block_index           = block_index + 16;
703
0
    search_position_index = search_position_index + 16;
704
0
    svt_ext_sad_calculation_8x8_16x16(src_ptr + block_index,
705
0
                                      src_stride,
706
0
                                      ref_ptr + search_position_index,
707
0
                                      ref_luma_stride,
708
0
                                      &p_best_sad_8x8[44],
709
0
                                      &p_best_sad_16x16[11],
710
0
                                      &p_best_mv8x8[44],
711
0
                                      &p_best_mv16x16[11],
712
0
                                      curr_mv,
713
0
                                      &p_sad16x16[11],
714
0
                                      &p_sad8x8[44],
715
0
                                      sub_sad);
716
    //---- 16x16 : 14
717
0
    block_index           = block_index + 16;
718
0
    search_position_index = search_position_index + 16;
719
0
    svt_ext_sad_calculation_8x8_16x16(src_ptr + block_index,
720
0
                                      src_stride,
721
0
                                      ref_ptr + search_position_index,
722
0
                                      ref_luma_stride,
723
0
                                      &p_best_sad_8x8[56],
724
0
                                      &p_best_sad_16x16[14],
725
0
                                      &p_best_mv8x8[56],
726
0
                                      &p_best_mv16x16[14],
727
0
                                      curr_mv,
728
0
                                      &p_sad16x16[14],
729
0
                                      &p_sad8x8[56],
730
0
                                      sub_sad);
731
    //---- 16x16 : 15
732
0
    block_index           = block_index + 16;
733
0
    search_position_index = search_position_index + 16;
734
0
    svt_ext_sad_calculation_8x8_16x16(src_ptr + block_index,
735
0
                                      src_stride,
736
0
                                      ref_ptr + search_position_index,
737
0
                                      ref_luma_stride,
738
0
                                      &p_best_sad_8x8[60],
739
0
                                      &p_best_sad_16x16[15],
740
0
                                      &p_best_mv8x8[60],
741
0
                                      &p_best_mv16x16[15],
742
0
                                      curr_mv,
743
0
                                      &p_sad16x16[15],
744
0
                                      &p_sad8x8[60],
745
0
                                      sub_sad);
746
747
0
    svt_ext_sad_calculation_32x32_64x64(
748
0
        p_sad16x16, p_best_sad_32x32, p_best_sad_64x64, p_best_mv32x32, p_best_mv64x64, curr_mv, &p_sad32x32[0]);
749
0
}
750
751
/*******************************************
752
 * open_loop_me_fullpel_search_sblock
753
 *******************************************/
754
static void open_loop_me_fullpel_search_sblock(MeContext* me_ctx, uint32_t list_index, uint32_t ref_pic_index,
755
                                               int16_t x_search_area_origin, int16_t y_search_area_origin,
756
0
                                               uint32_t search_area_width, uint32_t search_area_height) {
757
0
    uint32_t x_search_index, y_search_index;
758
0
    uint32_t search_area_width_rest_8 = search_area_width & 7;
759
0
    uint32_t search_area_width_mult_8 = search_area_width - search_area_width_rest_8;
760
761
0
    for (y_search_index = 0; y_search_index < search_area_height; y_search_index++) {
762
0
        for (x_search_index = 0; x_search_index < search_area_width_mult_8; x_search_index += 8) {
763
            // this function will do:  x_search_index, +1, +2, ..., +7
764
0
            open_loop_me_get_eight_search_point_results_block(
765
0
                me_ctx,
766
0
                list_index,
767
0
                ref_pic_index,
768
0
                x_search_index + y_search_index * me_ctx->interpolated_full_stride[list_index][ref_pic_index],
769
0
                (int32_t)x_search_index + x_search_area_origin,
770
0
                (int32_t)y_search_index + y_search_area_origin);
771
0
        }
772
773
0
        for (x_search_index = search_area_width_mult_8; x_search_index < search_area_width; x_search_index++) {
774
0
            open_loop_me_get_search_point_results_block(
775
0
                me_ctx,
776
0
                list_index,
777
0
                ref_pic_index,
778
0
                x_search_index + y_search_index * me_ctx->interpolated_full_stride[list_index][ref_pic_index],
779
0
                (int32_t)x_search_index + x_search_area_origin,
780
0
                (int32_t)y_search_index + y_search_area_origin);
781
0
        }
782
0
    }
783
0
}
784
785
// Perform HME Level 0 for one 64x64 block on the given picture
786
static void hme_level_0(MeContext*           me_ctx, // ME context Ptr, used to get/update ME results
787
                        int16_t              org_x, // Block position in the horizontal direction- sixteenth resolution
788
                        int16_t              org_y, // Block position in the vertical direction- sixteenth resolution
789
                        uint32_t             block_width, // Block width - sixteenth resolution
790
                        uint32_t             block_height, // Block height - sixteenth resolution
791
                        int16_t              sa_width, // search area width
792
                        int16_t              sa_height, // search area height
793
                        EbPictureBufferDesc* sixteenth_ref_pic_ptr, // sixteenth-downsampled reference picture
794
                        uint32_t             sr_w, // current search region index in the horizontal direction
795
                        uint32_t             sr_h, // current search region index in the vertical direction
796
                        uint64_t*            best_sad, // output: Level0 SAD at (sr_w, sr_h)
797
                        int16_t*             hme_l0_sc_x, // output: Level0 xMV at (sr_w, sr_h)
798
                        int16_t*             hme_l0_sc_y // output: Level0 yMV at (sr_w, sr_h)
799
0
) {
800
    // round up the search region width to nearest multiple of 8 because the SAD calculation performance (for
801
    // intrinsic functions) is the same for search region width from 1 to 8
802
0
    sa_width           = (int16_t)((sa_width + 7) & ~0x07);
803
0
    int16_t pad_width  = (int16_t)(sixteenth_ref_pic_ptr->border) - 1;
804
0
    int16_t pad_height = (int16_t)(sixteenth_ref_pic_ptr->border) - 1;
805
806
0
    int16_t x_search_region_distance = sa_width * sr_w;
807
0
    int16_t y_search_region_distance = sa_height * sr_h;
808
0
    int16_t sa_origin_x              = -(int16_t)((sa_width * me_ctx->num_hme_sa_w) >> 1) + x_search_region_distance;
809
0
    int16_t sa_origin_y              = -(int16_t)((sa_height * me_ctx->num_hme_sa_h) >> 1) + y_search_region_distance;
810
    // Correct the left edge of the Search Area if it is not on the reference picture
811
0
    if (((org_x + sa_origin_x) < -pad_width)) {
812
0
        sa_origin_x = -pad_width - org_x;
813
0
        sa_width    = sa_width - (-pad_width - (org_x + sa_origin_x));
814
0
    }
815
816
    // Correct the right edge of the Search Area if its not on the reference picture
817
0
    if (((org_x + sa_origin_x) > (int16_t)sixteenth_ref_pic_ptr->width - 1)) {
818
0
        sa_origin_x = sa_origin_x - ((org_x + sa_origin_x) - ((int16_t)sixteenth_ref_pic_ptr->width - 1));
819
0
    }
820
821
0
    if (((org_x + sa_origin_x + sa_width) > (int16_t)sixteenth_ref_pic_ptr->width)) {
822
0
        sa_width = MAX(1, sa_width - ((org_x + sa_origin_x + sa_width) - (int16_t)sixteenth_ref_pic_ptr->width));
823
0
    }
824
    // Constrain x_HME_L1 to be a multiple of 8 (round down as cropping alrea performed)
825
0
    sa_width = (sa_width < 8) ? sa_width : sa_width & ~0x07;
826
    // Correct the top edge of the Search Area if it is not on the reference picture
827
0
    if (((org_y + sa_origin_y) < -pad_height)) {
828
0
        sa_origin_y = -pad_height - org_y;
829
0
        sa_height   = sa_height - (-pad_height - (org_y + sa_origin_y));
830
0
    }
831
832
    // Correct the bottom edge of the Search Area if its not on the reference picture
833
0
    if (((org_y + sa_origin_y) > (int16_t)sixteenth_ref_pic_ptr->height - 1)) {
834
0
        sa_origin_y = sa_origin_y - ((org_y + sa_origin_y) - ((int16_t)sixteenth_ref_pic_ptr->height - 1));
835
0
    }
836
837
0
    if ((org_y + sa_origin_y + sa_height > (int16_t)sixteenth_ref_pic_ptr->height)) {
838
0
        sa_height = MAX(1, sa_height - ((org_y + sa_origin_y + sa_height) - (int16_t)sixteenth_ref_pic_ptr->height));
839
0
    }
840
841
    // Move to the top left of the search region
842
0
    int16_t x_top_left_search_region = (org_x) + sa_origin_x;
843
0
    int16_t y_top_left_search_region = (org_y) + sa_origin_y;
844
0
    int32_t search_region_index = x_top_left_search_region + y_top_left_search_region * sixteenth_ref_pic_ptr->y_stride;
845
846
    // Put the first search location into level0 results
847
0
    svt_sad_loop_kernel(&me_ctx->sixteenth_b64_buffer[0],
848
0
                        (me_ctx->hme_search_method == FULL_SAD_SEARCH) ? me_ctx->sixteenth_b64_buffer_stride
849
0
                                                                       : me_ctx->sixteenth_b64_buffer_stride * 2,
850
0
                        &sixteenth_ref_pic_ptr->y_buffer[search_region_index],
851
0
                        (me_ctx->hme_search_method == FULL_SAD_SEARCH) ? sixteenth_ref_pic_ptr->y_stride
852
0
                                                                       : sixteenth_ref_pic_ptr->y_stride * 2,
853
0
                        (me_ctx->hme_search_method == FULL_SAD_SEARCH) ? block_height : block_height >> 1,
854
0
                        block_width,
855
                        /* results */
856
0
                        best_sad,
857
0
                        hme_l0_sc_x,
858
0
                        hme_l0_sc_y,
859
                        /* range */
860
0
                        sixteenth_ref_pic_ptr->y_stride,
861
0
                        0, // skip search line
862
0
                        sa_width,
863
0
                        sa_height);
864
865
0
    *best_sad = (me_ctx->hme_search_method == FULL_SAD_SEARCH)
866
0
        ? *best_sad
867
0
        : *best_sad * 2; // Multiply by 2 because considered only ever other line
868
0
    *hme_l0_sc_x += sa_origin_x;
869
0
    *hme_l0_sc_x *= 4; // Multiply by 4 because operating on 1/4 resolution
870
0
    *hme_l0_sc_y += sa_origin_y;
871
0
    *hme_l0_sc_y *= 4; // Multiply by 4 because operating on 1/4 resolution
872
873
0
    return;
874
0
}
875
876
// Perform HME Level 1 for one 64x64 block on the given picture
877
static void hme_level_1(MeContext*           me_ctx, // ME context Ptr, used to get/update ME results
878
                        int16_t              org_x, // Block position in the horizontal direction - quarter resolution
879
                        int16_t              org_y, // Block position in the vertical direction - quarter resolution
880
                        uint32_t             block_width, // Block width - quarter resolution
881
                        uint32_t             block_height, // Block height - quarter resolution
882
                        EbPictureBufferDesc* quarter_ref_pic_ptr, // quarter reference picture
883
                        int16_t              sa_width, // hme level 1 search area in width
884
                        int16_t              sa_height, // hme level 1 search area in height
885
                        int16_t              hme_l0_sc_x, // input parameter, best Level0 xMV at (sr_w, sr_h)
886
                        int16_t              hme_l0_sc_y, // input parameter, best Level0 yMV at (sr_w, sr_h)
887
                        uint64_t*            best_sad, // output parameter, Level1 SAD at (sr_w, sr_h)
888
                        int16_t*             hme_l1_sc_x, // output parameter, Level1 xMV at (sr_w, sr_h)
889
                        int16_t*             hme_l1_sc_y // output parameter, Level1 yMV at (sr_w, sr_h)
890
0
) {
891
    // round up the search region width to nearest multiple of 8 because the SAD calculation performance (for
892
    // intrinsic functions) is the same for search region width from 1 to 8
893
0
    sa_width = (int16_t)((sa_width + 7) & ~0x07);
894
895
0
    int16_t pad_width  = (int16_t)(quarter_ref_pic_ptr->border) - 1;
896
0
    int16_t pad_height = (int16_t)(quarter_ref_pic_ptr->border) - 1;
897
898
0
    int16_t sa_origin_x = -(sa_width >> 1) + hme_l0_sc_x;
899
0
    int16_t sa_origin_y = -(sa_height >> 1) + hme_l0_sc_y;
900
901
    // Correct the left edge of the Search Area if it is not on the reference picture
902
0
    if (((org_x + sa_origin_x) < -pad_width)) {
903
0
        sa_origin_x = -pad_width - org_x;
904
0
        sa_width    = sa_width - (-pad_width - (org_x + sa_origin_x));
905
0
    }
906
907
    // Correct the right edge of the Search Area if its not on the reference picture
908
0
    if (((org_x + sa_origin_x) > (int16_t)quarter_ref_pic_ptr->width - 1)) {
909
0
        sa_origin_x = sa_origin_x - ((org_x + sa_origin_x) - ((int16_t)quarter_ref_pic_ptr->width - 1));
910
0
    }
911
912
0
    if (((org_x + sa_origin_x + sa_width) > (int16_t)quarter_ref_pic_ptr->width)) {
913
0
        sa_width = MAX(1, sa_width - ((org_x + sa_origin_x + sa_width) - (int16_t)quarter_ref_pic_ptr->width));
914
0
    }
915
916
    // Constrain x_HME_L1 to be a multiple of 8 (round down as cropping alrea performed)
917
0
    sa_width = (sa_width < 8) ? sa_width : sa_width & ~0x07;
918
919
    // Correct the top edge of the Search Area if it is not on the reference picture
920
0
    if (((org_y + sa_origin_y) < -pad_height)) {
921
0
        sa_origin_y = -pad_height - org_y;
922
0
        sa_height   = sa_height - (-pad_height - (org_y + sa_origin_y));
923
0
    }
924
925
    // Correct the bottom edge of the Search Area if its not on the reference picture
926
0
    if (((org_y + sa_origin_y) > (int16_t)quarter_ref_pic_ptr->height - 1)) {
927
0
        sa_origin_y = sa_origin_y - ((org_y + sa_origin_y) - ((int16_t)quarter_ref_pic_ptr->height - 1));
928
0
    }
929
930
0
    if ((org_y + sa_origin_y + sa_height > (int16_t)quarter_ref_pic_ptr->height)) {
931
0
        sa_height = MAX(1, sa_height - ((org_y + sa_origin_y + sa_height) - (int16_t)quarter_ref_pic_ptr->height));
932
0
    }
933
934
    // Move to the top left of the search region
935
0
    int16_t x_top_left_search_region = (org_x) + sa_origin_x;
936
0
    int16_t y_top_left_search_region = (org_y) + sa_origin_y;
937
0
    int32_t search_region_index = x_top_left_search_region + y_top_left_search_region * quarter_ref_pic_ptr->y_stride;
938
939
    // Put the first search location into level1 results
940
0
    svt_sad_loop_kernel(&me_ctx->quarter_b64_buffer[0],
941
0
                        (me_ctx->hme_search_method == FULL_SAD_SEARCH) ? me_ctx->quarter_b64_buffer_stride
942
0
                                                                       : me_ctx->quarter_b64_buffer_stride * 2,
943
0
                        &quarter_ref_pic_ptr->y_buffer[search_region_index],
944
0
                        (me_ctx->hme_search_method == FULL_SAD_SEARCH) ? quarter_ref_pic_ptr->y_stride
945
0
                                                                       : quarter_ref_pic_ptr->y_stride * 2,
946
0
                        (me_ctx->hme_search_method == FULL_SAD_SEARCH) ? block_height : block_height >> 1,
947
0
                        block_width,
948
                        /* results */
949
0
                        best_sad,
950
0
                        hme_l1_sc_x,
951
0
                        hme_l1_sc_y,
952
                        /* range */
953
0
                        quarter_ref_pic_ptr->y_stride,
954
0
                        0, // skip search line
955
0
                        sa_width,
956
0
                        sa_height);
957
958
0
    *best_sad = (me_ctx->hme_search_method == FULL_SAD_SEARCH)
959
0
        ? *best_sad
960
0
        : *best_sad * 2; // Multiply by 2 because considered only ever other line
961
0
    *hme_l1_sc_x += sa_origin_x;
962
0
    *hme_l1_sc_x *= 2; // Multiply by 2 because operating on 1/2 resolution
963
0
    *hme_l1_sc_y += sa_origin_y;
964
0
    *hme_l1_sc_y *= 2; // Multiply by 2 because operating on 1/2 resolution
965
966
0
    return;
967
0
}
968
969
// Perform HME Level 2 for one 64x64 block on the given picture
970
void hme_level_2(MeContext*           me_ctx, // ME context Ptr, used to get/update ME results
971
                 int16_t              org_x, // Block position in the horizontal direction
972
                 int16_t              org_y, // Block position in the vertical direction
973
                 uint32_t             block_width, // Block pwidth - full resolution
974
                 uint32_t             block_height, // Block height - full resolution
975
                 EbPictureBufferDesc* ref_pic_ptr, // reference picture
976
                 int16_t              sa_width, // hme level 1 search area in width
977
                 int16_t              sa_height, // hme level 1 search area in height
978
                 int16_t              hme_l1_sc_x, // best Level1 xMV at (sr_w, sr_h)
979
                 int16_t              hme_l1_sc_y, // best Level1 yMV at (sr_w, sr_h)
980
                 uint64_t*            best_sad, // Level2 SAD at (sr_w, sr_h)
981
                 int16_t*             hme_l2_sc_x, // Level2 xMV at (sr_w, sr_h)
982
                 int16_t*             hme_l2_sc_y // Level2 yMV at (sr_w, sr_h)
983
0
) {
984
    // round up the search region width to nearest multiple of 8 because the SAD calculation performance (for
985
    // intrinsic functions) is the same for search region width from 1 to 8
986
0
    sa_width = (int16_t)((sa_width + 7) & ~0x07);
987
988
0
    int16_t pad_width  = (int16_t)BLOCK_SIZE_64 - 1;
989
0
    int16_t pad_height = (int16_t)BLOCK_SIZE_64 - 1;
990
991
0
    int16_t sa_origin_x = -(sa_width >> 1) + hme_l1_sc_x;
992
0
    int16_t sa_origin_y = -(sa_height >> 1) + hme_l1_sc_y;
993
994
    // Correct the left edge of the Search Area if it is not on the reference picture
995
0
    if (((org_x + sa_origin_x) < -pad_width)) {
996
0
        sa_origin_x = -pad_width - org_x;
997
0
        sa_width    = sa_width - (-pad_width - (org_x + sa_origin_x));
998
0
    }
999
1000
    // Correct the right edge of the Search Area if its not on the reference picture
1001
0
    if (((org_x + sa_origin_x) > (int16_t)ref_pic_ptr->width - 1)) {
1002
0
        sa_origin_x = sa_origin_x - ((org_x + sa_origin_x) - ((int16_t)ref_pic_ptr->width - 1));
1003
0
    }
1004
1005
0
    if (((org_x + sa_origin_x + sa_width) > (int16_t)ref_pic_ptr->width)) {
1006
0
        sa_width = MAX(1, sa_width - ((org_x + sa_origin_x + sa_width) - (int16_t)ref_pic_ptr->width));
1007
0
    }
1008
1009
    // Constrain x_HME_L1 to be a multiple of 8 (round down as cropping already performed)
1010
0
    sa_width = (sa_width < 8) ? sa_width : sa_width & ~0x07;
1011
1012
    // Correct the top edge of the Search Area if it is not on the reference picture
1013
0
    if (((org_y + sa_origin_y) < -pad_height)) {
1014
0
        sa_origin_y = -pad_height - org_y;
1015
0
        sa_height   = sa_height - (-pad_height - (org_y + sa_origin_y));
1016
0
    }
1017
1018
    // Correct the bottom edge of the Search Area if its not on the reference picture
1019
0
    if (((org_y + sa_origin_y) > (int16_t)ref_pic_ptr->height - 1)) {
1020
0
        sa_origin_y = sa_origin_y - ((org_y + sa_origin_y) - ((int16_t)ref_pic_ptr->height - 1));
1021
0
    }
1022
1023
0
    if ((org_y + sa_origin_y + sa_height > (int16_t)ref_pic_ptr->height)) {
1024
0
        sa_height = MAX(1, sa_height - ((org_y + sa_origin_y + sa_height) - (int16_t)ref_pic_ptr->height));
1025
0
    }
1026
1027
    // Move to the top left of the search region
1028
0
    int16_t x_top_left_search_region = (org_x) + sa_origin_x;
1029
0
    int16_t y_top_left_search_region = (org_y) + sa_origin_y;
1030
0
    int32_t search_region_index      = x_top_left_search_region + y_top_left_search_region * ref_pic_ptr->y_stride;
1031
1032
    // Put the first search location into level2 results
1033
0
    svt_sad_loop_kernel(
1034
0
        me_ctx->b64_src_ptr,
1035
0
        (me_ctx->hme_search_method == FULL_SAD_SEARCH) ? me_ctx->b64_src_stride : me_ctx->b64_src_stride * 2,
1036
0
        &ref_pic_ptr->y_buffer[search_region_index],
1037
0
        (me_ctx->hme_search_method == FULL_SAD_SEARCH) ? ref_pic_ptr->y_stride : ref_pic_ptr->y_stride * 2,
1038
0
        (me_ctx->hme_search_method == FULL_SAD_SEARCH) ? block_height : block_height >> 1,
1039
0
        block_width,
1040
        /* results */
1041
0
        best_sad,
1042
0
        hme_l2_sc_x,
1043
0
        hme_l2_sc_y,
1044
        /* range */
1045
0
        ref_pic_ptr->y_stride,
1046
0
        0, // skip search line
1047
0
        sa_width,
1048
0
        sa_height);
1049
1050
0
    *best_sad = (me_ctx->hme_search_method == FULL_SAD_SEARCH)
1051
0
        ? *best_sad
1052
0
        : *best_sad * 2; // Multiply by 2 because considered only ever other line
1053
0
    *hme_l2_sc_x += sa_origin_x;
1054
0
    *hme_l2_sc_y += sa_origin_y;
1055
1056
0
    return;
1057
0
}
1058
1059
// Decide whether the search should be centred on the incoming (*x_search_center, *y_search_center)
// or on the zero displacement.
//
// The incoming centre is first clamped so that the block it points to starts no further than
// BLOCK_SIZE_64 - 1 samples left of / above the reference picture and no later than its last
// column / row. The SAD at the clamped centre is then compared with the SAD at (0, 0); both are
// measured on every other row and doubled. If the zero displacement is at least as good, the centre
// is reset to (0, 0).
//
// When me_ctx->me_early_exit_th is non-zero the caller-supplied zz_sad is used as the (0, 0) SAD
// instead of computing it here.
//
// Returns the (doubled) SAD measured at the clamped incoming centre, regardless of which centre
// ends up being selected.
uint32_t check_00_center(EbPictureBufferDesc* ref_pic_ptr, MeContext* me_ctx, uint32_t sb_origin_x,
                         uint32_t sb_origin_y, uint32_t sb_width, uint32_t sb_height, int16_t* x_search_center,
                         int16_t* y_search_center, uint32_t zz_sad) {
    const int16_t org_x      = (int16_t)sb_origin_x;
    const int16_t org_y      = (int16_t)sb_origin_y;
    const int     row_shift  = 1; // SAD is taken on every 2nd row, then scaled back up by the same shift
    const int16_t pad_width  = (int16_t)BLOCK_SIZE_64 - 1;
    const int16_t pad_height = (int16_t)BLOCK_SIZE_64 - 1;

    // SAD of the co-located block (zero displacement)
    uint64_t zero_mv_sad;
    if (me_ctx->me_early_exit_th) {
        zero_mv_sad = zz_sad;
    } else {
        const int32_t colocated_index = org_x + (org_y)*ref_pic_ptr->y_stride;

        zero_mv_sad = svt_nxm_sad_kernel(me_ctx->b64_src_ptr,
                                         me_ctx->b64_src_stride << row_shift,
                                         &(ref_pic_ptr->y_buffer[colocated_index]),
                                         ref_pic_ptr->y_stride << row_shift,
                                         sb_height >> row_shift,
                                         sb_width);
    }
    zero_mv_sad <<= row_shift;

    // Horizontal clamp: left limit first, then right limit
    if (org_x + *x_search_center < -pad_width) {
        *x_search_center = -pad_width - org_x;
    }
    const int last_col = (int16_t)ref_pic_ptr->width - 1;
    if (org_x + *x_search_center > last_col) {
        *x_search_center = (int16_t)(last_col - org_x);
    }

    // Vertical clamp: top limit first, then bottom limit
    if (org_y + *y_search_center < -pad_height) {
        *y_search_center = -pad_height - org_y;
    }
    const int last_row = (int16_t)ref_pic_ptr->height - 1;
    if (org_y + *y_search_center > last_row) {
        *y_search_center = (int16_t)(last_row - org_y);
    }

    // SAD at the clamped incoming centre
    const int32_t hme_index = org_x + *x_search_center + (org_y + *y_search_center) * ref_pic_ptr->y_stride;

    uint64_t hme_mv_sad = svt_nxm_sad_kernel(me_ctx->b64_src_ptr,
                                             me_ctx->b64_src_stride << row_shift,
                                             &(ref_pic_ptr->y_buffer[hme_index]),
                                             ref_pic_ptr->y_stride << row_shift,
                                             sb_height >> row_shift,
                                             sb_width);
    hme_mv_sad <<= row_shift;

    const uint64_t zero_mv_cost = zero_mv_sad << COST_PRECISION;
    const uint64_t hme_mv_cost  = hme_mv_sad << COST_PRECISION;

    // Ties go to the zero displacement
    if (zero_mv_cost <= hme_mv_cost) {
        *x_search_center = 0;
        *y_search_center = 0;
    }
    return hme_mv_sad;
}
1123
1124
// get ME references based on level:
1125
// level: 0 => sixteenth, 1 => quarter, 2 => original
1126
1127
static EbPictureBufferDesc* get_me_reference(PictureParentControlSet* pcs, MeContext* me_ctx, uint8_t list_index,
1128
                                             uint8_t ref_pic_index, uint8_t level, uint16_t* dist, uint16_t input_width,
1129
0
                                             uint16_t input_height) {
1130
0
    EbPictureBufferDesc* ref_pic_ptr;
1131
0
    ref_pic_ptr = level == 0 ? me_ctx->me_ds_ref_array[list_index][ref_pic_index].sixteenth_picture_ptr
1132
0
        : level == 1         ? me_ctx->me_ds_ref_array[list_index][ref_pic_index].quarter_picture_ptr
1133
0
                             : me_ctx->me_ds_ref_array[list_index][ref_pic_index].picture_ptr;
1134
1135
0
    if ((input_width >> (2 - level)) != ref_pic_ptr->width || (input_height >> (2 - level)) != ref_pic_ptr->height) {
1136
0
        SVT_WARN("picture %3llu: HME level%d resolution mismatch! input (%dx%d) != (%dx%d) pa ref. \n",
1137
0
                 pcs->picture_number,
1138
0
                 level,
1139
0
                 input_width >> (2 - level),
1140
0
                 input_height >> (2 - level),
1141
0
                 ref_pic_ptr->width,
1142
0
                 ref_pic_ptr->height);
1143
0
    }
1144
1145
0
    *dist = (int16_t)ABS((int64_t)pcs->picture_number -
1146
0
                         (int64_t)me_ctx->me_ds_ref_array[list_index][ref_pic_index].picture_number);
1147
0
    return ref_pic_ptr;
1148
0
}
1149
1150
// Factor used to slow down the growth of the ME search region towards its maximum.
// Returns 5/8 of the picture distance, rounded up to the next integer.
uint16_t svt_aom_get_scaled_picture_distance(uint16_t dist) {
    // 5 and 8 are coprime, so (dist * 5) is a multiple of 8 exactly when dist is;
    // adding 7 before dividing therefore rounds up in the same cases as a "dist % 8" check.
    return (uint16_t)((dist * 5 + 7) / 8);
}
1155
1156
// Search-area multipliers, indexed as [sc_class_me_boost - 1][hme-SAD bucket].
// Bucket 1 is never selected by apply_me_sa_boost(); buckets 0, 2, 3 and 4 are.
static const double search_area_multipliers[3][5] = {
    {1.0, 1.0, 3.0, 4.0, 5.0}, /* boost=1 */
    {1.0, 1.0, 2.5, 3.5, 4.5}, /* boost=2 */
    {1.0, 1.0, 2.0, 2.5, 3.5} /* boost=3 */
};

// Scale one search-area dimension and saturate at the int16_t limits.
// Converting an out-of-range double to int16_t is undefined behavior, so the range is checked first.
static int16_t scale_me_sa_dim(int16_t dim, double mult) {
    const double scaled = dim * mult;
    if (scaled >= (double)INT16_MAX) {
        return INT16_MAX;
    }
    if (scaled <= (double)INT16_MIN) {
        return INT16_MIN;
    }
    return (int16_t)scaled;
}

// Enlarge the ME search area according to the HME SAD and the boost level.
//
//   width / height     [in/out] search-area dimensions, scaled in place
//   hme_sad            HME SAD; compared against multiples of 64*64 to pick a multiplier bucket
//   sc_class_me_boost  boost level, expected in 1..3 (row = level - 1); any other value leaves the
//                      dimensions untouched instead of reading outside the table
static void apply_me_sa_boost(int16_t* width, int16_t* height, uint64_t hme_sad, int sc_class_me_boost) {
    if (sc_class_me_boost < 1 || sc_class_me_boost > 3) {
        return;
    }

    int index;
    if (hme_sad > 4 * 64 * 64) {
        index = 4;
    } else if (hme_sad > 3 * 64 * 64) {
        index = 3;
    } else if (hme_sad > 2 * 64 * 64) {
        index = 2;
    } else {
        index = 0;
    }

    const double mult = search_area_multipliers[sc_class_me_boost - 1][index];

    *width  = scale_me_sa_dim(*width, mult);
    *height = scale_me_sa_dim(*height, mult);
}
1179
1180
/*******************************************
1181
 *   performs integer search motion estimation for
1182
 all avaiable references frames
1183
 *******************************************/
1184
static void integer_search_b64(PictureParentControlSet* pcs, MeContext* me_ctx, uint32_t b64_origin_x,
1185
0
                               uint32_t b64_origin_y, EbPictureBufferDesc* input_ptr) {
1186
0
    int16_t  picture_width  = pcs->aligned_width;
1187
0
    int16_t  picture_height = pcs->aligned_height;
1188
0
    uint32_t b64_width      = me_ctx->b64_width;
1189
0
    uint32_t b64_height     = me_ctx->b64_height;
1190
0
    int16_t  pad_width      = (int16_t)BLOCK_SIZE_64 - 1;
1191
0
    int16_t  pad_height     = (int16_t)BLOCK_SIZE_64 - 1;
1192
0
    int16_t  org_x          = (int16_t)b64_origin_x;
1193
0
    int16_t  org_y          = (int16_t)b64_origin_y;
1194
0
    int16_t  search_area_width;
1195
0
    int16_t  search_area_height;
1196
0
    int16_t  x_search_area_origin;
1197
0
    int16_t  y_search_area_origin;
1198
0
    int16_t  x_top_left_search_region;
1199
0
    int16_t  y_top_left_search_region;
1200
0
    int32_t  search_region_index;
1201
0
    uint32_t num_of_list_to_search;
1202
0
    uint32_t list_index;
1203
0
    uint8_t  ref_pic_index;
1204
    // Final ME Search Center
1205
0
    int16_t              x_search_center = 0;
1206
0
    int16_t              y_search_center = 0;
1207
0
    EbPictureBufferDesc* ref_pic_ptr;
1208
0
    num_of_list_to_search = me_ctx->num_of_list_to_search;
1209
1210
    // Uni-Prediction motion estimation loop
1211
    // List Loop
1212
0
    for (list_index = REF_LIST_0; list_index < num_of_list_to_search; ++list_index) {
1213
0
        uint8_t num_of_ref_pic_to_search = me_ctx->num_of_ref_pic_to_search[list_index];
1214
1215
        // Ref Picture Loop
1216
0
        for (ref_pic_index = 0; ref_pic_index < num_of_ref_pic_to_search; ++ref_pic_index) {
1217
0
            uint16_t dist = 0;
1218
0
            ref_pic_ptr   = get_me_reference(
1219
0
                pcs, me_ctx, list_index, ref_pic_index, 2, &dist, input_ptr->width, input_ptr->height);
1220
            // Get hme results
1221
0
            if (me_ctx->search_results[list_index][ref_pic_index].do_ref == 0) {
1222
0
                continue; //so will not get ME results for those references.
1223
0
            }
1224
0
            x_search_center    = me_ctx->search_results[list_index][ref_pic_index].hme_sc_x;
1225
0
            y_search_center    = me_ctx->search_results[list_index][ref_pic_index].hme_sc_y;
1226
0
            search_area_width  = me_ctx->me_sa.sa_min.width;
1227
0
            search_area_height = me_ctx->me_sa.sa_min.height;
1228
1229
            // factor to slowdown the ME search region growth to MAX
1230
0
            if (me_ctx->me_type != ME_MCTF) {
1231
0
                dist = svt_aom_get_scaled_picture_distance(dist);
1232
0
            }
1233
0
            search_area_width  = MIN((search_area_width * dist), me_ctx->me_sa.sa_max.width);
1234
0
            search_area_height = MIN((search_area_height * dist), me_ctx->me_sa.sa_max.height);
1235
0
            if (me_ctx->mv_based_sa_adj.enabled && (!me_ctx->mv_based_sa_adj.nearest_ref_only || ref_pic_index == 0)) {
1236
0
                if (ABS(x_search_center) > me_ctx->mv_based_sa_adj.mv_size_th) {
1237
0
                    search_area_width *= me_ctx->mv_based_sa_adj.sa_multiplier;
1238
0
                }
1239
0
                if (ABS(y_search_center) > me_ctx->mv_based_sa_adj.mv_size_th) {
1240
0
                    search_area_height *= me_ctx->mv_based_sa_adj.sa_multiplier;
1241
0
                }
1242
0
            }
1243
0
            if (me_ctx->sc_class_me_boost &&
1244
0
                (pcs->ahd_error == (uint32_t)~0 || // Use ahd_error only when it is derived
1245
0
                 pcs->ahd_error <
1246
0
                     ((((20 * pcs->enhanced_pic->width * pcs->enhanced_pic->height) / 128)) *
1247
0
                      (uint32_t)(INPUT_SIZE_COUNT -
1248
0
                                 pcs->input_resolution)))) { // Only if there are low temporal variations between frames
1249
0
                const uint64_t hme_sad = me_ctx->search_results[list_index][ref_pic_index].hme_sad;
1250
0
                apply_me_sa_boost(&search_area_width, &search_area_height, hme_sad, me_ctx->sc_class_me_boost);
1251
0
            }
1252
            // Constrain x_ME to be a multiple of 8 (round up)
1253
            // Update ME search reagion size based on hme-data
1254
0
            search_area_width = (MAX(1, (search_area_width / me_ctx->reduce_me_sr_divisor[list_index][ref_pic_index])) +
1255
0
                                 7) &
1256
0
                ~0x07;
1257
0
            search_area_height = MAX(3, (search_area_height / me_ctx->reduce_me_sr_divisor[list_index][ref_pic_index]));
1258
0
            int16_t  search_area_height_before_sr_reduction = search_area_height;
1259
0
            uint64_t best_hme_sad                           = (uint64_t)~0;
1260
0
            if (me_ctx->me_early_exit_th) {
1261
0
                if (me_ctx->zz_sad[list_index][ref_pic_index] < (me_ctx->me_early_exit_th / 6)) {
1262
0
                    search_area_width  = 1;
1263
0
                    search_area_height = 1;
1264
0
                }
1265
0
            } else {
1266
0
                uint8_t hme_is_accuarte = 1;
1267
0
                if ((x_search_center != 0 || y_search_center != 0) && (me_ctx->is_ref == true)) {
1268
0
                    best_hme_sad = check_00_center(ref_pic_ptr,
1269
0
                                                   me_ctx,
1270
0
                                                   b64_origin_x,
1271
0
                                                   b64_origin_y,
1272
0
                                                   b64_width,
1273
0
                                                   b64_height,
1274
0
                                                   &x_search_center,
1275
0
                                                   &y_search_center,
1276
0
                                                   me_ctx->zz_sad[list_index][ref_pic_index]);
1277
1278
0
                    if (x_search_center == 0 && y_search_center == 0) {
1279
0
                        hme_is_accuarte = 0;
1280
0
                    }
1281
0
                }
1282
0
                if (me_ctx->me_sr_adjustment_ctrls.enable_me_sr_adjustment == 2) {
1283
0
                    if ((hme_is_accuarte && (best_hme_sad < (24 * 24))) ||
1284
0
                        (me_ctx->is_ref && me_ctx->search_results[list_index][ref_pic_index].hme_sad < (24 * 24))) {
1285
0
                        search_area_height = search_area_height / 2;
1286
0
                    }
1287
0
                    if (list_index || ref_pic_index) {
1288
0
                        if (me_ctx->p_sb_best_sad[0][0][0] < 5000) {
1289
0
                            if (search_area_height == search_area_height_before_sr_reduction) {
1290
0
                                search_area_height = search_area_height >> 1;
1291
0
                                search_area_width  = search_area_width >> 1;
1292
0
                            }
1293
0
                        }
1294
0
                    }
1295
0
                }
1296
0
            }
1297
0
            svt_initialize_buffer_32bits(me_ctx->p_sb_best_sad[list_index][ref_pic_index], 21, 1, MAX_SAD_VALUE);
1298
0
            me_ctx->p_best_sad_64x64 = &(me_ctx->p_sb_best_sad[list_index][ref_pic_index][ME_TIER_ZERO_PU_64x64]);
1299
0
            me_ctx->p_best_sad_32x32 = &(me_ctx->p_sb_best_sad[list_index][ref_pic_index][ME_TIER_ZERO_PU_32x32_0]);
1300
0
            me_ctx->p_best_sad_16x16 = &(me_ctx->p_sb_best_sad[list_index][ref_pic_index][ME_TIER_ZERO_PU_16x16_0]);
1301
0
            me_ctx->p_best_sad_8x8   = &(me_ctx->p_sb_best_sad[list_index][ref_pic_index][ME_TIER_ZERO_PU_8x8_0]);
1302
1303
0
            me_ctx->p_best_mv64x64 = &(me_ctx->p_sb_best_mv[list_index][ref_pic_index][ME_TIER_ZERO_PU_64x64]);
1304
0
            me_ctx->p_best_mv32x32 = &(me_ctx->p_sb_best_mv[list_index][ref_pic_index][ME_TIER_ZERO_PU_32x32_0]);
1305
0
            me_ctx->p_best_mv16x16 = &(me_ctx->p_sb_best_mv[list_index][ref_pic_index][ME_TIER_ZERO_PU_16x16_0]);
1306
0
            me_ctx->p_best_mv8x8   = &(me_ctx->p_sb_best_mv[list_index][ref_pic_index][ME_TIER_ZERO_PU_8x8_0]);
1307
1308
            /* If search area is large enough, check the ME 8x8 SAD variance, and if low, reduce search area
1309
            * (as the 64x64 MVs are likely good for all the 8x8 blocks that make it up).  If the search area
1310
            * is already low, the overhead of searching one additional point will be high (and fruitless, since
1311
            * the minimum search size that will be set by the 8x8 SAD variance algorithm is 8x3.
1312
            */
1313
0
            if (me_ctx->me_8x8_var_ctrls.enabled && (search_area_width * search_area_height > 24)) {
1314
0
                x_search_area_origin     = x_search_center;
1315
0
                y_search_area_origin     = y_search_center;
1316
0
                x_top_left_search_region = (int16_t)(b64_origin_x) - (ME_FILTER_TAP >> 1) + x_search_area_origin;
1317
0
                y_top_left_search_region = (int16_t)(b64_origin_y) - (ME_FILTER_TAP >> 1) + y_search_area_origin;
1318
0
                search_region_index = (x_top_left_search_region) + (y_top_left_search_region)*ref_pic_ptr->y_stride;
1319
0
                me_ctx->integer_buffer_ptr[list_index][ref_pic_index] = &(ref_pic_ptr->y_buffer[search_region_index]);
1320
0
                me_ctx->interpolated_full_stride[list_index][ref_pic_index] = ref_pic_ptr->y_stride;
1321
1322
0
                open_loop_me_fullpel_search_sblock(
1323
0
                    me_ctx, list_index, ref_pic_index, x_search_center, y_search_center, 1, 1);
1324
1325
                // Since only one point was searched, the 64x64 SAD will be the same as the sum of the 8x8 SADs
1326
0
                const uint32_t mean_dist_8x8     = me_ctx->p_best_sad_64x64[0] / 64;
1327
0
                uint32_t       sum_ofsq_dist_8x8 = 0;
1328
0
                for (unsigned i = 0; i < 64; i++) {
1329
0
                    const int32_t diff = ((int32_t)me_ctx->p_best_sad_8x8[i] - (int32_t)mean_dist_8x8);
1330
0
                    sum_ofsq_dist_8x8 += diff * diff;
1331
0
                }
1332
1333
0
                uint32_t me_8x8_cost_var = (uint32_t)(sum_ofsq_dist_8x8 / 64);
1334
1335
0
                if (me_8x8_cost_var > me_ctx->me_8x8_var_ctrls.me_sr_mult2_th) {
1336
0
                    search_area_width  = (MAX(1, search_area_width * 3 / 2) + 7) & ~0x7;
1337
0
                    search_area_height = MAX(1, search_area_height * 3 / 2);
1338
0
                }
1339
1340
0
                if (me_8x8_cost_var < me_ctx->me_8x8_var_ctrls.me_sr_div4_th) {
1341
0
                    search_area_width  = (MAX(1, search_area_width >> 2) + 7) & ~0x7;
1342
0
                    search_area_height = MAX(1, search_area_height >> 2);
1343
0
                    search_area_height = MAX(3, search_area_height);
1344
0
                } else if (me_8x8_cost_var < me_ctx->me_8x8_var_ctrls.me_sr_div2_th) {
1345
0
                    search_area_width  = (MIN(search_area_width, search_area_width >> 1) + 7) & ~0x7;
1346
0
                    search_area_height = MIN(search_area_height, search_area_height >> 1);
1347
0
                    search_area_height = MAX(3, search_area_height);
1348
0
                }
1349
0
            }
1350
0
            x_search_area_origin = x_search_center - (search_area_width >> 1);
1351
0
            y_search_area_origin = y_search_center - (search_area_height >> 1);
1352
1353
            // Correct the left edge of the Search Area if it is not on the
1354
            // reference Picture
1355
0
            x_search_area_origin = ((org_x + x_search_area_origin) < -pad_width) ? -pad_width - org_x
1356
0
                                                                                 : x_search_area_origin;
1357
0
            search_area_width    = ((org_x + x_search_area_origin) < -pad_width)
1358
0
                   ? search_area_width - (-pad_width - (org_x + x_search_area_origin))
1359
0
                   : search_area_width;
1360
            // Correct the right edge of the Search Area if its not on the
1361
            // reference Picture
1362
0
            x_search_area_origin = ((org_x + x_search_area_origin) > picture_width - 1)
1363
0
                ? x_search_area_origin - ((org_x + x_search_area_origin) - (picture_width - 1))
1364
0
                : x_search_area_origin;
1365
1366
0
            search_area_width = ((org_x + x_search_area_origin + search_area_width) > picture_width)
1367
0
                ? MAX(1, search_area_width - ((org_x + x_search_area_origin + search_area_width) - picture_width))
1368
0
                : search_area_width;
1369
1370
            // Constrain x_ME to be a multiple of 8 (round down as cropping
1371
            // already performed)
1372
0
            search_area_width = (search_area_width < 8) ? search_area_width : search_area_width & ~0x07;
1373
1374
            // Correct the top edge of the Search Area if it is not on the
1375
            // reference Picture
1376
0
            y_search_area_origin = ((org_y + y_search_area_origin) < -pad_height) ? -pad_height - org_y
1377
0
                                                                                  : y_search_area_origin;
1378
0
            search_area_height   = ((org_y + y_search_area_origin) < -pad_height)
1379
0
                  ? search_area_height - (-pad_height - (org_y + y_search_area_origin))
1380
0
                  : search_area_height;
1381
            // Correct the bottom edge of the Search Area if its not on the
1382
            // reference Picture
1383
0
            y_search_area_origin = ((org_y + y_search_area_origin) > picture_height - 1)
1384
0
                ? y_search_area_origin - ((org_y + y_search_area_origin) - (picture_height - 1))
1385
0
                : y_search_area_origin;
1386
0
            search_area_height   = (org_y + y_search_area_origin + search_area_height > picture_height)
1387
0
                  ? MAX(1, search_area_height - ((org_y + y_search_area_origin + search_area_height) - picture_height))
1388
0
                  : search_area_height;
1389
1390
0
            x_top_left_search_region = (int16_t)(b64_origin_x) - (ME_FILTER_TAP >> 1) + x_search_area_origin;
1391
0
            y_top_left_search_region = (int16_t)(b64_origin_y) - (ME_FILTER_TAP >> 1) + y_search_area_origin;
1392
0
            search_region_index      = (x_top_left_search_region) + (y_top_left_search_region)*ref_pic_ptr->y_stride;
1393
0
            me_ctx->integer_buffer_ptr[list_index][ref_pic_index]       = &(ref_pic_ptr->y_buffer[search_region_index]);
1394
0
            me_ctx->interpolated_full_stride[list_index][ref_pic_index] = ref_pic_ptr->y_stride;
1395
1396
            // Move to the top left of the search region
1397
0
            x_top_left_search_region = (int16_t)(b64_origin_x) + x_search_area_origin;
1398
0
            y_top_left_search_region = (int16_t)(b64_origin_y) + y_search_area_origin;
1399
0
            open_loop_me_fullpel_search_sblock(me_ctx,
1400
0
                                               list_index,
1401
0
                                               ref_pic_index,
1402
0
                                               x_search_area_origin,
1403
0
                                               y_search_area_origin,
1404
0
                                               search_area_width,
1405
0
                                               search_area_height);
1406
0
        }
1407
0
    }
1408
0
}
1409
1410
/*
1411
  using previous stage ME results (Integer Search) for each reference
1412
  frame. keep only the references that are close to the best reference.
1413
*/
1414
0
static void me_prune_ref(MeContext* me_ctx) {
1415
0
    uint8_t num_of_list_to_search = me_ctx->num_of_list_to_search;
1416
0
    for (uint8_t list_index = REF_LIST_0; list_index < num_of_list_to_search; ++list_index) {
1417
0
        uint8_t num_of_ref_pic_to_search = me_ctx->num_of_ref_pic_to_search[list_index];
1418
        // Ref Picture Loop
1419
0
        for (uint8_t ref_pic_index = 0; ref_pic_index < num_of_ref_pic_to_search; ++ref_pic_index) {
1420
0
            me_ctx->search_results[list_index][ref_pic_index].hme_sad = 0;
1421
            // Get hme results
1422
0
            if (me_ctx->search_results[list_index][ref_pic_index].do_ref == 0) {
1423
0
                me_ctx->search_results[list_index][ref_pic_index].hme_sad = MAX_SAD_VALUE * 64;
1424
0
                continue;
1425
0
            }
1426
0
            me_ctx->p_best_sad_8x8 = &(me_ctx->p_sb_best_sad[list_index][ref_pic_index][ME_TIER_ZERO_PU_8x8_0]);
1427
            // 8x8   [64 partitions]
1428
0
            for (uint32_t pu_index = 0; pu_index < 64; ++pu_index) {
1429
0
                uint32_t idx = tab8x8[pu_index];
1430
0
                me_ctx->search_results[list_index][ref_pic_index].hme_sad += me_ctx->p_best_sad_8x8[idx];
1431
0
            }
1432
0
        }
1433
0
    }
1434
1435
0
    uint16_t prune_ref_th = me_ctx->me_hme_prune_ctrls.prune_ref_if_me_sad_dev_bigger_than_th;
1436
0
    if (me_ctx->me_hme_prune_ctrls.enable_me_hme_ref_pruning && prune_ref_th != (uint16_t)~0) {
1437
0
        uint64_t best = (uint64_t)~0;
1438
0
        for (int i = 0; i < MAX_NUM_OF_REF_PIC_LIST; ++i) {
1439
0
            for (int j = 0; j < REF_LIST_MAX_DEPTH; ++j) {
1440
0
                if (me_ctx->search_results[i][j].hme_sad < best) {
1441
0
                    best = me_ctx->search_results[i][j].hme_sad;
1442
0
                }
1443
0
            }
1444
0
        }
1445
0
        for (uint32_t li = 0; li < MAX_NUM_OF_REF_PIC_LIST; li++) {
1446
0
            for (uint32_t ri = 1; ri < REF_LIST_MAX_DEPTH; ri++) {
1447
                // Prune references based on ME sad
1448
0
                if ((me_ctx->search_results[li][ri].hme_sad - best) * 100 > (prune_ref_th * best)) {
1449
0
                    me_ctx->search_results[li][ri].do_ref = 0;
1450
0
                }
1451
0
            }
1452
0
        }
1453
0
    }
1454
0
}
1455
1456
/* perform  motion search over a given search area*/
1457
static void prehme_core(MeContext* me_ctx, int16_t org_x, int16_t org_y, uint32_t sb_width, uint32_t sb_height,
1458
0
                        EbPictureBufferDesc* sixteenth_ref_pic_ptr, SearchInfo* prehme_data) {
1459
0
    int16_t x_top_left_search_region;
1460
0
    int16_t y_top_left_search_region;
1461
0
    int32_t search_region_index;
1462
1463
0
    int16_t pad_width  = (int16_t)(sixteenth_ref_pic_ptr->border) - 1;
1464
0
    int16_t pad_height = (int16_t)(sixteenth_ref_pic_ptr->border) - 1;
1465
1466
0
    int16_t search_area_width  = prehme_data->sa.width;
1467
0
    int16_t search_area_height = prehme_data->sa.height;
1468
1469
0
    int16_t x_search_area_origin = -(int16_t)(search_area_width >> 1);
1470
0
    int16_t y_search_area_origin = -(int16_t)(search_area_height >> 1);
1471
1472
    // Correct the left edge of the Search Area if it is not on the reference Picture
1473
0
    x_search_area_origin = ((org_x + x_search_area_origin) < -pad_width) ? -pad_width - org_x : x_search_area_origin;
1474
1475
0
    search_area_width = ((org_x + x_search_area_origin) < -pad_width)
1476
0
        ? search_area_width - (-pad_width - (org_x + x_search_area_origin))
1477
0
        : search_area_width;
1478
1479
    // Correct the right edge of the Search Area if its not on the reference Picture
1480
0
    x_search_area_origin = ((org_x + x_search_area_origin) > (int16_t)sixteenth_ref_pic_ptr->width - 1)
1481
0
        ? x_search_area_origin - ((org_x + x_search_area_origin) - ((int16_t)sixteenth_ref_pic_ptr->width - 1))
1482
0
        : x_search_area_origin;
1483
1484
0
    search_area_width = ((org_x + x_search_area_origin + search_area_width) > (int16_t)sixteenth_ref_pic_ptr->width)
1485
0
        ? MAX(1,
1486
0
              search_area_width -
1487
0
                  ((org_x + x_search_area_origin + search_area_width) - (int16_t)sixteenth_ref_pic_ptr->width))
1488
0
        : search_area_width;
1489
1490
    // Correct the top edge of the Search Area if it is not on the reference Picture
1491
0
    y_search_area_origin = ((org_y + y_search_area_origin) < -pad_height) ? -pad_height - org_y : y_search_area_origin;
1492
1493
0
    search_area_height = ((org_y + y_search_area_origin) < -pad_height)
1494
0
        ? search_area_height - (-pad_height - (org_y + y_search_area_origin))
1495
0
        : search_area_height;
1496
1497
    // Correct the bottom edge of the Search Area if its not on the reference Picture
1498
0
    y_search_area_origin = ((org_y + y_search_area_origin) > (int16_t)sixteenth_ref_pic_ptr->height - 1)
1499
0
        ? y_search_area_origin - ((org_y + y_search_area_origin) - ((int16_t)sixteenth_ref_pic_ptr->height - 1))
1500
0
        : y_search_area_origin;
1501
1502
0
    search_area_height = (org_y + y_search_area_origin + search_area_height > (int16_t)sixteenth_ref_pic_ptr->height)
1503
0
        ? MAX(1,
1504
0
              search_area_height -
1505
0
                  ((org_y + y_search_area_origin + search_area_height) - (int16_t)sixteenth_ref_pic_ptr->height))
1506
0
        : search_area_height;
1507
1508
0
    x_top_left_search_region = (org_x) + x_search_area_origin;
1509
0
    y_top_left_search_region = (org_y) + y_search_area_origin;
1510
0
    search_region_index      = x_top_left_search_region + y_top_left_search_region * sixteenth_ref_pic_ptr->y_stride;
1511
1512
0
    svt_sad_loop_kernel(&me_ctx->sixteenth_b64_buffer[0],
1513
0
                        me_ctx->hme_search_method == FULL_SAD_SEARCH ? me_ctx->sixteenth_b64_buffer_stride
1514
0
                                                                     : me_ctx->sixteenth_b64_buffer_stride * 2,
1515
0
                        &sixteenth_ref_pic_ptr->y_buffer[search_region_index],
1516
0
                        (me_ctx->hme_search_method == FULL_SAD_SEARCH) ? sixteenth_ref_pic_ptr->y_stride
1517
0
                                                                       : sixteenth_ref_pic_ptr->y_stride * 2,
1518
0
                        (me_ctx->hme_search_method == FULL_SAD_SEARCH) ? sb_height : sb_height >> 1,
1519
0
                        sb_width,
1520
                        /* results */
1521
0
                        &prehme_data->sad,
1522
0
                        &prehme_data->best_mv.x,
1523
0
                        &prehme_data->best_mv.y,
1524
0
                        sixteenth_ref_pic_ptr->y_stride,
1525
0
                        me_ctx->prehme_ctrl.skip_search_line,
1526
0
                        search_area_width,
1527
0
                        search_area_height);
1528
1529
0
    prehme_data->sad = (me_ctx->hme_search_method == FULL_SAD_SEARCH)
1530
0
        ? prehme_data->sad
1531
0
        : prehme_data->sad * 2; // Multiply by 2 because considered only ever other line
1532
0
    prehme_data->best_mv.x += x_search_area_origin;
1533
0
    prehme_data->best_mv.x *= 4; // Multiply by 4 because operating on 1/4 resolution
1534
0
    prehme_data->best_mv.y += y_search_area_origin;
1535
0
    prehme_data->best_mv.y *= 4; // Multiply by 4 because operating on 1/4 resolution
1536
0
    prehme_data->valid = 1;
1537
0
    return;
1538
0
}
1539
1540
// Compute the SAD between the source block and the reference block at the same
// position (zero motion vector). Strides are doubled and the height halved
// (subsample factor 1), and the result is scaled back up by the same factor.
static uint32_t get_zz_sad(EbPictureBufferDesc* ref_pic_ptr, MeContext* me_ctx, uint32_t sb_origin_x,
                           uint32_t sb_origin_y, uint32_t sb_width, uint32_t sb_height) {
    const uint32_t subsample_sad = 1;
    const int16_t  org_x         = (int16_t)sb_origin_x;
    const int16_t  org_y         = (int16_t)sb_origin_y;

    // Offset of the co-located block inside the reference luma buffer.
    const int32_t colocated_idx = org_x + (org_y)*ref_pic_ptr->y_stride;

    const uint32_t subsampled_sad = svt_nxm_sad_kernel(me_ctx->b64_src_ptr,
                                                       me_ctx->b64_src_stride << subsample_sad,
                                                       &(ref_pic_ptr->y_buffer[colocated_idx]),
                                                       ref_pic_ptr->y_stride << subsample_sad,
                                                       sb_height >> subsample_sad,
                                                       sb_width);

    return subsampled_sad << subsample_sad;
}
1562
1563
// Determine if pre-HME for the current picture and search region should be skipped.
1564
// Return 1 if can early exit (i.e. skip pre-hme for current frame and search region)
1565
// Return 0 if can't skip
1566
0
static bool check_prehme_early_exit(MeContext* me_ctx, uint8_t list_i, uint8_t ref_i, uint8_t sr_i) {
1567
0
    SearchInfo* prehme_data = &me_ctx->prehme_data[list_i][ref_i][sr_i];
1568
1569
0
    if (me_ctx->me_early_exit_th) {
1570
0
        if (me_ctx->zz_sad[list_i][ref_i] < me_ctx->me_early_exit_th) {
1571
0
            prehme_data->best_mv.as_int = 0;
1572
0
            prehme_data->sad            = 0;
1573
0
            prehme_data->valid          = 1;
1574
0
            return 1;
1575
0
        }
1576
0
    }
1577
1578
0
    if (me_ctx->prehme_ctrl.l1_early_exit) {
1579
0
        if (list_i == 1 && me_ctx->prehme_data[0][ref_i][sr_i].valid &&
1580
0
            ((me_ctx->prehme_data[0][ref_i][sr_i].sad < (32 * 32)) ||
1581
0
             ((ABS(me_ctx->prehme_data[0][ref_i][sr_i].best_mv.x) < 16) &&
1582
0
              (ABS(me_ctx->prehme_data[0][ref_i][sr_i].best_mv.y) < 16)))) {
1583
0
            prehme_data->best_mv.x = -me_ctx->prehme_data[0][ref_i][sr_i].best_mv.x;
1584
0
            prehme_data->best_mv.y = -me_ctx->prehme_data[0][ref_i][sr_i].best_mv.y;
1585
0
            prehme_data->sad       = me_ctx->prehme_data[0][ref_i][sr_i].sad;
1586
0
            prehme_data->valid     = 1;
1587
0
            return 1;
1588
0
        }
1589
0
    }
1590
0
    return 0;
1591
0
}
1592
1593
/* Perform Pre-HME for one Block 64x64*/
1594
static void prehme_b64(PictureParentControlSet* pcs, uint32_t org_x, uint32_t org_y, MeContext* me_ctx,
1595
0
                       EbPictureBufferDesc* input_ptr) {
1596
0
    const uint32_t block_width  = me_ctx->b64_width;
1597
0
    const uint32_t block_height = me_ctx->b64_height;
1598
0
    uint32_t       best_sad     = MAX_U32;
1599
    // List Loop
1600
0
    for (int list_i = REF_LIST_0; list_i < me_ctx->num_of_list_to_search; ++list_i) {
1601
        // Ref Picture Loop
1602
0
        const uint8_t num_of_ref_pic_to_search = me_ctx->num_of_ref_pic_to_search[list_i];
1603
0
        for (uint8_t ref_i = 0; ref_i < num_of_ref_pic_to_search; ++ref_i) {
1604
0
            uint16_t             dist              = 0;
1605
0
            EbPictureBufferDesc* sixteenth_ref_pic = get_me_reference(
1606
0
                pcs, me_ctx, list_i, ref_i, 0, &dist, input_ptr->width, input_ptr->height);
1607
1608
0
            if (me_ctx->temporal_layer_index > 0 || list_i == 0) {
1609
0
                uint32_t hme_sr_factor = svt_aom_get_scaled_picture_distance(dist);
1610
1611
0
                for (uint8_t sr_i = 0; sr_i < SEARCH_REGION_COUNT; sr_i++) {
1612
0
                    if (check_prehme_early_exit(me_ctx, list_i, ref_i, sr_i)) {
1613
0
                        continue;
1614
0
                    }
1615
1616
0
                    SearchInfo* prehme_data = &me_ctx->prehme_data[list_i][ref_i][sr_i];
1617
0
                    if (!me_ctx->search_results[list_i][ref_i].do_ref) {
1618
0
                        prehme_data->best_mv.as_int = 0;
1619
0
                        prehme_data->sad            = MAX_U32;
1620
0
                        continue;
1621
0
                    }
1622
0
                    prehme_data->sa.width  = MIN((me_ctx->prehme_ctrl.prehme_sa_cfg[sr_i].sa_min.width * hme_sr_factor),
1623
0
                                                me_ctx->prehme_ctrl.prehme_sa_cfg[sr_i].sa_max.width);
1624
0
                    prehme_data->sa.height = MIN(
1625
0
                        (me_ctx->prehme_ctrl.prehme_sa_cfg[sr_i].sa_min.height * hme_sr_factor),
1626
0
                        me_ctx->prehme_ctrl.prehme_sa_cfg[sr_i].sa_max.height);
1627
1628
0
                    prehme_core(me_ctx,
1629
0
                                ((int16_t)org_x) >> 2,
1630
0
                                ((int16_t)org_y) >> 2,
1631
0
                                block_width >> 2,
1632
0
                                block_height >> 2,
1633
0
                                sixteenth_ref_pic,
1634
0
                                prehme_data);
1635
0
                    me_ctx->performed_phme[list_i][ref_i][sr_i] = 1;
1636
0
                }
1637
0
                uint32_t min_sad = (uint32_t)MIN(me_ctx->prehme_data[list_i][ref_i][0].sad,
1638
0
                                                 me_ctx->prehme_data[list_i][ref_i][1].sad);
1639
0
                best_sad         = MIN(best_sad, min_sad);
1640
0
            } else {
1641
                // PW: Does this account for base pictures
1642
0
                for (uint8_t sr_i = 0; sr_i < SEARCH_REGION_COUNT; sr_i++) {
1643
0
                    me_ctx->prehme_data[1][ref_i][sr_i].best_mv.x = -me_ctx->prehme_data[0][ref_i][sr_i].best_mv.x;
1644
0
                    me_ctx->prehme_data[1][ref_i][sr_i].best_mv.y = -me_ctx->prehme_data[0][ref_i][sr_i].best_mv.y;
1645
0
                    me_ctx->prehme_data[1][ref_i][sr_i].sad       = me_ctx->prehme_data[0][ref_i][sr_i].sad;
1646
0
                }
1647
0
            }
1648
0
        } // End ref pic loop
1649
0
    } // End list loop
1650
0
    if (me_ctx->temporal_layer_index > 0 && best_sad < me_ctx->me_hme_prune_ctrls.phme_sad_th) {
1651
0
        for (int list_i = REF_LIST_0; list_i < me_ctx->num_of_list_to_search; ++list_i) {
1652
0
            for (uint8_t ref_i = 0; ref_i < me_ctx->num_of_ref_pic_to_search[list_i]; ++ref_i) {
1653
0
                if (!me_ctx->search_results[list_i][ref_i].do_ref) {
1654
0
                    continue;
1655
0
                }
1656
0
                if (ref_i == 0) {
1657
0
                    continue;
1658
0
                }
1659
1660
0
                const uint32_t prhme_th   = me_ctx->me_hme_prune_ctrls.phme_sad_pct;
1661
0
                uint32_t       prehme_sad = (uint32_t)MIN(me_ctx->prehme_data[list_i][ref_i][0].sad,
1662
0
                                                    me_ctx->prehme_data[list_i][ref_i][1].sad);
1663
0
                if ((prehme_sad - best_sad) * 100 > (prhme_th * best_sad)) {
1664
0
                    me_ctx->search_results[list_i][ref_i].do_ref = 0;
1665
0
                }
1666
0
            }
1667
0
        }
1668
0
    }
1669
0
}
1670
1671
// Set the HME L0 search area.  Perform scaling based on list index and ref index.
1672
// HME L0 search area should be the same for each search region
1673
static void get_hme_l0_search_area(MeContext* me_ctx, uint8_t list_index, uint8_t ref_pic_index, uint16_t dist,
1674
0
                                   int16_t* sa_width, int16_t* sa_height) {
1675
    // Reduce HME search area for higher ref indices
1676
0
    if (me_ctx->me_sr_adjustment_ctrls.enable_me_sr_adjustment &&
1677
0
        me_ctx->me_sr_adjustment_ctrls.distance_based_hme_resizing) {
1678
0
        uint8_t is_hor   = 1;
1679
0
        uint8_t is_ver   = 1;
1680
0
        uint8_t is_still = 0;
1681
1682
0
        if (me_ctx->reduce_hme_l0_sr_th_min && me_ctx->reduce_hme_l0_sr_th_max) {
1683
0
            if (list_index || ref_pic_index) {
1684
0
                int16_t l0_mvx = me_ctx->x_hme_level0_search_center[0][0][0 /*quadrant-x*/][0 /*quadrant-y*/];
1685
0
                int16_t l0_mvy = me_ctx->y_hme_level0_search_center[0][0][0 /*quadrant-x*/][0 /*quadrant-y*/];
1686
1687
                // Determine whether the computed motion from list0/ref_index0 is in vertical or horizintal direction
1688
0
                is_ver   = ((ABS(l0_mvx) < me_ctx->reduce_hme_l0_sr_th_min) &&
1689
0
                          (ABS(l0_mvy) > me_ctx->reduce_hme_l0_sr_th_max));
1690
0
                is_hor   = ((ABS(l0_mvx) > me_ctx->reduce_hme_l0_sr_th_max) &&
1691
0
                          (ABS(l0_mvy) < me_ctx->reduce_hme_l0_sr_th_min));
1692
0
                is_still = ((ABS(l0_mvx) < (me_ctx->reduce_hme_l0_sr_th_min * 3)) &&
1693
0
                            (ABS(l0_mvy) < (me_ctx->reduce_hme_l0_sr_th_min * 3)));
1694
0
            }
1695
0
        }
1696
1697
0
        uint8_t x_offset = 1;
1698
0
        uint8_t y_offset = 1;
1699
0
        if (!is_ver) {
1700
0
            y_offset = 2;
1701
0
        }
1702
0
        if (!is_hor) {
1703
0
            x_offset = 2;
1704
0
        }
1705
1706
0
        if (me_ctx->me_sr_adjustment_ctrls.enable_me_sr_adjustment == 2) {
1707
0
            if (is_still) {
1708
0
                x_offset = 4;
1709
0
                y_offset = 4;
1710
0
            }
1711
0
        }
1712
1713
0
        me_ctx->hme_l0_sa.sa_min.width  = me_ctx->hme_l0_sa.sa_min.width / (x_offset + ref_pic_index);
1714
0
        me_ctx->hme_l0_sa.sa_min.height = me_ctx->hme_l0_sa.sa_min.height / (y_offset + ref_pic_index);
1715
0
        me_ctx->hme_l0_sa.sa_max.width  = me_ctx->hme_l0_sa.sa_max.width / (x_offset + ref_pic_index);
1716
0
        me_ctx->hme_l0_sa.sa_max.height = me_ctx->hme_l0_sa.sa_max.height / (y_offset + ref_pic_index);
1717
0
    }
1718
1719
0
    int32_t hme_sr_factor = svt_aom_get_scaled_picture_distance(dist);
1720
1721
    // Derive the search area width and height, rounding the width up to the nearest sixteenth
1722
0
    int16_t search_area_width  = me_ctx->hme_l0_sa.sa_min.width / me_ctx->num_hme_sa_w;
1723
0
    search_area_width          = (int16_t)MIN((((search_area_width * hme_sr_factor) + 15) & ~0x0F),
1724
0
                                     (((me_ctx->hme_l0_sa.sa_max.width / me_ctx->num_hme_sa_w) + 15) & ~0x0F));
1725
0
    int16_t search_area_height = me_ctx->hme_l0_sa.sa_min.height / me_ctx->num_hme_sa_h;
1726
0
    search_area_height         = (int16_t)MIN((search_area_height * hme_sr_factor),
1727
0
                                      me_ctx->hme_l0_sa.sa_max.height / me_ctx->num_hme_sa_h);
1728
1729
0
    *sa_width  = search_area_width;
1730
0
    *sa_height = search_area_height;
1731
0
}
1732
1733
//this functions returns the worst quadrant in terms of sad.
1734
//it is implemented w/o for loops to get away from a VS2022 compiler issue.
1735
//it then assumes a fixed quadrant sizes of 2 each direction.
1736
static void get_worst_quadrant(MeContext* me_ctx, uint32_t list_index, uint32_t ref_pic_index, uint8_t* best_w,
1737
0
                               uint8_t* best_h) {
1738
0
    if (me_ctx->num_hme_sa_w != 2 || me_ctx->num_hme_sa_h != 2) {
1739
0
        svt_aom_assert_err(0, "update other quadrant sizes");
1740
0
        return;
1741
0
    }
1742
0
    uint64_t max_sad = 0;
1743
1744
0
    if (me_ctx->hme_level0_sad[list_index][ref_pic_index][0][0] > max_sad) {
1745
0
        max_sad = me_ctx->hme_level0_sad[list_index][ref_pic_index][0][0];
1746
0
        *best_w = 0;
1747
0
        *best_h = 0;
1748
0
    }
1749
0
    if (me_ctx->hme_level0_sad[list_index][ref_pic_index][1][0] > max_sad) {
1750
0
        max_sad = me_ctx->hme_level0_sad[list_index][ref_pic_index][1][0];
1751
0
        *best_w = 1;
1752
0
        *best_h = 0;
1753
0
    }
1754
0
    if (me_ctx->hme_level0_sad[list_index][ref_pic_index][0][1] > max_sad) {
1755
0
        max_sad = me_ctx->hme_level0_sad[list_index][ref_pic_index][0][1];
1756
0
        *best_w = 0;
1757
0
        *best_h = 1;
1758
0
    }
1759
0
    if (me_ctx->hme_level0_sad[list_index][ref_pic_index][1][1] > max_sad) {
1760
0
        *best_w = 1;
1761
0
        *best_h = 1;
1762
0
    }
1763
0
}
1764
1765
/*******************************************
1766
 * performs hierarchical ME level 0 for one 64x64 block (uni-prediction only)
1767
 *******************************************/
1768
static void hme_level0_b64(PictureParentControlSet* pcs, uint32_t org_x, uint32_t org_y, MeContext* me_ctx,
1769
0
                           EbPictureBufferDesc* input_ptr) {
1770
0
    const uint32_t block_width  = me_ctx->b64_width;
1771
0
    const uint32_t block_height = me_ctx->b64_height;
1772
1773
    // store base HME sizes, to be used if using ref-index based HME resizing
1774
0
    SearchAreaMinMax base_hme_sa;
1775
0
    base_hme_sa.sa_min = (SearchArea){me_ctx->hme_l0_sa.sa_min.width, me_ctx->hme_l0_sa.sa_min.height};
1776
0
    base_hme_sa.sa_max = (SearchArea){me_ctx->hme_l0_sa.sa_max.width, me_ctx->hme_l0_sa.sa_max.height};
1777
1778
    // List Loop
1779
0
    const uint8_t num_of_list_to_search = me_ctx->num_of_list_to_search;
1780
0
    for (uint8_t list_index = REF_LIST_0; list_index < num_of_list_to_search; ++list_index) {
1781
        // Ref Picture Loop
1782
0
        const uint8_t num_of_ref_pic_to_search = me_ctx->num_of_ref_pic_to_search[list_index];
1783
0
        for (uint8_t ref_pic_index = 0; ref_pic_index < num_of_ref_pic_to_search; ++ref_pic_index) {
1784
            // If me_early_exit_th is enabled, skip HME L0 for the current block if the zero-zero SAD is low
1785
0
            if (me_ctx->me_early_exit_th) {
1786
0
                if (me_ctx->zz_sad[list_index][ref_pic_index] < (me_ctx->me_early_exit_th >> 2)) {
1787
0
                    for (uint32_t sr_idx_y = 0; sr_idx_y < me_ctx->num_hme_sa_h; sr_idx_y++) {
1788
0
                        for (uint32_t sr_idx_x = 0; sr_idx_x < me_ctx->num_hme_sa_w; sr_idx_x++) {
1789
0
                            me_ctx->x_hme_level0_search_center[list_index][ref_pic_index][sr_idx_x][sr_idx_y] = 0;
1790
0
                            me_ctx->y_hme_level0_search_center[list_index][ref_pic_index][sr_idx_x][sr_idx_y] = 0;
1791
0
                            me_ctx->hme_level0_sad[list_index][ref_pic_index][sr_idx_x][sr_idx_y]             = 0;
1792
0
                        }
1793
0
                    }
1794
0
                    continue;
1795
0
                }
1796
0
            }
1797
0
            if (me_ctx->prev_me_stage_based_exit_th) {
1798
0
                uint8_t sr_i = me_ctx->prehme_data[list_index][ref_pic_index][0].sad <=
1799
0
                        me_ctx->prehme_data[list_index][ref_pic_index][1].sad
1800
0
                    ? 0
1801
0
                    : 1;
1802
0
                if (me_ctx->performed_phme[list_index][ref_pic_index][sr_i]) {
1803
0
                    if (me_ctx->prehme_data[list_index][ref_pic_index][sr_i].sad <
1804
0
                        (me_ctx->prev_me_stage_based_exit_th >> 4)) {
1805
0
                        for (uint32_t sr_idx_y = 0; sr_idx_y < me_ctx->num_hme_sa_h; sr_idx_y++) {
1806
0
                            for (uint32_t sr_idx_x = 0; sr_idx_x < me_ctx->num_hme_sa_w; sr_idx_x++) {
1807
0
                                me_ctx->x_hme_level0_search_center[list_index][ref_pic_index][sr_idx_x][sr_idx_y] =
1808
0
                                    me_ctx->prehme_data[list_index][ref_pic_index][sr_i].best_mv.x;
1809
0
                                me_ctx->y_hme_level0_search_center[list_index][ref_pic_index][sr_idx_x][sr_idx_y] =
1810
0
                                    me_ctx->prehme_data[list_index][ref_pic_index][sr_i].best_mv.y;
1811
0
                                me_ctx->hme_level0_sad[list_index][ref_pic_index][sr_idx_x][sr_idx_y] =
1812
0
                                    me_ctx->prehme_data[list_index][ref_pic_index][sr_i].sad;
1813
0
                            }
1814
0
                        }
1815
0
                        continue;
1816
0
                    }
1817
0
                }
1818
0
            }
1819
1820
0
            if (!me_ctx->search_results[list_index][ref_pic_index].do_ref) {
1821
0
                for (uint32_t sr_idx_y = 0; sr_idx_y < me_ctx->num_hme_sa_h; sr_idx_y++) {
1822
0
                    for (uint32_t sr_idx_x = 0; sr_idx_x < me_ctx->num_hme_sa_w; sr_idx_x++) {
1823
0
                        me_ctx->x_hme_level0_search_center[list_index][ref_pic_index][sr_idx_x][sr_idx_y] = 0;
1824
0
                        me_ctx->y_hme_level0_search_center[list_index][ref_pic_index][sr_idx_x][sr_idx_y] = 0;
1825
0
                        me_ctx->hme_level0_sad[list_index][ref_pic_index][sr_idx_x][sr_idx_y]             = MAX_U32;
1826
0
                    }
1827
0
                }
1828
0
                continue;
1829
0
            }
1830
            // Get the sixteenth downsampled reference picture
1831
0
            uint16_t             dist              = 0;
1832
0
            EbPictureBufferDesc* sixteenth_ref_pic = get_me_reference(
1833
0
                pcs, me_ctx, list_index, ref_pic_index, 0, &dist, input_ptr->width, input_ptr->height);
1834
1835
0
            if (me_ctx->temporal_layer_index > 0 || list_index == 0) {
1836
                // Get the HME L0 search dimensions for the current frame
1837
0
                int16_t sa_width = 0, sa_height = 0;
1838
0
                get_hme_l0_search_area(me_ctx, list_index, ref_pic_index, dist, &sa_width, &sa_height);
1839
0
                for (uint8_t sr_h = 0; sr_h < me_ctx->num_hme_sa_h; sr_h++) {
1840
0
                    for (uint8_t sr_w = 0; sr_w < me_ctx->num_hme_sa_w; sr_w++) {
1841
0
                        hme_level_0(me_ctx,
1842
0
                                    ((int16_t)org_x) >> 2,
1843
0
                                    ((int16_t)org_y) >> 2,
1844
0
                                    block_width >> 2,
1845
0
                                    block_height >> 2,
1846
0
                                    sa_width,
1847
0
                                    sa_height,
1848
0
                                    sixteenth_ref_pic,
1849
0
                                    sr_w,
1850
0
                                    sr_h,
1851
0
                                    &(me_ctx->hme_level0_sad[list_index][ref_pic_index][sr_w][sr_h]),
1852
0
                                    &(me_ctx->x_hme_level0_search_center[list_index][ref_pic_index][sr_w][sr_h]),
1853
0
                                    &(me_ctx->y_hme_level0_search_center[list_index][ref_pic_index][sr_w][sr_h]));
1854
0
                    }
1855
0
                }
1856
1857
                // reset base HME area
1858
0
                if (me_ctx->me_sr_adjustment_ctrls.enable_me_sr_adjustment &&
1859
0
                    me_ctx->me_sr_adjustment_ctrls.distance_based_hme_resizing) {
1860
0
                    me_ctx->hme_l0_sa.sa_min = base_hme_sa.sa_min;
1861
0
                    me_ctx->hme_l0_sa.sa_max = base_hme_sa.sa_max;
1862
0
                }
1863
1864
0
                if (me_ctx->prehme_ctrl.enable) {
1865
                    //get the worst quadrant
1866
0
                    uint8_t sr_h_max = 0, sr_w_max = 0;
1867
0
                    get_worst_quadrant(me_ctx, list_index, ref_pic_index, &sr_w_max, &sr_h_max);
1868
1869
0
                    uint8_t sr_i = me_ctx->prehme_data[list_index][ref_pic_index][0].sad <=
1870
0
                            me_ctx->prehme_data[list_index][ref_pic_index][1].sad
1871
0
                        ? 0
1872
0
                        : 1;
1873
                    //replace worst with pre-hme
1874
0
                    if (me_ctx->prehme_data[list_index][ref_pic_index][sr_i].sad <
1875
0
                        me_ctx->hme_level0_sad[list_index][ref_pic_index][sr_w_max][sr_h_max]) {
1876
0
                        me_ctx->hme_level0_sad[list_index][ref_pic_index][sr_w_max][sr_h_max] =
1877
0
                            me_ctx->prehme_data[list_index][ref_pic_index][sr_i].sad;
1878
1879
0
                        me_ctx->x_hme_level0_search_center[list_index][ref_pic_index][sr_w_max][sr_h_max] =
1880
0
                            me_ctx->prehme_data[list_index][ref_pic_index][sr_i].best_mv.x;
1881
1882
0
                        me_ctx->y_hme_level0_search_center[list_index][ref_pic_index][sr_w_max][sr_h_max] =
1883
0
                            me_ctx->prehme_data[list_index][ref_pic_index][sr_i].best_mv.y;
1884
0
                    }
1885
0
                }
1886
0
            }
1887
0
        } // End ref pic loop
1888
0
    } // End list loop
1889
0
}
1890
1891
/*******************************************
1892
 * performs hierarchical ME level 1 for one 64x64 block (uni-prediction only)
1893
 *******************************************/
1894
static void hme_level1_b64(PictureParentControlSet* pcs, uint32_t org_x, uint32_t org_y, MeContext* me_ctx,
1895
0
                           EbPictureBufferDesc* input_ptr) {
1896
0
    const uint32_t block_width  = me_ctx->b64_width;
1897
0
    const uint32_t block_height = me_ctx->b64_height;
1898
1899
    // List Loop
1900
0
    const uint8_t num_of_list_to_search = me_ctx->num_of_list_to_search;
1901
0
    for (uint32_t list_index = REF_LIST_0; list_index < num_of_list_to_search; ++list_index) {
1902
        // Ref Picture Loop
1903
0
        const uint8_t num_of_ref_pic_to_search = me_ctx->num_of_ref_pic_to_search[list_index];
1904
0
        for (uint8_t ref_pic_index = 0; ref_pic_index < num_of_ref_pic_to_search; ++ref_pic_index) {
1905
0
            uint16_t             dist            = 0;
1906
0
            EbPictureBufferDesc* quarter_ref_pic = get_me_reference(
1907
0
                pcs, me_ctx, list_index, ref_pic_index, 1, &dist, input_ptr->width, input_ptr->height);
1908
1909
0
            if (me_ctx->temporal_layer_index > 0 || list_index == 0) {
1910
                // If me_early_exit_th is enabled, skip HME L0 for the current block if the zero-zero SAD is low
1911
0
                if (me_ctx->me_early_exit_th) {
1912
0
                    if (me_ctx->zz_sad[list_index][ref_pic_index] < (me_ctx->me_early_exit_th >> 2)) {
1913
0
                        for (uint32_t sr_idx_y = 0; sr_idx_y < me_ctx->num_hme_sa_h; sr_idx_y++) {
1914
0
                            for (uint32_t sr_idx_x = 0; sr_idx_x < me_ctx->num_hme_sa_w; sr_idx_x++) {
1915
0
                                me_ctx->x_hme_level1_search_center[list_index][ref_pic_index][sr_idx_x][sr_idx_y] = 0;
1916
0
                                me_ctx->y_hme_level1_search_center[list_index][ref_pic_index][sr_idx_x][sr_idx_y] = 0;
1917
0
                                me_ctx->hme_level1_sad[list_index][ref_pic_index][sr_idx_x][sr_idx_y]             = 0;
1918
0
                            }
1919
0
                        }
1920
0
                        continue;
1921
0
                    }
1922
0
                }
1923
0
                if (!me_ctx->search_results[list_index][ref_pic_index].do_ref) {
1924
0
                    for (uint32_t sr_idx_y = 0; sr_idx_y < me_ctx->num_hme_sa_h; sr_idx_y++) {
1925
0
                        for (uint32_t sr_idx_x = 0; sr_idx_x < me_ctx->num_hme_sa_w; sr_idx_x++) {
1926
0
                            me_ctx->x_hme_level1_search_center[list_index][ref_pic_index][sr_idx_x][sr_idx_y] = 0;
1927
0
                            me_ctx->y_hme_level1_search_center[list_index][ref_pic_index][sr_idx_x][sr_idx_y] = 0;
1928
0
                            me_ctx->hme_level1_sad[list_index][ref_pic_index][sr_idx_x][sr_idx_y]             = MAX_U32;
1929
0
                        }
1930
0
                    }
1931
0
                    continue;
1932
0
                }
1933
0
                for (uint8_t sr_h = 0; sr_h < me_ctx->num_hme_sa_h; sr_h++) {
1934
0
                    for (uint8_t sr_w = 0; sr_w < me_ctx->num_hme_sa_w; sr_w++) {
1935
0
                        if (me_ctx->prev_me_stage_based_exit_th) {
1936
0
                            if (me_ctx->hme_level0_sad[list_index][ref_pic_index][sr_w][sr_h] <
1937
0
                                (me_ctx->prev_me_stage_based_exit_th >> 5)) {
1938
0
                                me_ctx->x_hme_level1_search_center[list_index][ref_pic_index][sr_w][sr_h] =
1939
0
                                    me_ctx->x_hme_level0_search_center[list_index][ref_pic_index][sr_w][sr_h];
1940
0
                                me_ctx->y_hme_level1_search_center[list_index][ref_pic_index][sr_w][sr_h] =
1941
0
                                    me_ctx->y_hme_level0_search_center[list_index][ref_pic_index][sr_w][sr_h];
1942
0
                                me_ctx->hme_level1_sad[list_index][ref_pic_index][sr_w][sr_h] =
1943
0
                                    me_ctx->hme_level0_sad[list_index][ref_pic_index][sr_w][sr_h];
1944
0
                                continue;
1945
0
                            }
1946
0
                        }
1947
1948
0
                        hme_level_1(me_ctx,
1949
0
                                    ((int16_t)org_x) >> 1,
1950
0
                                    ((int16_t)org_y) >> 1,
1951
0
                                    block_width >> 1,
1952
0
                                    block_height >> 1,
1953
0
                                    quarter_ref_pic,
1954
0
                                    (int16_t)me_ctx->hme_l1_sa.width,
1955
0
                                    (int16_t)me_ctx->hme_l1_sa.height,
1956
0
                                    me_ctx->x_hme_level0_search_center[list_index][ref_pic_index][sr_w][sr_h] >> 1,
1957
0
                                    me_ctx->y_hme_level0_search_center[list_index][ref_pic_index][sr_w][sr_h] >> 1,
1958
0
                                    &(me_ctx->hme_level1_sad[list_index][ref_pic_index][sr_w][sr_h]),
1959
0
                                    &(me_ctx->x_hme_level1_search_center[list_index][ref_pic_index][sr_w][sr_h]),
1960
0
                                    &(me_ctx->y_hme_level1_search_center[list_index][ref_pic_index][sr_w][sr_h]));
1961
0
                    }
1962
0
                }
1963
0
            }
1964
0
        } // End ref pic loop
1965
0
    } // End list loop
1966
0
}
1967
1968
/*******************************************
1969
 * performs hierarchical ME level 2 for one 64x64 block (uni-prediction only)
1970
 *******************************************/
1971
static void hme_level2_b64(PictureParentControlSet* pcs, uint32_t org_x, uint32_t org_y, MeContext* me_ctx,
1972
0
                           EbPictureBufferDesc* input_ptr) {
1973
0
    const uint32_t block_width  = me_ctx->b64_width;
1974
0
    const uint32_t block_height = me_ctx->b64_height;
1975
    // List Loop
1976
0
    const uint8_t num_of_list_to_search = me_ctx->num_of_list_to_search;
1977
0
    for (int list_index = REF_LIST_0; list_index < num_of_list_to_search; ++list_index) {
1978
        // Ref Picture Loop
1979
0
        const uint8_t num_of_ref_pic_to_search = me_ctx->num_of_ref_pic_to_search[list_index];
1980
0
        for (uint8_t ref_pic_index = 0; ref_pic_index < num_of_ref_pic_to_search; ++ref_pic_index) {
1981
0
            uint16_t             dist    = 0;
1982
0
            EbPictureBufferDesc* ref_pic = get_me_reference(
1983
0
                pcs, me_ctx, list_index, ref_pic_index, 2, &dist, input_ptr->width, input_ptr->height);
1984
1985
0
            if (me_ctx->temporal_layer_index > 0 || list_index == 0) {
1986
0
                for (uint8_t sr_h = 0; sr_h < me_ctx->num_hme_sa_h; sr_h++) {
1987
0
                    for (uint8_t sr_w = 0; sr_w < me_ctx->num_hme_sa_w; sr_w++) {
1988
0
                        if (me_ctx->prev_me_stage_based_exit_th) {
1989
0
                            if (me_ctx->hme_level1_sad[list_index][ref_pic_index][sr_w][sr_h] <
1990
0
                                (me_ctx->prev_me_stage_based_exit_th >> 2)) {
1991
0
                                me_ctx->x_hme_level2_search_center[list_index][ref_pic_index][sr_w][sr_h] =
1992
0
                                    me_ctx->x_hme_level1_search_center[list_index][ref_pic_index][sr_w][sr_h];
1993
0
                                me_ctx->y_hme_level2_search_center[list_index][ref_pic_index][sr_w][sr_h] =
1994
0
                                    me_ctx->y_hme_level1_search_center[list_index][ref_pic_index][sr_w][sr_h];
1995
0
                                me_ctx->hme_level2_sad[list_index][ref_pic_index][sr_w][sr_h] =
1996
0
                                    me_ctx->hme_level1_sad[list_index][ref_pic_index][sr_w][sr_h];
1997
0
                                continue;
1998
0
                            }
1999
0
                        }
2000
2001
0
                        hme_level_2(me_ctx,
2002
0
                                    (int16_t)org_x,
2003
0
                                    (int16_t)org_y,
2004
0
                                    block_width,
2005
0
                                    block_height,
2006
0
                                    ref_pic,
2007
0
                                    (int16_t)me_ctx->hme_l2_sa.width,
2008
0
                                    (int16_t)me_ctx->hme_l2_sa.height,
2009
0
                                    me_ctx->x_hme_level1_search_center[list_index][ref_pic_index][sr_w][sr_h],
2010
0
                                    me_ctx->y_hme_level1_search_center[list_index][ref_pic_index][sr_w][sr_h],
2011
0
                                    &(me_ctx->hme_level2_sad[list_index][ref_pic_index][sr_w][sr_h]),
2012
0
                                    &(me_ctx->x_hme_level2_search_center[list_index][ref_pic_index][sr_w][sr_h]),
2013
0
                                    &(me_ctx->y_hme_level2_search_center[list_index][ref_pic_index][sr_w][sr_h]));
2014
0
                    }
2015
0
                }
2016
0
            }
2017
0
        } // End ref pic loop
2018
0
    } // End list loop
2019
0
}
2020
2021
/*******************************************
2022
 *   Set the final search centre
2023
 *******************************************/
2024
2025
0
void set_final_search_centre_sb(PictureParentControlSet* pcs, MeContext* me_ctx) {
2026
0
    UNUSED(pcs);
2027
    // Hierarchical ME Search Center
2028
0
    int16_t xHmeSearchCenter = 0;
2029
0
    int16_t yHmeSearchCenter = 0;
2030
2031
    // Final ME Search Center
2032
0
    int16_t x_search_center = 0;
2033
0
    int16_t y_search_center = 0;
2034
2035
    // Search Center SADs
2036
0
    uint64_t hmeMvSad = 0;
2037
0
    uint32_t num_of_list_to_search;
2038
0
    uint32_t list_index;
2039
0
    uint8_t  ref_pic_index;
2040
    // Configure HME level 0, level 1 and level 2 from static config parameters
2041
0
    bool enable_hme_level0_flag = me_ctx->enable_hme_level0_flag;
2042
0
    bool enable_hme_level1_flag = me_ctx->enable_hme_level1_flag;
2043
0
    bool enable_hme_level2_flag = me_ctx->enable_hme_level2_flag;
2044
2045
0
    uint64_t best_cost    = (uint64_t)~0;
2046
0
    me_ctx->best_list_idx = 0;
2047
0
    me_ctx->best_ref_idx  = 0;
2048
0
    num_of_list_to_search = me_ctx->num_of_list_to_search;
2049
2050
    // Uni-Prediction motion estimation loop
2051
    // List Loop
2052
0
    for (list_index = REF_LIST_0; list_index < num_of_list_to_search; ++list_index) {
2053
0
        uint8_t num_of_ref_pic_to_search = me_ctx->num_of_ref_pic_to_search[list_index];
2054
        // Ref Picture Loop
2055
0
        for (ref_pic_index = 0; ref_pic_index < num_of_ref_pic_to_search; ++ref_pic_index) {
2056
0
            if (me_ctx->temporal_layer_index > 0 || list_index == 0) {
2057
0
                if (me_ctx->enable_hme_flag) {
2058
                    // Hierarchical ME - Search Center
2059
0
                    if (enable_hme_level0_flag && !enable_hme_level1_flag && !enable_hme_level2_flag) {
2060
0
                        xHmeSearchCenter = me_ctx->x_hme_level0_search_center[list_index][ref_pic_index][0][0];
2061
0
                        yHmeSearchCenter = me_ctx->y_hme_level0_search_center[list_index][ref_pic_index][0][0];
2062
0
                        hmeMvSad         = me_ctx->hme_level0_sad[list_index][ref_pic_index][0][0];
2063
2064
0
                        uint32_t search_region_number_in_width  = 1;
2065
0
                        uint32_t search_region_number_in_height = 0;
2066
0
                        while (search_region_number_in_height < me_ctx->num_hme_sa_h) {
2067
0
                            while (search_region_number_in_width < me_ctx->num_hme_sa_w) {
2068
0
                                xHmeSearchCenter =
2069
0
                                    (me_ctx->hme_level0_sad[list_index][ref_pic_index][search_region_number_in_width]
2070
0
                                                           [search_region_number_in_height] < hmeMvSad)
2071
0
                                    ? me_ctx->x_hme_level0_search_center[list_index][ref_pic_index]
2072
0
                                                                        [search_region_number_in_width]
2073
0
                                                                        [search_region_number_in_height]
2074
0
                                    : xHmeSearchCenter;
2075
0
                                yHmeSearchCenter =
2076
0
                                    (me_ctx->hme_level0_sad[list_index][ref_pic_index][search_region_number_in_width]
2077
0
                                                           [search_region_number_in_height] < hmeMvSad)
2078
0
                                    ? me_ctx->y_hme_level0_search_center[list_index][ref_pic_index]
2079
0
                                                                        [search_region_number_in_width]
2080
0
                                                                        [search_region_number_in_height]
2081
0
                                    : yHmeSearchCenter;
2082
0
                                hmeMvSad =
2083
0
                                    (me_ctx->hme_level0_sad[list_index][ref_pic_index][search_region_number_in_width]
2084
0
                                                           [search_region_number_in_height] < hmeMvSad)
2085
0
                                    ? me_ctx->hme_level0_sad[list_index][ref_pic_index][search_region_number_in_width]
2086
0
                                                            [search_region_number_in_height]
2087
0
                                    : hmeMvSad;
2088
0
                                search_region_number_in_width++;
2089
0
                            }
2090
0
                            search_region_number_in_width = 0;
2091
0
                            search_region_number_in_height++;
2092
0
                        }
2093
0
                    }
2094
2095
0
                    if (enable_hme_level1_flag && !enable_hme_level2_flag) {
2096
0
                        xHmeSearchCenter = me_ctx->x_hme_level1_search_center[list_index][ref_pic_index][0][0];
2097
0
                        yHmeSearchCenter = me_ctx->y_hme_level1_search_center[list_index][ref_pic_index][0][0];
2098
0
                        hmeMvSad         = me_ctx->hme_level1_sad[list_index][ref_pic_index][0][0];
2099
2100
0
                        uint32_t search_region_number_in_width  = 1;
2101
0
                        uint32_t search_region_number_in_height = 0;
2102
0
                        while (search_region_number_in_height < me_ctx->num_hme_sa_h) {
2103
0
                            while (search_region_number_in_width < me_ctx->num_hme_sa_w) {
2104
0
                                xHmeSearchCenter =
2105
0
                                    (me_ctx->hme_level1_sad[list_index][ref_pic_index][search_region_number_in_width]
2106
0
                                                           [search_region_number_in_height] < hmeMvSad)
2107
0
                                    ? me_ctx->x_hme_level1_search_center[list_index][ref_pic_index]
2108
0
                                                                        [search_region_number_in_width]
2109
0
                                                                        [search_region_number_in_height]
2110
0
                                    : xHmeSearchCenter;
2111
0
                                yHmeSearchCenter =
2112
0
                                    (me_ctx->hme_level1_sad[list_index][ref_pic_index][search_region_number_in_width]
2113
0
                                                           [search_region_number_in_height] < hmeMvSad)
2114
0
                                    ? me_ctx->y_hme_level1_search_center[list_index][ref_pic_index]
2115
0
                                                                        [search_region_number_in_width]
2116
0
                                                                        [search_region_number_in_height]
2117
0
                                    : yHmeSearchCenter;
2118
0
                                hmeMvSad =
2119
0
                                    (me_ctx->hme_level1_sad[list_index][ref_pic_index][search_region_number_in_width]
2120
0
                                                           [search_region_number_in_height] < hmeMvSad)
2121
0
                                    ? me_ctx->hme_level1_sad[list_index][ref_pic_index][search_region_number_in_width]
2122
0
                                                            [search_region_number_in_height]
2123
0
                                    : hmeMvSad;
2124
0
                                search_region_number_in_width++;
2125
0
                            }
2126
0
                            search_region_number_in_width = 0;
2127
0
                            search_region_number_in_height++;
2128
0
                        }
2129
0
                    }
2130
2131
0
                    if (enable_hme_level2_flag) {
2132
0
                        xHmeSearchCenter = me_ctx->x_hme_level2_search_center[list_index][ref_pic_index][0][0];
2133
0
                        yHmeSearchCenter = me_ctx->y_hme_level2_search_center[list_index][ref_pic_index][0][0];
2134
0
                        hmeMvSad         = me_ctx->hme_level2_sad[list_index][ref_pic_index][0][0];
2135
2136
0
                        uint32_t search_region_number_in_width  = 1;
2137
0
                        uint32_t search_region_number_in_height = 0;
2138
0
                        while (search_region_number_in_height < me_ctx->num_hme_sa_h) {
2139
0
                            while (search_region_number_in_width < me_ctx->num_hme_sa_w) {
2140
0
                                xHmeSearchCenter =
2141
0
                                    (me_ctx->hme_level2_sad[list_index][ref_pic_index][search_region_number_in_width]
2142
0
                                                           [search_region_number_in_height] < hmeMvSad)
2143
0
                                    ? me_ctx->x_hme_level2_search_center[list_index][ref_pic_index]
2144
0
                                                                        [search_region_number_in_width]
2145
0
                                                                        [search_region_number_in_height]
2146
0
                                    : xHmeSearchCenter;
2147
0
                                yHmeSearchCenter =
2148
0
                                    (me_ctx->hme_level2_sad[list_index][ref_pic_index][search_region_number_in_width]
2149
0
                                                           [search_region_number_in_height] < hmeMvSad)
2150
0
                                    ? me_ctx->y_hme_level2_search_center[list_index][ref_pic_index]
2151
0
                                                                        [search_region_number_in_width]
2152
0
                                                                        [search_region_number_in_height]
2153
0
                                    : yHmeSearchCenter;
2154
0
                                hmeMvSad =
2155
0
                                    (me_ctx->hme_level2_sad[list_index][ref_pic_index][search_region_number_in_width]
2156
0
                                                           [search_region_number_in_height] < hmeMvSad)
2157
0
                                    ? me_ctx->hme_level2_sad[list_index][ref_pic_index][search_region_number_in_width]
2158
0
                                                            [search_region_number_in_height]
2159
0
                                    : hmeMvSad;
2160
0
                                search_region_number_in_width++;
2161
0
                            }
2162
0
                            search_region_number_in_width = 0;
2163
0
                            search_region_number_in_height++;
2164
0
                        }
2165
0
                    }
2166
2167
0
                    x_search_center = xHmeSearchCenter;
2168
0
                    y_search_center = yHmeSearchCenter;
2169
0
                }
2170
0
            } else {
2171
0
                x_search_center = 0;
2172
0
                y_search_center = 0;
2173
0
            }
2174
2175
            //sc valid for all cases. 0,0 if hme not done.
2176
0
            me_ctx->search_results[list_index][ref_pic_index].hme_sc_x = x_search_center;
2177
0
            me_ctx->search_results[list_index][ref_pic_index].hme_sc_y = y_search_center;
2178
2179
0
            me_ctx->search_results[list_index][ref_pic_index].hme_sad =
2180
0
                hmeMvSad; //this is not valid in all cases. only when HME is done, and when HMELevel2 is done
2181
            //also for base layer some references are redundant!!
2182
0
            if (hmeMvSad < best_cost) {
2183
0
                best_cost             = hmeMvSad;
2184
0
                me_ctx->best_list_idx = list_index;
2185
0
                me_ctx->best_ref_idx  = ref_pic_index;
2186
0
            }
2187
0
        }
2188
0
    }
2189
0
}
2190
2191
// Initialize zz SAD array
2192
0
static void init_zz_sad(PictureParentControlSet* pcs, MeContext* me_ctx, uint32_t org_x, uint32_t org_y) {
2193
0
    const uint32_t block_width  = me_ctx->b64_width;
2194
0
    const uint32_t block_height = me_ctx->b64_height;
2195
0
    uint32_t       best_zz_sad  = MAX_U32;
2196
    // List Loop
2197
0
    for (int list_i = REF_LIST_0; list_i < me_ctx->num_of_list_to_search; ++list_i) {
2198
        // Ref Picture Loop
2199
0
        for (uint8_t ref_i = 0; ref_i < me_ctx->num_of_ref_pic_to_search[list_i]; ++ref_i) {
2200
0
            if (me_ctx->temporal_layer_index > 0 || list_i == 0) {
2201
0
                EbPictureBufferDesc* ref_pic = me_ctx->me_ds_ref_array[list_i][ref_i].picture_ptr;
2202
0
                uint32_t             zz_sad  = get_zz_sad(ref_pic, me_ctx, org_x, org_y, block_width, block_height);
2203
                //normalize for incomplete b64
2204
0
                zz_sad                        = (zz_sad * 64 * 64) / (block_width * block_height);
2205
0
                me_ctx->zz_sad[list_i][ref_i] = zz_sad;
2206
0
                best_zz_sad                   = MIN(best_zz_sad, zz_sad);
2207
0
            }
2208
0
        }
2209
0
    }
2210
0
    const uint32_t zz_th = me_ctx->me_hme_prune_ctrls.zz_sad_th;
2211
0
    if (me_ctx->temporal_layer_index > 0 && best_zz_sad < zz_th) {
2212
0
        for (int list_i = REF_LIST_0; list_i < me_ctx->num_of_list_to_search; ++list_i) {
2213
0
            for (uint8_t ref_i = 0; ref_i < me_ctx->num_of_ref_pic_to_search[list_i]; ++ref_i) {
2214
0
                if (ref_i == 0) {
2215
0
                    continue;
2216
0
                }
2217
2218
0
                const uint32_t zz_sad_pct = me_ctx->me_hme_prune_ctrls.zz_sad_pct;
2219
0
                if ((me_ctx->zz_sad[list_i][ref_i] - best_zz_sad) * 100 > (zz_sad_pct * best_zz_sad)) {
2220
0
                    me_ctx->search_results[list_i][ref_i].do_ref = 0;
2221
0
                }
2222
0
            }
2223
0
        }
2224
0
    }
2225
2226
0
    const uint32_t safe_limit_zz_th = me_ctx->me_safe_limit_zz_th;
2227
0
    if (safe_limit_zz_th) {
2228
0
        bool me_safe_limit_refs = false;
2229
0
        if (pcs->hierarchical_levels > 0 && me_ctx->num_of_list_to_search == 2 &&
2230
0
            pcs->temporal_layer_index >= pcs->hierarchical_levels && pcs->similar_brightness_refs &&
2231
0
            me_ctx->zz_sad[0][0] < safe_limit_zz_th && me_ctx->zz_sad[1][0] < safe_limit_zz_th) {
2232
0
            me_safe_limit_refs = true;
2233
0
        }
2234
2235
0
        for (int list_i = REF_LIST_0; list_i < me_ctx->num_of_list_to_search; ++list_i) {
2236
0
            for (uint8_t ref_i = 0; ref_i < me_ctx->num_of_ref_pic_to_search[list_i]; ++ref_i) {
2237
0
                if (me_safe_limit_refs && ref_i > 0) {
2238
0
                    me_ctx->search_results[list_i][ref_i].do_ref = 0;
2239
0
                }
2240
0
            }
2241
0
        }
2242
0
    }
2243
0
}
2244
2245
/*******************************************
2246
 * performs hierarchical ME for a 64x64 block for every ref frame
2247
 *******************************************/
2248
static void hme_b64(PictureParentControlSet* pcs, uint32_t org_x, uint32_t org_y, MeContext* me_ctx,
2249
0
                    EbPictureBufferDesc* input_ptr) {
2250
    // If needed, initialize the zz sad array
2251
0
    if (me_ctx->me_early_exit_th || me_ctx->me_safe_limit_zz_th) {
2252
0
        init_zz_sad(pcs, me_ctx, org_x, org_y);
2253
0
    }
2254
2255
0
    if (me_ctx->prehme_ctrl.enable) {
2256
        // perform pre-HME
2257
0
        prehme_b64(pcs, org_x, org_y, me_ctx, input_ptr);
2258
0
    }
2259
2260
0
    if (me_ctx->enable_hme_flag) {
2261
        // perform hierarchical ME level 0
2262
0
        if (me_ctx->enable_hme_level0_flag) {
2263
0
            hme_level0_b64(pcs, org_x, org_y, me_ctx, input_ptr);
2264
0
        }
2265
2266
        // perform hierarchical ME level 1
2267
0
        if (me_ctx->enable_hme_level1_flag) {
2268
0
            hme_level1_b64(pcs, org_x, org_y, me_ctx, input_ptr);
2269
0
        }
2270
2271
        // perform hierarchical ME level 2
2272
0
        if (me_ctx->enable_hme_level2_flag) {
2273
0
            hme_level2_b64(pcs, org_x, org_y, me_ctx, input_ptr);
2274
0
        }
2275
0
    }
2276
2277
    // Set final MV centre
2278
0
    set_final_search_centre_sb(pcs, me_ctx);
2279
2280
0
    if (me_ctx->me_type == ME_MCTF) {
2281
0
        if (ABS(me_ctx->search_results[0][0].hme_sc_x) > ABS(me_ctx->search_results[0][0].hme_sc_y)) {
2282
0
            me_ctx->tf_tot_horz_blks++;
2283
0
        } else {
2284
0
            me_ctx->tf_tot_vert_blks++;
2285
0
        }
2286
0
    }
2287
0
}
2288
2289
0
// Post-HME decisions driven by the per-reference HME SAD stored in me_ctx->search_results:
//   1) reference pruning: clear do_ref for non-first references whose HME SAD deviates too
//      much from the best HME SAD;
//   2) search-region adjustment: choose a divisor (reduce_me_sr_divisor) for references
//      with a low HME SAD.
static void hme_prune_ref_and_adjust_sr(MeContext* me_ctx) {
    const uint16_t prune_ref_th = me_ctx->me_hme_prune_ctrls.prune_ref_if_hme_sad_dev_bigger_than_th;

    // A threshold with all bits set disables the pruning
    if (me_ctx->me_hme_prune_ctrls.enable_me_hme_ref_pruning && (prune_ref_th != (uint16_t)~0)) {
        // Smallest HME SAD over every (list, ref) slot
        uint64_t min_hme_sad = (uint64_t)~0;
        for (int list = 0; list < MAX_NUM_OF_REF_PIC_LIST; ++list) {
            for (int ref = 0; ref < REF_LIST_MAX_DEPTH; ++ref) {
                if (me_ctx->search_results[list][ref].hme_sad < min_hme_sad) {
                    min_hme_sad = me_ctx->search_results[list][ref].hme_sad;
                }
            }
        }

        // Prune references based on HME sad; reference 0 of each list is never pruned
        const uint64_t max_dev = prune_ref_th * min_hme_sad;
        for (uint32_t list = 0; list < MAX_NUM_OF_REF_PIC_LIST; list++) {
            for (uint32_t ref = 1; ref < REF_LIST_MAX_DEPTH; ref++) {
                if ((me_ctx->search_results[list][ref].hme_sad - min_hme_sad) * 100 > max_dev) {
                    me_ctx->search_results[list][ref].do_ref = 0;
                }
            }
        }
    }

    if (!me_ctx->me_sr_adjustment_ctrls.enable_me_sr_adjustment) {
        return;
    }

    const uint16_t mv_len_th         = me_ctx->me_sr_adjustment_ctrls.reduce_me_sr_based_on_mv_length_th;
    const uint16_t stationary_sad_th = me_ctx->me_sr_adjustment_ctrls.stationary_hme_sad_abs_th;
    const uint16_t low_sad_th        = me_ctx->me_sr_adjustment_ctrls.reduce_me_sr_based_on_hme_sad_abs_th;

    // Reduce the ME search region if the hme sad is low
    for (uint32_t list = 0; list < MAX_NUM_OF_REF_PIC_LIST; list++) {
        for (uint32_t ref = 0; ref < REF_LIST_MAX_DEPTH; ref++) {
            // Short search-centre vector in both components and a very low SAD
            const bool stationary = ABS(me_ctx->search_results[list][ref].hme_sc_x) <= mv_len_th &&
                ABS(me_ctx->search_results[list][ref].hme_sc_y) <= mv_len_th &&
                me_ctx->search_results[list][ref].hme_sad < stationary_sad_th;

            if (stationary) {
                me_ctx->reduce_me_sr_divisor[list][ref] = me_ctx->me_sr_adjustment_ctrls.stationary_me_sr_divisor;
            } else if (me_ctx->search_results[list][ref].hme_sad < low_sad_th) {
                me_ctx->reduce_me_sr_divisor[list][ref] = me_ctx->me_sr_adjustment_ctrls.me_sr_divisor_for_low_hme_sad;
            }
        }
    }
}
2328
2329
// Index remapping table for the 85 PUs of a 64x64 block (1 + 4 + 16 + 64 entries).
// The table is a permutation of 0..84 and each layer maps onto its own index range.
static const uint8_t z_to_raster[85] = {
    // entry 0
    0,
    // entries 1..4
    1, 2, 3, 4,
    // entries 5..20
    5, 6, 9, 10, 7, 8, 11, 12, 13, 14, 17, 18, 15, 16, 19, 20,
    // entries 21..84
    21, 22, 29, 30, 23, 24, 31, 32, 37, 38, 45, 46, 39, 40, 47, 48,
    25, 26, 33, 34, 27, 28, 35, 36, 41, 42, 49, 50, 43, 44, 51, 52,
    53, 54, 61, 62, 55, 56, 63, 64, 69, 70, 77, 78, 71, 72, 79, 80,
    57, 58, 65, 66, 59, 60, 67, 68, 73, 74, 81, 82, 75, 76, 83, 84};
2333
2334
// Build the per-PU ME candidate list of one SB when each list holds exactly one reference.
// For every PU: store the best SAD in me_ctx->me_distortion[], write the unipred
// candidate(s) that survive pruning, then at most one bipred (list0, list1) candidate.
// The per-PU candidate count is preset to 1 and raised only when a bipred candidate is added.
static void construct_me_candidate_array_mrp_off(PictureParentControlSet* pcs, MeContext* me_ctx,
                                                 uint32_t num_of_list_to_search, uint32_t sb_index) {
    // This function should only be called if there is one ref frame in each list
    assert(me_ctx->num_of_ref_pic_to_search[0] == 1);
    assert(me_ctx->num_of_ref_pic_to_search[1] == 1);
    const uint8_t ref_pic_idx = 0;

    // Whether each list's reference is allowed, before any per-PU pruning
    const uint8_t l0_allowed = me_ctx->search_results[REF_LIST_0][0].do_ref;
    const uint8_t l1_allowed = (num_of_list_to_search == 1) ? 0 : me_ctx->search_results[REF_LIST_1][0].do_ref;

    // Search a single list unless two lists are requested and the list 1 reference is enabled
    if (!(num_of_list_to_search >= 2 && me_ctx->search_results[REF_LIST_1][0].do_ref)) {
        num_of_list_to_search = 1;
    }

    // SAD-based pruning is only meaningful when both references compete
    const uint32_t me_prune_th = (l0_allowed && l1_allowed) ? me_ctx->prune_me_candidates_th : 0;

    // Preset the candidate count to 1 for every PU in use (one memset instead of a store per PU)
    uint8_t number_of_pus;
    if (!pcs->enable_me_16x16) {
        number_of_pus = MAX_SB64_PU_COUNT_WO_16X16;
    } else if (pcs->enable_me_8x8) {
        number_of_pus = pcs->max_number_of_pus_per_sb;
    } else {
        number_of_pus = MAX_SB64_PU_COUNT_NO_8X8;
    }
    memset(pcs->pa_me_data->me_results[sb_index]->total_me_candidate_index, 1, number_of_pus);

    for (uint8_t n_idx = 0; n_idx < pcs->max_number_of_pus_per_sb; ++n_idx) {
        const uint8_t pu_index = z_to_raster[n_idx];
        uint8_t       num_cand = 0;

        // Is this PU size part of the stored ME results?
        bool use_me_pu;
        if (pcs->enable_me_16x16) {
            use_me_pu = pcs->enable_me_8x8 || n_idx < MAX_SB64_PU_COUNT_NO_8X8;
        } else {
            use_me_pu = n_idx < MAX_SB64_PU_COUNT_WO_16X16;
        }

        MeCandidate* me_candidate_array = NULL;
        if (use_me_pu) {
            me_candidate_array =
                &pcs->pa_me_data->me_results[sb_index]->me_candidate_array[pu_index * pcs->pa_me_data->max_cand];
        }

        // Per-PU working copy of the enable flags (pruning below may clear them)
        uint8_t blk_do_ref[MAX_NUM_OF_REF_PIC_LIST] = {l0_allowed, l1_allowed};

        // Best SAD of the PU: the minimum when both lists are allowed, otherwise the SAD of
        // list 0 if it is allowed and of list 1 in the remaining case
        uint32_t best_me_dist;
        if (l0_allowed && l1_allowed) {
            best_me_dist = MIN(me_ctx->p_sb_best_sad[REF_LIST_0][ref_pic_idx][n_idx],
                               me_ctx->p_sb_best_sad[REF_LIST_1][ref_pic_idx][n_idx]);
        } else if (l0_allowed) {
            best_me_dist = me_ctx->p_sb_best_sad[REF_LIST_0][ref_pic_idx][n_idx];
        } else {
            best_me_dist = me_ctx->p_sb_best_sad[REF_LIST_1][ref_pic_idx][n_idx];
        }
        me_ctx->me_distortion[pu_index] = best_me_dist;

        // If both refs have a candidate, optionally keep only the best one for unipred
        int8_t best_unipred_list = -1;
        if (me_ctx->use_best_unipred_cand_only && blk_do_ref[REF_LIST_0] && blk_do_ref[REF_LIST_1]) {
            if (me_ctx->p_sb_best_sad[REF_LIST_0][ref_pic_idx][n_idx] <
                me_ctx->p_sb_best_sad[REF_LIST_1][ref_pic_idx][n_idx]) {
                best_unipred_list = 0;
            } else {
                best_unipred_list = 1;
            }
        }

        // Unipred candidates
        for (int list = REF_LIST_0; (uint32_t)list < num_of_list_to_search && (use_me_pu || num_cand == 0); ++list) {
            // ME was skipped, so do not add this Unipred candidate
            if (!blk_do_ref[list]) {
                continue;
            }

            if (me_prune_th > 0) {
                const uint32_t dev_x100 = (me_ctx->p_sb_best_sad[list][ref_pic_idx][n_idx] - best_me_dist) * 100;
                if (dev_x100 > (best_me_dist * me_prune_th)) {
                    blk_do_ref[list] = 0;
                    continue;
                }
            }

            // True when this list lost against the other one and is not injected as unipred
            const bool skip_unipred = best_unipred_list != -1 && best_unipred_list != list;

            if (use_me_pu) {
                if (!skip_unipred) {
                    MeCandidate* cand = &me_candidate_array[num_cand];
                    cand->direction   = list;
                    cand->ref_idx_l0  = ref_pic_idx;
                    cand->ref_idx_l1  = ref_pic_idx;
                    // The slot of the list that is not used receives the value 24
                    cand->ref0_list = list == 0 ? list : 24;
                    cand->ref1_list = list == 1 ? list : 24;
                }
                // The MV is stored even for a skipped unipred, in case bipred is injected
                pcs->pa_me_data->me_results[sb_index]
                    ->me_mv_array[pu_index * pcs->pa_me_data->max_refs + (list ? pcs->pa_me_data->max_l0 : 0) +
                                  ref_pic_idx]
                    .as_int = me_ctx->p_sb_best_mv[list][ref_pic_idx][n_idx];
            }

            if (!skip_unipred) {
                num_cand++;
            }
        }

        // Can have up to one bipred cand (LAST ,BWD)
        if (blk_do_ref[REF_LIST_0] && blk_do_ref[REF_LIST_1] && use_me_pu) {
            // Both directions survived, so two lists must have been searched
            assert(num_of_list_to_search == 2);
            MeCandidate* cand = &me_candidate_array[num_cand];
            cand->direction   = BI_PRED;
            cand->ref_idx_l0  = ref_pic_idx;
            cand->ref_idx_l1  = ref_pic_idx;
            cand->ref0_list   = REFERENCE_PIC_LIST_0;
            cand->ref1_list   = REFERENCE_PIC_LIST_1;

            // store total me candidate count (unipred candidates + the bipred one)
            pcs->pa_me_data->me_results[sb_index]->total_me_candidate_index[pu_index] = num_cand + 1;
        }
    }
}
2444
2445
// Build the per-PU ME candidate list of one SB when list 0 holds exactly one reference and
// list 1 holds none. Every PU gets its list 0 SAD stored in ctx->me_distortion[] and, when the
// reference is enabled and the PU size is in use, a single list 0 unipred candidate plus its MV.
//   num_of_list_to_search : kept for signature parity with the sibling constructors; it has no
//                           influence on the result of this function.
static void construct_me_candidate_array_single_ref(PictureParentControlSet* pcs, MeContext* ctx,
                                                    uint32_t num_of_list_to_search, uint32_t sb_index) {
    // This function should only be called if there is one ref frame in list 0
    assert(ctx->num_of_ref_pic_to_search[0] == 1);
    assert(ctx->num_of_ref_pic_to_search[1] == 0);
    const uint8_t ref_pic_idx = 0;

    // The original code clamped this parameter to 1 but never read it again; the dead store is
    // removed and the parameter is explicitly marked unused.
    (void)num_of_list_to_search;

    // Whether the single list 0 reference is allowed
    const uint8_t blk_do_ref = ctx->search_results[REF_LIST_0][0].do_ref;

    // Set the count to 1 for all PUs in use with one memset. With a single reference there is
    // never more than one candidate per PU, so the count is not touched again below.
    uint8_t number_of_pus = pcs->enable_me_16x16
        ? pcs->enable_me_8x8 ? pcs->max_number_of_pus_per_sb : MAX_SB64_PU_COUNT_NO_8X8
        : MAX_SB64_PU_COUNT_WO_16X16;
    memset(pcs->pa_me_data->me_results[sb_index]->total_me_candidate_index, 1, number_of_pus);

    for (uint8_t n_idx = 0; n_idx < pcs->max_number_of_pus_per_sb; ++n_idx) {
        const uint8_t pu_index = z_to_raster[n_idx];

        // Is this PU size part of the stored ME results?
        uint8_t      use_me_pu          = pcs->enable_me_16x16 ? pcs->enable_me_8x8 || n_idx < MAX_SB64_PU_COUNT_NO_8X8
                                                               : n_idx < MAX_SB64_PU_COUNT_WO_16X16;
        MeCandidate* me_candidate_array = NULL;
        if (use_me_pu) {
            me_candidate_array =
                &pcs->pa_me_data->me_results[sb_index]->me_candidate_array[pu_index * pcs->pa_me_data->max_cand];
        }

        // The distortion is recorded for every PU, even when no candidate is written
        ctx->me_distortion[pu_index] = ctx->p_sb_best_sad[REF_LIST_0][ref_pic_idx][n_idx];

        //ME was skipped, so do not add this Unipred candidate
        if (blk_do_ref == 0) {
            continue;
        }

        if (use_me_pu) {
            me_candidate_array[0].direction  = REF_LIST_0;
            me_candidate_array[0].ref_idx_l0 = ref_pic_idx;
            me_candidate_array[0].ref_idx_l1 = ref_pic_idx;
            me_candidate_array[0].ref0_list  = 0;
            me_candidate_array[0].ref1_list  = 0;

            pcs->pa_me_data->me_results[sb_index]
                ->me_mv_array[pu_index * pcs->pa_me_data->max_refs + ref_pic_idx]
                .as_int = ctx->p_sb_best_mv[0][ref_pic_idx][n_idx];
        }
    }
}
2497
2498
// Build the per-PU ME candidate list of one SB for the general (multi-reference) case.
// Per PU:
//   1) copy the do_ref flags and find the best SAD over all enabled references
//      (stored in me_ctx->me_distortion[]);
//   2) add one unipred candidate per enabled reference that survives SAD-based pruning;
//   3) when two lists are searched, add bipred candidates built from the surviving references;
//   4) store the resulting candidate count.
static void construct_me_candidate_array(PictureParentControlSet* pcs, MeContext* me_ctx,
                                         uint32_t num_of_list_to_search, uint32_t sb_index) {
    const uint32_t me_prune_th = me_ctx->prune_me_candidates_th; //to change to 32bit

    for (uint32_t n_idx = 0; n_idx < pcs->max_number_of_pus_per_sb; ++n_idx) {
        uint8_t pu_index = (n_idx > 4) ? z_to_raster[n_idx] : n_idx;
        uint8_t num_cand = 0;

        // Is this PU size part of the stored ME results?
        bool use_me_pu;
        if (pcs->enable_me_16x16) {
            use_me_pu = pcs->enable_me_8x8 || n_idx < MAX_SB64_PU_COUNT_NO_8X8;
        } else {
            use_me_pu = n_idx < MAX_SB64_PU_COUNT_WO_16X16;
        }

        MeCandidate* me_candidate_array = NULL;
        if (use_me_pu) {
            me_candidate_array =
                &pcs->pa_me_data->me_results[sb_index]->me_candidate_array[pu_index * pcs->pa_me_data->max_cand];
        }

        // Only the entries of searched (list, ref) pairs are written and later read
        uint8_t  blk_do_ref[MAX_NUM_OF_REF_PIC_LIST][MAX_REF_IDX];
        uint32_t best_me_dist = (uint32_t)~0;

        // Determine the best ME distortion
        for (uint32_t list = REF_LIST_0; list < num_of_list_to_search; list++) {
            const uint8_t num_refs = me_ctx->num_of_ref_pic_to_search[list];
            for (uint32_t ref = 0; ref < num_refs; ref++) {
                blk_do_ref[list][ref] = me_ctx->search_results[list][ref].do_ref;
                if (blk_do_ref[list][ref] != 0 && me_ctx->p_sb_best_sad[list][ref][n_idx] < best_me_dist) {
                    best_me_dist = me_ctx->p_sb_best_sad[list][ref][n_idx];
                }
            }
        }
        me_ctx->me_distortion[pu_index] = best_me_dist;

        // Unipred candidates. For PUs that are not stored, both loops stop after the first
        // surviving candidate.
        for (uint32_t list = REF_LIST_0; list < num_of_list_to_search && (use_me_pu || num_cand == 0); ++list) {
            const uint8_t num_refs = me_ctx->num_of_ref_pic_to_search[list];

            for (uint32_t ref = 0; (ref < num_refs) && (use_me_pu || (num_cand == 0)); ++ref) {
                //ME was skipped, so do not add this Unipred candidate
                if (blk_do_ref[list][ref] == 0) {
                    continue;
                }

                if (me_prune_th > 0) {
                    // Deviation from the best SAD, in percent units
                    const uint32_t dev_x100 = (me_ctx->p_sb_best_sad[list][ref][n_idx] - best_me_dist) * 100;
                    if (dev_x100 > (best_me_dist * me_prune_th)) {
                        blk_do_ref[list][ref] = 0;
                        continue;
                    }
                }

                if (use_me_pu) {
                    MeCandidate* cand = &me_candidate_array[num_cand];
                    cand->direction   = list;
                    cand->ref_idx_l0  = ref;
                    cand->ref_idx_l1  = ref;
                    // The slot of the list that is not used receives the value 24
                    cand->ref0_list = list == 0 ? list : 24;
                    cand->ref1_list = list == 1 ? list : 24;

                    pcs->pa_me_data->me_results[sb_index]
                        ->me_mv_array[pu_index * pcs->pa_me_data->max_refs + (list ? pcs->pa_me_data->max_l0 : 0) +
                                      ref]
                        .as_int = me_ctx->p_sb_best_mv[list][ref][n_idx];
                }
                num_cand++;
            }
        }

        if (num_of_list_to_search == 2 && use_me_pu) {
            const bool    only_l_bwd = pcs->scs->mrp_ctrls.only_l_bwd;
            const uint8_t num_l0     = me_ctx->num_of_ref_pic_to_search[REF_LIST_0];
            const uint8_t num_l1     = me_ctx->num_of_ref_pic_to_search[REF_LIST_1];

            // 1st set of BIPRED cand
            // (LAST ,BWD), (LAST,ALT ), (LAST,ALT2 )
            // (LAST2,BWD), (LAST2,ALT), (LAST2,ALT2)
            // (LAST3,BWD), (LAST3,ALT), (LAST3,ALT2)
            // (GOLD ,BWD), (GOLD,ALT ), (GOLD,ALT2 )
            for (uint32_t l0_ref = 0; l0_ref < num_l0; l0_ref++) {
                for (uint32_t l1_ref = 0; l1_ref < num_l1; l1_ref++) {
                    // With only_l_bwd, the sole permitted pair is (ref 0, ref 0)
                    const bool pair_allowed = !only_l_bwd || (l0_ref == 0 && l1_ref == 0);
                    if (pair_allowed && blk_do_ref[REF_LIST_0][l0_ref] && blk_do_ref[REF_LIST_1][l1_ref]) {
                        MeCandidate* cand = &me_candidate_array[num_cand];
                        cand->direction   = BI_PRED;
                        cand->ref_idx_l0  = l0_ref;
                        cand->ref_idx_l1  = l1_ref;
                        cand->ref0_list   = REFERENCE_PIC_LIST_0;
                        cand->ref1_list   = REFERENCE_PIC_LIST_1;
                        num_cand++;
                    }
                }
            }

            if (!only_l_bwd) {
                // 2nd set of BIPRED cand: (LAST,LAST2) (LAST,LAST3) (LAST,GOLD)
                for (uint32_t l0_ref = 1; l0_ref < num_l0; l0_ref++) {
                    if (blk_do_ref[REF_LIST_0][0] && blk_do_ref[REF_LIST_0][l0_ref]) {
                        MeCandidate* cand = &me_candidate_array[num_cand];
                        cand->direction   = BI_PRED;
                        cand->ref_idx_l0  = 0;
                        cand->ref_idx_l1  = l0_ref;
                        cand->ref0_list   = REFERENCE_PIC_LIST_0;
                        cand->ref1_list   = REFERENCE_PIC_LIST_0;
                        num_cand++;
                    }
                }

                // 3rd set of BIPRED cand: (BWD, ALT)
                if (num_l1 == 3 && blk_do_ref[REF_LIST_1][0] && blk_do_ref[REF_LIST_1][2]) {
                    MeCandidate* cand = &me_candidate_array[num_cand];
                    cand->direction   = BI_PRED;
                    cand->ref_idx_l0  = 0;
                    cand->ref_idx_l1  = 2;
                    cand->ref0_list   = REFERENCE_PIC_LIST_1;
                    cand->ref1_list   = REFERENCE_PIC_LIST_1;
                    num_cand++;
                }
            }
        }

        // store total me candidate count
        if (use_me_pu) {
            pcs->pa_me_data->me_results[sb_index]->total_me_candidate_index[pu_index] = num_cand;
        }
    }
}
2634
2635
// Active and stationary detection for global motion
2636
static void perform_gm_detection(
2637
    PictureParentControlSet* pcs, // input parameter, Picture Control Set Ptr
2638
    uint32_t                 sb_index, // input parameter, SB Index
2639
    MeContext*               me_ctx // input parameter, ME Context Ptr, used to store decimated/interpolated SB/SR
2640
0
) {
2641
0
    SequenceControlSet* scs = pcs->scs;
2642
0
    uint64_t            per_sig_cnt[MAX_NUM_OF_REF_PIC_LIST][REF_LIST_MAX_DEPTH][NUM_MV_COMPONENTS][NUM_MV_HIST];
2643
0
    uint64_t            tot_cnt = 0;
2644
0
    svt_memset(per_sig_cnt, 0, sizeof(per_sig_cnt));
2645
2646
0
    if (scs->input_resolution <= INPUT_SIZE_480p_RANGE) {
2647
0
        for (unsigned i = 0; i < 64; i++) {
2648
0
            uint8_t n_idx = 21 + i;
2649
0
            if (!pcs->enable_me_8x8) {
2650
0
                if (n_idx >= MAX_SB64_PU_COUNT_NO_8X8) {
2651
0
                    n_idx = me_idx_85_8x8_to_16x16_conversion[n_idx - MAX_SB64_PU_COUNT_NO_8X8];
2652
0
                }
2653
0
                if (!pcs->enable_me_16x16) {
2654
0
                    if (n_idx >= MAX_SB64_PU_COUNT_WO_16X16) {
2655
0
                        n_idx = me_idx_16x16_to_parent_32x32_conversion[n_idx - MAX_SB64_PU_COUNT_WO_16X16];
2656
0
                    }
2657
0
                }
2658
0
            }
2659
0
            MeCandidate* me_candidate = &(
2660
0
                pcs->pa_me_data->me_results[sb_index]->me_candidate_array[n_idx * pcs->pa_me_data->max_cand]);
2661
2662
0
            uint32_t list_index    = (me_candidate->direction == 0 || me_candidate->direction == 2)
2663
0
                   ? me_candidate->ref0_list
2664
0
                   : me_candidate->ref1_list;
2665
0
            uint32_t ref_pic_index = (me_candidate->direction == 0 || me_candidate->direction == 2)
2666
0
                ? me_candidate->ref_idx_l0
2667
0
                : me_candidate->ref_idx_l1;
2668
2669
            // Active block detection
2670
0
            const int active_th = 4;
2671
0
            int       mx        = _MVXT(me_ctx->p_sb_best_mv[list_index][ref_pic_index][n_idx]) << 2;
2672
0
            if (mx < -active_th) {
2673
0
                per_sig_cnt[list_index][ref_pic_index][0][0]++;
2674
0
            } else if (mx > active_th) {
2675
0
                per_sig_cnt[list_index][ref_pic_index][0][1]++;
2676
0
            }
2677
0
            int my = _MVYT(me_ctx->p_sb_best_mv[list_index][ref_pic_index][n_idx]) << 2;
2678
0
            if (my < -active_th) {
2679
0
                per_sig_cnt[list_index][ref_pic_index][1][0]++;
2680
0
            } else if (my > active_th) {
2681
0
                per_sig_cnt[list_index][ref_pic_index][1][1]++;
2682
0
            }
2683
2684
0
            tot_cnt++;
2685
0
        }
2686
0
    } else {
2687
0
        for (unsigned i = 0; i < 16; i++) {
2688
0
            uint8_t n_idx = 5 + i;
2689
0
            if (!pcs->enable_me_16x16) {
2690
0
                if (n_idx >= MAX_SB64_PU_COUNT_WO_16X16) {
2691
0
                    n_idx = me_idx_16x16_to_parent_32x32_conversion[n_idx - MAX_SB64_PU_COUNT_WO_16X16];
2692
0
                }
2693
0
            }
2694
0
            MeCandidate* me_candidate = &(
2695
0
                pcs->pa_me_data->me_results[sb_index]->me_candidate_array[n_idx * pcs->pa_me_data->max_cand]);
2696
2697
0
            uint32_t list_index    = (me_candidate->direction == 0 || me_candidate->direction == 2)
2698
0
                   ? me_candidate->ref0_list
2699
0
                   : me_candidate->ref1_list;
2700
0
            uint32_t ref_pic_index = (me_candidate->direction == 0 || me_candidate->direction == 2)
2701
0
                ? me_candidate->ref_idx_l0
2702
0
                : me_candidate->ref_idx_l1;
2703
2704
            // Active block detection
2705
0
            const int active_th = 32;
2706
0
            int       mx        = _MVXT(me_ctx->p_sb_best_mv[list_index][ref_pic_index][n_idx]) << 2;
2707
0
            if (mx < -active_th) {
2708
0
                per_sig_cnt[list_index][ref_pic_index][0][0]++;
2709
0
            } else if (mx > active_th) {
2710
0
                per_sig_cnt[list_index][ref_pic_index][0][1]++;
2711
0
            }
2712
0
            int my = _MVYT(me_ctx->p_sb_best_mv[list_index][ref_pic_index][n_idx]) << 2;
2713
0
            if (my < -active_th) {
2714
0
                per_sig_cnt[list_index][ref_pic_index][1][0]++;
2715
0
            } else if (my > active_th) {
2716
0
                per_sig_cnt[list_index][ref_pic_index][1][1]++;
2717
0
            }
2718
2719
0
            tot_cnt++;
2720
0
        }
2721
0
    }
2722
2723
0
    for (int l = 0; l < MAX_NUM_OF_REF_PIC_LIST; l++) {
2724
0
        for (int r = 0; r < REF_LIST_MAX_DEPTH; r++) {
2725
0
            for (int c = 0; c < NUM_MV_COMPONENTS; c++) {
2726
0
                for (int s = 0; s < NUM_MV_HIST; s++) {
2727
0
                    if (per_sig_cnt[l][r][c][s] > (tot_cnt / 2)) {
2728
0
                        pcs->rc_me_allow_gm[sb_index] = 1;
2729
0
                        break;
2730
0
                    }
2731
0
                }
2732
0
            }
2733
0
        }
2734
0
    }
2735
0
}
2736
2737
// Compute the distortion per block size based on the ME results
2738
static void compute_distortion(
2739
    PictureParentControlSet* pcs, // input parameter, Picture Control Set Ptr
2740
    uint32_t                 b64_index, // input parameter, B64 Index
2741
    MeContext*               me_ctx // input parameter, ME Context Ptr, used to store decimated/interpolated SB/SR
2742
0
) {
2743
0
    SequenceControlSet* scs = pcs->scs;
2744
    // Determine sb_64x64_me_class
2745
0
    B64Geom* b64_geom   = &pcs->b64_geom[b64_index];
2746
0
    uint32_t b64_size   = 64 * 64;
2747
0
    uint32_t dist_64x64 = 0, dist_32x32 = 0, dist_16x16 = 0, dist_8x8 = 0;
2748
2749
    // 64x64
2750
0
    { dist_64x64 = me_ctx->me_distortion[0]; }
2751
2752
    // 32x32
2753
0
    for (unsigned i = 0; i < 4; i++) {
2754
0
        dist_32x32 += me_ctx->me_distortion[1 + i];
2755
0
    }
2756
2757
    // 16x16
2758
0
    for (unsigned i = 0; i < 16; i++) {
2759
0
        dist_16x16 += me_ctx->me_distortion[5 + i];
2760
0
    }
2761
2762
    // 8x8
2763
0
    for (unsigned i = 0; i < 64; i++) {
2764
0
        dist_8x8 += me_ctx->me_distortion[21 + i];
2765
0
    }
2766
2767
0
    uint64_t mean_dist_8x8     = dist_8x8 / 64;
2768
0
    uint64_t sum_ofsq_dist_8x8 = 0;
2769
0
    for (unsigned i = 0; i < 64; i++) {
2770
0
        const int64_t diff = ((int64_t)me_ctx->me_distortion[21 + i] - (int64_t)mean_dist_8x8);
2771
0
        sum_ofsq_dist_8x8 += diff * diff;
2772
0
    }
2773
2774
0
    pcs->me_8x8_cost_variance[b64_index] = (uint32_t)(sum_ofsq_dist_8x8 / 64);
2775
    // Compute the sum of the distortion of all 16 16x16 (720 and above) and
2776
    // 64 8x8 (for lower resolutions) blocks in the SB
2777
0
    pcs->rc_me_distortion[b64_index] = (scs->input_resolution <= INPUT_SIZE_480p_RANGE) ? dist_8x8 : dist_16x16;
2778
0
    const uint32_t pix_num           = b64_geom->width * b64_geom->height;
2779
    // Normalize
2780
0
    pcs->me_64x64_distortion[b64_index] = (dist_64x64 * b64_size) / (pix_num);
2781
0
    pcs->me_32x32_distortion[b64_index] = (dist_32x32 * b64_size) / (pix_num);
2782
0
    pcs->me_16x16_distortion[b64_index] = (dist_16x16 * b64_size) / (pix_num);
2783
0
    pcs->me_8x8_distortion[b64_index]   = (dist_8x8 * b64_size) / (pix_num);
2784
0
}
2785
2786
// Initialize data used in ME/HME
2787
0
static INLINE void init_me_hme_data(MeContext* me_ctx) {
    // When HME is enabled, zero the search centres of all three HME levels
    if (me_ctx->enable_hme_flag) {
        svt_memset(me_ctx->x_hme_level0_search_center, 0, sizeof(me_ctx->x_hme_level0_search_center));
        svt_memset(me_ctx->x_hme_level1_search_center, 0, sizeof(me_ctx->x_hme_level1_search_center));
        svt_memset(me_ctx->x_hme_level2_search_center, 0, sizeof(me_ctx->x_hme_level2_search_center));

        svt_memset(me_ctx->y_hme_level0_search_center, 0, sizeof(me_ctx->y_hme_level0_search_center));
        svt_memset(me_ctx->y_hme_level1_search_center, 0, sizeof(me_ctx->y_hme_level1_search_center));
        svt_memset(me_ctx->y_hme_level2_search_center, 0, sizeof(me_ctx->y_hme_level2_search_center));
    }

    // R2R FIX: in special cases (e.g. the overlay case) no winner integer MV is written, which would
    // hand a dirty p_sb_best_mv to MD; clear it unconditionally
    svt_memset(me_ctx->p_sb_best_mv, 0, sizeof(me_ctx->p_sb_best_mv));

    svt_memset(me_ctx->performed_phme, 0, sizeof(me_ctx->performed_phme));

    // Reset the per-(list, reference) HME result entries and the related per-reference state
    for (uint32_t list = 0; list < MAX_NUM_OF_REF_PIC_LIST; list++) {
        for (uint32_t ref = 0; ref < REF_LIST_MAX_DEPTH; ref++) {
            // list_i is left untouched for ME_MCTF
            if (me_ctx->me_type != ME_MCTF) {
                me_ctx->search_results[list][ref].list_i = list;
            }
            me_ctx->search_results[list][ref].ref_i   = ref;
            me_ctx->search_results[list][ref].hme_sad = MAX_U32;
            me_ctx->search_results[list][ref].do_ref  = 1;

            me_ctx->zz_sad[list][ref]               = (uint32_t)~0;
            me_ctx->reduce_me_sr_divisor[list][ref] = 1;

            me_ctx->prehme_data[list][ref][0].valid = 0;
            me_ctx->prehme_data[list][ref][1].valid = 0;
        }
    }
}
2821
2822
/*******************************************
2823
* motion_estimation
2824
*   performs ME on 64x64 blocks
2825
*******************************************/
2826
2827
EbErrorType svt_aom_motion_estimation_b64(
2828
    PictureParentControlSet* pcs, // input parameter, Picture Control Set Ptr
2829
    uint32_t                 b64_index, // input parameter, SB Index
2830
    uint32_t                 b64_origin_x, // input parameter, SB Origin X
2831
    uint32_t                 b64_origin_y, // input parameter, SB Origin X
2832
    MeContext*               me_ctx, // input parameter, ME Context Ptr, used to store decimated/interpolated SB/SR
2833
    EbPictureBufferDesc*     input_ptr) // input parameter, source Picture Ptr
2834
2835
0
{
2836
0
    EbErrorType return_error = EB_ErrorNone;
2837
2838
0
    uint32_t num_of_list_to_search = me_ctx->num_of_list_to_search;
2839
2840
    // input picture width and height might be disaligned after resizing
2841
    // we use aligned width and height to avoid disalignment of calculation
2842
    // of block size
2843
0
    uint16_t aligned_width  = (uint16_t)ALIGN_POWER_OF_TWO(input_ptr->width, 3);
2844
0
    uint16_t aligned_height = (uint16_t)ALIGN_POWER_OF_TWO(input_ptr->height, 3);
2845
0
    me_ctx->b64_width  = (aligned_width - b64_origin_x) < BLOCK_SIZE_64 ? aligned_width - b64_origin_x : BLOCK_SIZE_64;
2846
0
    me_ctx->b64_height = (aligned_height - b64_origin_y) < BLOCK_SIZE_64 ? aligned_height - b64_origin_y
2847
0
                                                                         : BLOCK_SIZE_64;
2848
2849
    //pruning of the references is not done for alt-ref / when HMeLevel2 not done
2850
0
    uint8_t prune_ref = me_ctx->enable_hme_flag && me_ctx->me_type != ME_MCTF;
2851
    // Initialize ME/HME buffers
2852
0
    init_me_hme_data(me_ctx);
2853
    // HME: Perform Hierarchical Motion Estimation for all reference frames for the current 64x64 block.
2854
0
    hme_b64(pcs, b64_origin_x, b64_origin_y, me_ctx, input_ptr);
2855
2856
0
    if (me_ctx->me_type == ME_MCTF && me_ctx->search_results[0][0].hme_sad < me_ctx->tf_me_exit_th) {
2857
0
        me_ctx->tf_use_pred_64x64_only_th = (uint8_t)~0;
2858
0
        return return_error;
2859
0
    }
2860
    // prune the reference frames based on the HME outputs.
2861
0
    if (prune_ref) {
2862
0
        hme_prune_ref_and_adjust_sr(me_ctx);
2863
0
    }
2864
    // Full pel: Perform the Integer Motion Estimation on the allowed reference frames.
2865
0
    integer_search_b64(pcs, me_ctx, b64_origin_x, b64_origin_y, input_ptr);
2866
2867
    // prune the reference frames
2868
0
    if (prune_ref && me_ctx->me_hme_prune_ctrls.enable_me_hme_ref_pruning) {
2869
0
        me_prune_ref(me_ctx);
2870
0
    }
2871
2872
0
    if (me_ctx->me_type != ME_MCTF) {
2873
0
        {
2874
0
            if (me_ctx->num_of_ref_pic_to_search[REF_LIST_0] == 1 &&
2875
0
                me_ctx->num_of_ref_pic_to_search[REF_LIST_1] == 0) {
2876
0
                construct_me_candidate_array_single_ref(pcs, me_ctx, num_of_list_to_search, b64_index);
2877
0
            } else if (me_ctx->num_of_ref_pic_to_search[REF_LIST_0] == 1 &&
2878
0
                       me_ctx->num_of_ref_pic_to_search[REF_LIST_1] == 1) {
2879
0
                construct_me_candidate_array_mrp_off(pcs, me_ctx, num_of_list_to_search, b64_index);
2880
0
            } else {
2881
0
                construct_me_candidate_array(pcs, me_ctx, num_of_list_to_search, b64_index);
2882
0
            }
2883
0
        }
2884
        // Save the distortion per block size
2885
0
        compute_distortion(pcs, b64_index, me_ctx);
2886
2887
        // Perform GM detection if GM is enabled
2888
0
        pcs->rc_me_allow_gm[b64_index] = 0;
2889
2890
0
        if (pcs->gm_ctrls.enabled) {
2891
0
            perform_gm_detection(pcs, b64_index, me_ctx);
2892
0
        }
2893
0
    }
2894
0
    return return_error;
2895
0
}