/work/svt-av1/Source/Lib/Codec/motion_estimation.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright(c) 2019 Intel Corporation |
3 | | * Copyright(c) 2019 Netflix, Inc. |
4 | | * |
5 | | * This source code is subject to the terms of the BSD 2 Clause License and |
6 | | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
7 | | * was not distributed with this source code in the LICENSE file, you can |
8 | | * obtain it at https://www.aomedia.org/license/software-license. If the Alliance for Open |
9 | | * Media Patent License 1.0 was not distributed with this source code in the |
10 | | * PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license. |
11 | | */ |
12 | | |
13 | | #include <stdio.h> |
14 | | #include <inttypes.h> |
15 | | |
16 | | #include "aom_dsp_rtcd.h" |
17 | | #include "pcs.h" |
18 | | #include "sequence_control_set.h" |
19 | | #include "motion_estimation.h" |
20 | | #include "utility.h" |
21 | | |
22 | | #include "compute_sad.h" |
23 | | #include "reference_object.h" |
24 | | |
25 | | #include "enc_intra_prediction.h" |
26 | | #include "lambda_rate_tables.h" |
27 | | #include "transforms.h" |
28 | | |
29 | | #include "svt_log.h" |
30 | | #include "resize.h" |
31 | | |
32 | | /******************************************** |
33 | | * Constants |
34 | | ********************************************/ |
35 | 0 | #define REFERENCE_PIC_LIST_0 0 |
36 | 0 | #define REFERENCE_PIC_LIST_1 1 |
37 | | |
/*******************************************
 * svt_aom_compute8x4_sad_kernel_c
 *   Plain-C SAD kernel for a block that is 8 samples wide and 4 rows high.
 *
 *   src, src_stride : first source sample and the step between source rows
 *   ref, ref_stride : first reference sample and the step between reference rows
 *
 *   Returns the accumulated EB_ABS_DIFF() over the 8x4 sample pairs.
 *******************************************/
uint32_t svt_aom_compute8x4_sad_kernel_c(uint8_t* src, uint32_t src_stride, uint8_t* ref, uint32_t ref_stride) {
    uint32_t sad = 0;

    for (uint32_t row = 0; row < 4; ++row) {
        // One row: 8 horizontally adjacent samples.
        for (uint32_t col = 0; col < 8; ++col) { sad += EB_ABS_DIFF(src[col], ref[col]); }
        src += src_stride;
        ref += ref_stride;
    }

    return sad;
}
65 | | |
/*******************************************
 * compute8x8_sad_kernel_c
 *   Plain-C SAD kernel for an 8x8 block.
 *
 *   src, src_stride : first source sample and the step between source rows
 *   ref, ref_stride : first reference sample and the step between reference rows
 *
 *   Returns the accumulated EB_ABS_DIFF() over the 8x8 sample pairs.
 *******************************************/
static uint32_t compute8x8_sad_kernel_c(uint8_t* src, uint32_t src_stride, uint8_t* ref, uint32_t ref_stride) {
    uint32_t sad = 0;

    for (uint32_t row = 0; row < 8; ++row) {
        // One row: 8 horizontally adjacent samples.
        for (uint32_t col = 0; col < 8; ++col) { sad += EB_ABS_DIFF(src[col], ref[col]); }
        src += src_stride;
        ref += ref_stride;
    }

    return sad;
}
93 | | |
/*******************************************
 * svt_ext_sad_calculation_8x8_16x16_c
 *   Computes the SAD of the four 8x8 sub-blocks of one 16x16 block and of the
 *   16x16 block itself (sum of the four), and keeps the SAD + MV pair of every
 *   size that improves on the best value stored so far.
 *
 *   src, src_stride  : top-left source sample of the 16x16 block and row step
 *   ref, ref_stride  : top-left reference sample of the 16x16 block and row step
 *   p_best_sad_8x8   : in/out, 4 best 8x8 SADs (top-left, top-right, bottom-left, bottom-right)
 *   p_best_sad_16x16 : in/out, best 16x16 SAD (single entry)
 *   p_best_mv8x8     : in/out, MVs paired with p_best_sad_8x8
 *   p_best_mv16x16   : in/out, MV paired with p_best_sad_16x16
 *   mv               : MV of the current search point, stored unchanged on improvement
 *   p_sad16x16       : out, 16x16 SAD of the current search point
 *   p_sad8x8         : out, the 4 8x8 SADs of the current search point
 *   sub_sad          : when set, only every other row is measured (8x4 kernel with
 *                      doubled strides) and the result is doubled
 *******************************************/
void svt_ext_sad_calculation_8x8_16x16_c(uint8_t* src, uint32_t src_stride, uint8_t* ref, uint32_t ref_stride,
                                         uint32_t* p_best_sad_8x8, uint32_t* p_best_sad_16x16, uint32_t* p_best_mv8x8,
                                         uint32_t* p_best_mv16x16, uint32_t mv, uint32_t* p_sad16x16,
                                         uint32_t* p_sad8x8, bool sub_sad) {
    // Top-left sample of each 8x8 quadrant, in the order the outputs are stored.
    uint8_t* const src_quad[4] = {src, src + 8, src + 8 * src_stride, src + 8 * src_stride + 8};
    uint8_t* const ref_quad[4] = {ref, ref + 8, ref + 8 * ref_stride, ref + 8 * ref_stride + 8};

    // Measure all four quadrants first ...
    for (int quad = 0; quad < 4; ++quad) {
        if (sub_sad) {
            p_sad8x8[quad] = (svt_aom_compute8x4_sad_kernel_c(
                                 src_quad[quad], 2 * src_stride, ref_quad[quad], 2 * ref_stride))
                << 1;
        } else {
            p_sad8x8[quad] = compute8x8_sad_kernel_c(src_quad[quad], src_stride, ref_quad[quad], ref_stride);
        }
    }

    // ... then update the per-quadrant bests.
    for (int quad = 0; quad < 4; ++quad) {
        if (p_sad8x8[quad] < p_best_sad_8x8[quad]) {
            p_best_sad_8x8[quad] = (uint32_t)p_sad8x8[quad];
            p_best_mv8x8[quad]   = mv;
        }
    }

    const uint32_t sad16x16 = p_sad8x8[0] + p_sad8x8[1] + p_sad8x8[2] + p_sad8x8[3];
    if (sad16x16 < p_best_sad_16x16[0]) {
        p_best_sad_16x16[0] = sad16x16;
        p_best_mv16x16[0]   = mv;
    }

    *p_sad16x16 = sad16x16;
}
157 | | |
/*******************************************
 * svt_ext_sad_calculation_32x32_64x64_c
 *   Derives the four 32x32 SADs and the 64x64 SAD of the current search point
 *   from its sixteen 16x16 SADs, and keeps the SAD + MV pair of every size
 *   that improves on the best value stored so far.
 *
 *   p_sad16x16       : 16 SADs; entries [4q .. 4q+3] are summed into 32x32 quadrant q
 *   p_best_sad_32x32 : in/out, 4 best 32x32 SADs
 *   p_best_sad_64x64 : in/out, best 64x64 SAD (single entry)
 *   p_best_mv32x32   : in/out, MVs paired with p_best_sad_32x32
 *   p_best_mv64x64   : in/out, MV paired with p_best_sad_64x64
 *   mv               : MV of the current search point, stored unchanged on improvement
 *   p_sad32x32       : out, the 4 32x32 SADs of the current search point
 *******************************************/
void svt_ext_sad_calculation_32x32_64x64_c(uint32_t* p_sad16x16, uint32_t* p_best_sad_32x32, uint32_t* p_best_sad_64x64,
                                           uint32_t* p_best_mv32x32, uint32_t* p_best_mv64x64, uint32_t mv,
                                           uint32_t* p_sad32x32) {
    uint32_t sad64x64 = 0;

    for (uint32_t quad = 0; quad < 4; ++quad) {
        const uint32_t* sad16    = p_sad16x16 + 4 * quad;
        const uint32_t  sad32x32 = sad16[0] + sad16[1] + sad16[2] + sad16[3];

        p_sad32x32[quad] = sad32x32;
        if (sad32x32 < p_best_sad_32x32[quad]) {
            p_best_sad_32x32[quad] = sad32x32;
            p_best_mv32x32[quad]   = mv;
        }

        sad64x64 += sad32x32;
    }

    if (sad64x64 < p_best_sad_64x64[0]) {
        p_best_sad_64x64[0] = sad64x64;
        p_best_mv64x64[0]   = mv;
    }
}
197 | | |
/*******************************************
 * svt_ext_eight_sad_calculation_8x8_16x16
 *   For one 16x16 block, evaluates 8 horizontally consecutive search points
 *   (ref, ref + 1, ..., ref + 7). For each point it computes the four 8x8 SADs
 *   and their 16x16 sum, and keeps the SAD + MV pair of every size that
 *   improves on the best value stored so far.
 *
 *   src, src_stride  : top-left source sample of the 16x16 block and row step
 *   ref, ref_stride  : reference sample matching search point 0 and row step
 *   mv               : MV of search point 0; point k is recorded with x + k and
 *                      the same y, packed as (y << 16) | (uint16_t)x
 *   start_16x16_pos  : index of this 16x16 block in the 16x16 arrays; its 8x8
 *                      entries start at 4 * start_16x16_pos
 *   p_best_sad_8x8 / p_best_mv8x8     : in/out, best 8x8 SADs and MVs
 *   p_best_sad_16x16 / p_best_mv16x16 : in/out, best 16x16 SADs and MVs
 *   p_eight_sad16x16 : out, [start_16x16_pos][k] receives the 16x16 SAD of point k
 *   p_eight_sad8x8   : unused
 *   sub_sad          : when set, only every other row is measured (8x4 kernel with
 *                      doubled strides) and the result is doubled
 *******************************************/
static void svt_ext_eight_sad_calculation_8x8_16x16(uint8_t* src, uint32_t src_stride, uint8_t* ref,
                                                    uint32_t ref_stride, uint32_t mv, uint32_t start_16x16_pos,
                                                    uint32_t* p_best_sad_8x8, uint32_t* p_best_sad_16x16,
                                                    uint32_t* p_best_mv8x8, uint32_t* p_best_mv16x16,
                                                    uint32_t p_eight_sad16x16[16][8], uint32_t p_eight_sad8x8[64][8],
                                                    bool sub_sad) {
    const uint32_t first_8x8_pos = 4 * start_16x16_pos;

    // Row step handed to the kernel: doubled when only every other row is sampled.
    const uint32_t src_step = sub_sad ? (src_stride << 1) : src_stride;
    const uint32_t ref_step = sub_sad ? (ref_stride << 1) : ref_stride;

    // Top-left sample of each 8x8 quadrant (top-left, top-right, bottom-left, bottom-right).
    uint8_t* const src_quad[4] = {src, src + 8, src + (src_stride << 3), src + (src_stride << 3) + 8};
    uint8_t* const ref_quad[4] = {ref, ref + 8, ref + (ref_stride << 3), ref + (ref_stride << 3) + 8};

    (void)p_eight_sad8x8;

    p_best_sad_8x8 += first_8x8_pos;
    p_best_mv8x8 += first_8x8_pos;
    p_best_sad_16x16 += start_16x16_pos;
    p_best_mv16x16 += start_16x16_pos;

    for (int search_index = 0; search_index < 8; search_index++) {
        // The candidate MV depends only on mv and search_index, so build it once.
        const int16_t  x_mv    = _MVXT(mv) + (int16_t)search_index;
        const int16_t  y_mv    = _MVYT(mv);
        const uint32_t cand_mv = ((uint32_t)y_mv << 16) | ((uint16_t)x_mv);

        uint32_t sad16x16 = 0;
        for (int quad = 0; quad < 4; quad++) {
            uint8_t* const ref_pos = ref_quad[quad] + search_index;
            const uint32_t sad8x8 =
                sub_sad ? ((svt_aom_compute8x4_sad_kernel_c(src_quad[quad], src_step, ref_pos, ref_step)) << 1)
                        : compute8x8_sad_kernel_c(src_quad[quad], src_step, ref_pos, ref_step);

            if (sad8x8 < p_best_sad_8x8[quad]) {
                p_best_sad_8x8[quad] = sad8x8;
                p_best_mv8x8[quad]   = cand_mv;
            }
            sad16x16 += sad8x8;
        }

        p_eight_sad16x16[start_16x16_pos][search_index] = sad16x16;
        if (sad16x16 < p_best_sad_16x16[0]) {
            p_best_sad_16x16[0] = sad16x16;
            p_best_mv16x16[0]   = cand_mv;
        }
    }
}
316 | | |
/*******************************************
 * svt_ext_all_sad_calculation_8x8_16x16_c
 *   Runs svt_ext_eight_sad_calculation_8x8_16x16() on each of the sixteen
 *   16x16 blocks of a 4x4 grid (16-sample steps in both directions), i.e. a
 *   64x64 area. Blocks are visited row by row, left to right.
 *
 *   src, src_stride  : top-left source sample of the area and row step
 *   ref, ref_stride  : reference sample matching search point 0 and row step
 *   mv               : MV of search point 0, forwarded unchanged
 *   p_best_* / p_eight_* / sub_sad : forwarded unchanged to the per-block routine
 *******************************************/
void svt_ext_all_sad_calculation_8x8_16x16_c(uint8_t* src, uint32_t src_stride, uint8_t* ref, uint32_t ref_stride,
                                             uint32_t mv, uint32_t* p_best_sad_8x8, uint32_t* p_best_sad_16x16,
                                             uint32_t* p_best_mv8x8, uint32_t* p_best_mv16x16,
                                             uint32_t p_eight_sad16x16[16][8], uint32_t p_eight_sad8x8[64][8],
                                             bool sub_sad) {
    // Index used in the 16x16 arrays for each block, listed in raster order.
    static const char offsets[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15};

    for (int raster = 0; raster < 16; raster++) {
        const int      y          = raster >> 2; // grid row
        const int      x          = raster & 3; // grid column
        const uint32_t src_offset = 16 * y * src_stride + 16 * x;
        const uint32_t ref_offset = 16 * y * ref_stride + 16 * x;

        svt_ext_eight_sad_calculation_8x8_16x16(src + src_offset,
                                                src_stride,
                                                ref + ref_offset,
                                                ref_stride,
                                                mv,
                                                offsets[raster],
                                                p_best_sad_8x8,
                                                p_best_sad_16x16,
                                                p_best_mv8x8,
                                                p_best_mv16x16,
                                                p_eight_sad16x16,
                                                p_eight_sad8x8,
                                                sub_sad);
    }
}
344 | | |
/*******************************************
 * svt_ext_eight_sad_calculation_32x32_64x64_c
 *   For each of 8 horizontally consecutive search points, derives the four
 *   32x32 SADs and the 64x64 SAD from the sixteen 16x16 SADs of that point,
 *   and keeps the SAD + MV pair of every size that improves on the best value
 *   stored so far.
 *
 *   p_sad16x16       : [16][8] 16x16 SADs; rows [4q .. 4q+3] are summed into quadrant q
 *   p_best_sad_32x32 / p_best_mv32x32 : in/out, 4 best 32x32 SADs and MVs
 *   p_best_sad_64x64 / p_best_mv64x64 : in/out, best 64x64 SAD and MV (single entry)
 *   mv               : MV of search point 0; point k is recorded with x + k and
 *                      the same y, packed as (y << 16) | (uint16_t)x
 *   p_sad32x32       : out, [4][8] 32x32 SADs per quadrant and search point
 *******************************************/
void svt_ext_eight_sad_calculation_32x32_64x64_c(const uint32_t p_sad16x16[16][8], uint32_t* p_best_sad_32x32,
                                                 uint32_t* p_best_sad_64x64, uint32_t* p_best_mv32x32,
                                                 uint32_t* p_best_mv64x64, uint32_t mv, uint32_t p_sad32x32[4][8]) {
    for (uint32_t search_index = 0; search_index < 8; search_index++) {
        // The candidate MV depends only on mv and search_index, so build it once.
        const int16_t  x_mv    = _MVXT(mv) + (int16_t)search_index;
        const int16_t  y_mv    = _MVYT(mv);
        const uint32_t cand_mv = ((uint32_t)y_mv << 16) | ((uint16_t)x_mv);

        uint32_t sad64x64 = 0;
        for (uint32_t quad = 0; quad < 4; quad++) {
            const uint32_t first    = 4 * quad;
            const uint32_t sad32x32 = p_sad16x16[first][search_index] + p_sad16x16[first + 1][search_index] +
                p_sad16x16[first + 2][search_index] + p_sad16x16[first + 3][search_index];

            p_sad32x32[quad][search_index] = sad32x32;
            if (sad32x32 < p_best_sad_32x32[quad]) {
                p_best_sad_32x32[quad] = sad32x32;
                p_best_mv32x32[quad]   = cand_mv;
            }
            sad64x64 += sad32x32;
        }

        if (sad64x64 < p_best_sad_64x64[0]) {
            p_best_sad_64x64[0] = sad64x64;
            p_best_mv64x64[0]   = cand_mv;
        }
    }
}
403 | | |
404 | | /******************************************* |
405 | | * open_loop_me_get_search_point_results_block |
406 | | *******************************************/ |
407 | | static void open_loop_me_get_eight_search_point_results_block( |
408 | | MeContext* me_ctx, // input parameter, ME context Ptr, used to get SB Ptr |
409 | | uint32_t list_index, // input parameter, reference list index |
410 | | uint32_t ref_pic_index, |
411 | | int32_t search_region_index, // input parameter, search area origin, used to |
412 | | // point to reference samples |
413 | | int32_t x_search_index, // input parameter, search region position in the |
414 | | // horizontal direction, used to derive xMV |
415 | | int32_t y_search_index // input parameter, search region position in the |
416 | | // vertical direction, used to derive yMV |
417 | 0 | ) { |
418 | | // uint32_t ref_luma_stride = ref_pic_ptr->y_stride; // NADER |
419 | | // uint8_t *ref_ptr = ref_pic_ptr->y_buffer; // NADER |
420 | 0 | const bool sub_sad = (me_ctx->me_search_method == SUB_SAD_SEARCH); |
421 | 0 | uint32_t ref_luma_stride = me_ctx->interpolated_full_stride[list_index][ref_pic_index]; |
422 | 0 | uint8_t* ref_ptr = me_ctx->integer_buffer_ptr[list_index][ref_pic_index] + |
423 | 0 | ((ME_FILTER_TAP >> 1) * me_ctx->interpolated_full_stride[list_index][ref_pic_index]) + (ME_FILTER_TAP >> 1) + |
424 | 0 | search_region_index; |
425 | |
|
426 | 0 | uint32_t curr_mv_1 = (((uint32_t)y_search_index) << 16); |
427 | 0 | uint16_t curr_mv_2 = ((uint16_t)x_search_index); |
428 | 0 | uint32_t curr_mv = curr_mv_1 | curr_mv_2; |
429 | |
|
430 | 0 | svt_ext_all_sad_calculation_8x8_16x16(me_ctx->b64_src_ptr, |
431 | 0 | me_ctx->b64_src_stride, |
432 | 0 | ref_ptr, |
433 | 0 | ref_luma_stride, |
434 | 0 | curr_mv, |
435 | 0 | me_ctx->p_best_sad_8x8, |
436 | 0 | me_ctx->p_best_sad_16x16, |
437 | 0 | me_ctx->p_best_mv8x8, |
438 | 0 | me_ctx->p_best_mv16x16, |
439 | 0 | me_ctx->p_eight_sad16x16, |
440 | 0 | me_ctx->p_eight_sad8x8, |
441 | 0 | sub_sad); |
442 | |
|
443 | 0 | svt_ext_eight_sad_calculation_32x32_64x64(me_ctx->p_eight_sad16x16, |
444 | 0 | me_ctx->p_best_sad_32x32, |
445 | 0 | me_ctx->p_best_sad_64x64, |
446 | 0 | me_ctx->p_best_mv32x32, |
447 | 0 | me_ctx->p_best_mv64x64, |
448 | 0 | curr_mv, |
449 | 0 | me_ctx->p_eight_sad32x32); |
450 | 0 | } |
451 | | |
452 | | /******************************************* |
453 | | * open_loop_me_get_search_point_results_block |
454 | | *******************************************/ |
455 | | static void open_loop_me_get_search_point_results_block( |
456 | | MeContext* me_ctx, // input parameter, ME context Ptr, used to get SB Ptr |
457 | | uint32_t list_index, // input parameter, reference list index |
458 | | uint32_t ref_pic_index, |
459 | | int32_t search_region_index, // input parameter, search area origin, used to |
460 | | // point to reference samples |
461 | | int32_t x_search_index, // input parameter, search region position in the |
462 | | // horizontal direction, used to derive xMV |
463 | | int32_t y_search_index) // input parameter, search region position in the |
464 | | // vertical direction, used to derive yMV |
465 | 0 | { |
466 | 0 | const bool sub_sad = (me_ctx->me_search_method == SUB_SAD_SEARCH); |
467 | 0 | uint8_t* src_ptr = me_ctx->b64_src_ptr; |
468 | | |
469 | | // uint8_t *ref_ptr = ref_pic_ptr->y_buffer; // NADER |
470 | 0 | uint8_t* ref_ptr = me_ctx->integer_buffer_ptr[list_index][ref_pic_index] + (ME_FILTER_TAP >> 1) + |
471 | 0 | ((ME_FILTER_TAP >> 1) * me_ctx->interpolated_full_stride[list_index][ref_pic_index]); |
472 | | // uint32_t ref_luma_stride = ref_pic_ptr->y_stride; // NADER |
473 | 0 | uint32_t ref_luma_stride = me_ctx->interpolated_full_stride[list_index][ref_pic_index]; |
474 | 0 | int32_t search_position_tl_index = search_region_index; |
475 | 0 | int32_t search_position_index; |
476 | 0 | int32_t block_index; |
477 | 0 | int32_t src_next_16x16_offset; |
478 | | // uint32_t ref_next_16x16_offset = (ref_pic_ptr->y_stride << 4); // NADER |
479 | 0 | uint32_t ref_next_16x16_offset = (ref_luma_stride << 4); |
480 | 0 | uint32_t curr_mv_1 = (((uint32_t)y_search_index) << 16); |
481 | 0 | uint16_t curr_mv_2 = ((uint16_t)x_search_index); |
482 | 0 | uint32_t curr_mv = curr_mv_1 | curr_mv_2; |
483 | 0 | uint32_t* p_best_sad_8x8 = me_ctx->p_best_sad_8x8; |
484 | 0 | uint32_t* p_best_sad_16x16 = me_ctx->p_best_sad_16x16; |
485 | 0 | uint32_t* p_best_sad_32x32 = me_ctx->p_best_sad_32x32; |
486 | 0 | uint32_t* p_best_sad_64x64 = me_ctx->p_best_sad_64x64; |
487 | 0 | uint32_t* p_best_mv8x8 = me_ctx->p_best_mv8x8; |
488 | 0 | uint32_t* p_best_mv16x16 = me_ctx->p_best_mv16x16; |
489 | 0 | uint32_t* p_best_mv32x32 = me_ctx->p_best_mv32x32; |
490 | 0 | uint32_t* p_best_mv64x64 = me_ctx->p_best_mv64x64; |
491 | 0 | uint32_t* p_sad32x32 = me_ctx->p_sad32x32; |
492 | 0 | uint32_t* p_sad16x16 = me_ctx->p_sad16x16; |
493 | 0 | uint32_t* p_sad8x8 = me_ctx->p_sad8x8; |
494 | | |
495 | | // TODO: block_index search_position_index could be removed |
496 | 0 | const uint32_t src_stride = me_ctx->b64_src_stride; |
497 | 0 | src_next_16x16_offset = src_stride << 4; |
498 | | |
499 | | //---- 16x16 : 0 |
500 | 0 | block_index = 0; |
501 | 0 | search_position_index = search_position_tl_index; |
502 | |
|
503 | 0 | svt_ext_sad_calculation_8x8_16x16(src_ptr + block_index, |
504 | 0 | src_stride, |
505 | 0 | ref_ptr + search_position_index, |
506 | 0 | ref_luma_stride, |
507 | 0 | &p_best_sad_8x8[0], |
508 | 0 | &p_best_sad_16x16[0], |
509 | 0 | &p_best_mv8x8[0], |
510 | 0 | &p_best_mv16x16[0], |
511 | 0 | curr_mv, |
512 | 0 | &p_sad16x16[0], |
513 | 0 | &p_sad8x8[0], |
514 | 0 | sub_sad); |
515 | | |
516 | | //---- 16x16 : 1 |
517 | 0 | block_index = block_index + 16; |
518 | 0 | search_position_index = search_position_tl_index + 16; |
519 | 0 | svt_ext_sad_calculation_8x8_16x16(src_ptr + block_index, |
520 | 0 | src_stride, |
521 | 0 | ref_ptr + search_position_index, |
522 | 0 | ref_luma_stride, |
523 | 0 | &p_best_sad_8x8[4], |
524 | 0 | &p_best_sad_16x16[1], |
525 | 0 | &p_best_mv8x8[4], |
526 | 0 | &p_best_mv16x16[1], |
527 | 0 | curr_mv, |
528 | 0 | &p_sad16x16[1], |
529 | 0 | &p_sad8x8[4], |
530 | 0 | sub_sad); |
531 | | //---- 16x16 : 4 |
532 | 0 | block_index = block_index + 16; |
533 | 0 | search_position_index = search_position_index + 16; |
534 | |
|
535 | 0 | svt_ext_sad_calculation_8x8_16x16(src_ptr + block_index, |
536 | 0 | src_stride, |
537 | 0 | ref_ptr + search_position_index, |
538 | 0 | ref_luma_stride, |
539 | 0 | &p_best_sad_8x8[16], |
540 | 0 | &p_best_sad_16x16[4], |
541 | 0 | &p_best_mv8x8[16], |
542 | 0 | &p_best_mv16x16[4], |
543 | 0 | curr_mv, |
544 | 0 | &p_sad16x16[4], |
545 | 0 | &p_sad8x8[16], |
546 | 0 | sub_sad); |
547 | | |
548 | | //---- 16x16 : 5 |
549 | 0 | block_index = block_index + 16; |
550 | 0 | search_position_index = search_position_index + 16; |
551 | 0 | svt_ext_sad_calculation_8x8_16x16(src_ptr + block_index, |
552 | 0 | src_stride, |
553 | 0 | ref_ptr + search_position_index, |
554 | 0 | ref_luma_stride, |
555 | 0 | &p_best_sad_8x8[20], |
556 | 0 | &p_best_sad_16x16[5], |
557 | 0 | &p_best_mv8x8[20], |
558 | 0 | &p_best_mv16x16[5], |
559 | 0 | curr_mv, |
560 | 0 | &p_sad16x16[5], |
561 | 0 | &p_sad8x8[20], |
562 | 0 | sub_sad); |
563 | | |
564 | | //---- 16x16 : 2 |
565 | 0 | block_index = src_next_16x16_offset; |
566 | 0 | search_position_index = search_position_tl_index + ref_next_16x16_offset; |
567 | 0 | svt_ext_sad_calculation_8x8_16x16(src_ptr + block_index, |
568 | 0 | src_stride, |
569 | 0 | ref_ptr + search_position_index, |
570 | 0 | ref_luma_stride, |
571 | 0 | &p_best_sad_8x8[8], |
572 | 0 | &p_best_sad_16x16[2], |
573 | 0 | &p_best_mv8x8[8], |
574 | 0 | &p_best_mv16x16[2], |
575 | 0 | curr_mv, |
576 | 0 | &p_sad16x16[2], |
577 | 0 | &p_sad8x8[8], |
578 | 0 | sub_sad); |
579 | | //---- 16x16 : 3 |
580 | 0 | block_index = block_index + 16; |
581 | 0 | search_position_index = search_position_index + 16; |
582 | 0 | svt_ext_sad_calculation_8x8_16x16(src_ptr + block_index, |
583 | 0 | src_stride, |
584 | 0 | ref_ptr + search_position_index, |
585 | 0 | ref_luma_stride, |
586 | 0 | &p_best_sad_8x8[12], |
587 | 0 | &p_best_sad_16x16[3], |
588 | 0 | &p_best_mv8x8[12], |
589 | 0 | &p_best_mv16x16[3], |
590 | 0 | curr_mv, |
591 | 0 | &p_sad16x16[3], |
592 | 0 | &p_sad8x8[12], |
593 | 0 | sub_sad); |
594 | | //---- 16x16 : 6 |
595 | 0 | block_index = block_index + 16; |
596 | 0 | search_position_index = search_position_index + 16; |
597 | 0 | svt_ext_sad_calculation_8x8_16x16(src_ptr + block_index, |
598 | 0 | src_stride, |
599 | 0 | ref_ptr + search_position_index, |
600 | 0 | ref_luma_stride, |
601 | 0 | &p_best_sad_8x8[24], |
602 | 0 | &p_best_sad_16x16[6], |
603 | 0 | &p_best_mv8x8[24], |
604 | 0 | &p_best_mv16x16[6], |
605 | 0 | curr_mv, |
606 | 0 | &p_sad16x16[6], |
607 | 0 | &p_sad8x8[24], |
608 | 0 | sub_sad); |
609 | | //---- 16x16 : 7 |
610 | 0 | block_index = block_index + 16; |
611 | 0 | search_position_index = search_position_index + 16; |
612 | 0 | svt_ext_sad_calculation_8x8_16x16(src_ptr + block_index, |
613 | 0 | src_stride, |
614 | 0 | ref_ptr + search_position_index, |
615 | 0 | ref_luma_stride, |
616 | 0 | &p_best_sad_8x8[28], |
617 | 0 | &p_best_sad_16x16[7], |
618 | 0 | &p_best_mv8x8[28], |
619 | 0 | &p_best_mv16x16[7], |
620 | 0 | curr_mv, |
621 | 0 | &p_sad16x16[7], |
622 | 0 | &p_sad8x8[28], |
623 | 0 | sub_sad); |
624 | | |
625 | | //---- 16x16 : 8 |
626 | 0 | block_index = (src_next_16x16_offset << 1); |
627 | 0 | search_position_index = search_position_tl_index + (ref_next_16x16_offset << 1); |
628 | 0 | svt_ext_sad_calculation_8x8_16x16(src_ptr + block_index, |
629 | 0 | src_stride, |
630 | 0 | ref_ptr + search_position_index, |
631 | 0 | ref_luma_stride, |
632 | 0 | &p_best_sad_8x8[32], |
633 | 0 | &p_best_sad_16x16[8], |
634 | 0 | &p_best_mv8x8[32], |
635 | 0 | &p_best_mv16x16[8], |
636 | 0 | curr_mv, |
637 | 0 | &p_sad16x16[8], |
638 | 0 | &p_sad8x8[32], |
639 | 0 | sub_sad); |
640 | | //---- 16x16 : 9 |
641 | 0 | block_index = block_index + 16; |
642 | 0 | search_position_index = search_position_index + 16; |
643 | 0 | svt_ext_sad_calculation_8x8_16x16(src_ptr + block_index, |
644 | 0 | src_stride, |
645 | 0 | ref_ptr + search_position_index, |
646 | 0 | ref_luma_stride, |
647 | 0 | &p_best_sad_8x8[36], |
648 | 0 | &p_best_sad_16x16[9], |
649 | 0 | &p_best_mv8x8[36], |
650 | 0 | &p_best_mv16x16[9], |
651 | 0 | curr_mv, |
652 | 0 | &p_sad16x16[9], |
653 | 0 | &p_sad8x8[36], |
654 | 0 | sub_sad); |
655 | | //---- 16x16 : 12 |
656 | 0 | block_index = block_index + 16; |
657 | 0 | search_position_index = search_position_index + 16; |
658 | 0 | svt_ext_sad_calculation_8x8_16x16(src_ptr + block_index, |
659 | 0 | src_stride, |
660 | 0 | ref_ptr + search_position_index, |
661 | 0 | ref_luma_stride, |
662 | 0 | &p_best_sad_8x8[48], |
663 | 0 | &p_best_sad_16x16[12], |
664 | 0 | &p_best_mv8x8[48], |
665 | 0 | &p_best_mv16x16[12], |
666 | 0 | curr_mv, |
667 | 0 | &p_sad16x16[12], |
668 | 0 | &p_sad8x8[48], |
669 | 0 | sub_sad); |
670 | | //---- 16x16 : 13 |
671 | 0 | block_index = block_index + 16; |
672 | 0 | search_position_index = search_position_index + 16; |
673 | 0 | svt_ext_sad_calculation_8x8_16x16(src_ptr + block_index, |
674 | 0 | src_stride, |
675 | 0 | ref_ptr + search_position_index, |
676 | 0 | ref_luma_stride, |
677 | 0 | &p_best_sad_8x8[52], |
678 | 0 | &p_best_sad_16x16[13], |
679 | 0 | &p_best_mv8x8[52], |
680 | 0 | &p_best_mv16x16[13], |
681 | 0 | curr_mv, |
682 | 0 | &p_sad16x16[13], |
683 | 0 | &p_sad8x8[52], |
684 | 0 | sub_sad); |
685 | | |
686 | | //---- 16x16 : 10 |
687 | 0 | block_index = (src_next_16x16_offset * 3); |
688 | 0 | search_position_index = search_position_tl_index + (ref_next_16x16_offset * 3); |
689 | 0 | svt_ext_sad_calculation_8x8_16x16(src_ptr + block_index, |
690 | 0 | src_stride, |
691 | 0 | ref_ptr + search_position_index, |
692 | 0 | ref_luma_stride, |
693 | 0 | &p_best_sad_8x8[40], |
694 | 0 | &p_best_sad_16x16[10], |
695 | 0 | &p_best_mv8x8[40], |
696 | 0 | &p_best_mv16x16[10], |
697 | 0 | curr_mv, |
698 | 0 | &p_sad16x16[10], |
699 | 0 | &p_sad8x8[40], |
700 | 0 | sub_sad); |
701 | | //---- 16x16 : 11 |
702 | 0 | block_index = block_index + 16; |
703 | 0 | search_position_index = search_position_index + 16; |
704 | 0 | svt_ext_sad_calculation_8x8_16x16(src_ptr + block_index, |
705 | 0 | src_stride, |
706 | 0 | ref_ptr + search_position_index, |
707 | 0 | ref_luma_stride, |
708 | 0 | &p_best_sad_8x8[44], |
709 | 0 | &p_best_sad_16x16[11], |
710 | 0 | &p_best_mv8x8[44], |
711 | 0 | &p_best_mv16x16[11], |
712 | 0 | curr_mv, |
713 | 0 | &p_sad16x16[11], |
714 | 0 | &p_sad8x8[44], |
715 | 0 | sub_sad); |
716 | | //---- 16x16 : 14 |
717 | 0 | block_index = block_index + 16; |
718 | 0 | search_position_index = search_position_index + 16; |
719 | 0 | svt_ext_sad_calculation_8x8_16x16(src_ptr + block_index, |
720 | 0 | src_stride, |
721 | 0 | ref_ptr + search_position_index, |
722 | 0 | ref_luma_stride, |
723 | 0 | &p_best_sad_8x8[56], |
724 | 0 | &p_best_sad_16x16[14], |
725 | 0 | &p_best_mv8x8[56], |
726 | 0 | &p_best_mv16x16[14], |
727 | 0 | curr_mv, |
728 | 0 | &p_sad16x16[14], |
729 | 0 | &p_sad8x8[56], |
730 | 0 | sub_sad); |
731 | | //---- 16x16 : 15 |
732 | 0 | block_index = block_index + 16; |
733 | 0 | search_position_index = search_position_index + 16; |
734 | 0 | svt_ext_sad_calculation_8x8_16x16(src_ptr + block_index, |
735 | 0 | src_stride, |
736 | 0 | ref_ptr + search_position_index, |
737 | 0 | ref_luma_stride, |
738 | 0 | &p_best_sad_8x8[60], |
739 | 0 | &p_best_sad_16x16[15], |
740 | 0 | &p_best_mv8x8[60], |
741 | 0 | &p_best_mv16x16[15], |
742 | 0 | curr_mv, |
743 | 0 | &p_sad16x16[15], |
744 | 0 | &p_sad8x8[60], |
745 | 0 | sub_sad); |
746 | |
|
747 | 0 | svt_ext_sad_calculation_32x32_64x64( |
748 | 0 | p_sad16x16, p_best_sad_32x32, p_best_sad_64x64, p_best_mv32x32, p_best_mv64x64, curr_mv, &p_sad32x32[0]); |
749 | 0 | } |
750 | | |
751 | | /******************************************* |
752 | | * open_loop_me_fullpel_search_sblock |
753 | | *******************************************/ |
754 | | static void open_loop_me_fullpel_search_sblock(MeContext* me_ctx, uint32_t list_index, uint32_t ref_pic_index, |
755 | | int16_t x_search_area_origin, int16_t y_search_area_origin, |
756 | 0 | uint32_t search_area_width, uint32_t search_area_height) { |
757 | 0 | uint32_t x_search_index, y_search_index; |
758 | 0 | uint32_t search_area_width_rest_8 = search_area_width & 7; |
759 | 0 | uint32_t search_area_width_mult_8 = search_area_width - search_area_width_rest_8; |
760 | |
|
761 | 0 | for (y_search_index = 0; y_search_index < search_area_height; y_search_index++) { |
762 | 0 | for (x_search_index = 0; x_search_index < search_area_width_mult_8; x_search_index += 8) { |
763 | | // this function will do: x_search_index, +1, +2, ..., +7 |
764 | 0 | open_loop_me_get_eight_search_point_results_block( |
765 | 0 | me_ctx, |
766 | 0 | list_index, |
767 | 0 | ref_pic_index, |
768 | 0 | x_search_index + y_search_index * me_ctx->interpolated_full_stride[list_index][ref_pic_index], |
769 | 0 | (int32_t)x_search_index + x_search_area_origin, |
770 | 0 | (int32_t)y_search_index + y_search_area_origin); |
771 | 0 | } |
772 | |
|
773 | 0 | for (x_search_index = search_area_width_mult_8; x_search_index < search_area_width; x_search_index++) { |
774 | 0 | open_loop_me_get_search_point_results_block( |
775 | 0 | me_ctx, |
776 | 0 | list_index, |
777 | 0 | ref_pic_index, |
778 | 0 | x_search_index + y_search_index * me_ctx->interpolated_full_stride[list_index][ref_pic_index], |
779 | 0 | (int32_t)x_search_index + x_search_area_origin, |
780 | 0 | (int32_t)y_search_index + y_search_area_origin); |
781 | 0 | } |
782 | 0 | } |
783 | 0 | } |
784 | | |
785 | | // Perform HME Level 0 for one 64x64 block on the given picture |
786 | | static void hme_level_0(MeContext* me_ctx, // ME context Ptr, used to get/update ME results |
787 | | int16_t org_x, // Block position in the horizontal direction- sixteenth resolution |
788 | | int16_t org_y, // Block position in the vertical direction- sixteenth resolution |
789 | | uint32_t block_width, // Block width - sixteenth resolution |
790 | | uint32_t block_height, // Block height - sixteenth resolution |
791 | | int16_t sa_width, // search area width |
792 | | int16_t sa_height, // search area height |
793 | | EbPictureBufferDesc* sixteenth_ref_pic_ptr, // sixteenth-downsampled reference picture |
794 | | uint32_t sr_w, // current search region index in the horizontal direction |
795 | | uint32_t sr_h, // current search region index in the vertical direction |
796 | | uint64_t* best_sad, // output: Level0 SAD at (sr_w, sr_h) |
797 | | int16_t* hme_l0_sc_x, // output: Level0 xMV at (sr_w, sr_h) |
798 | | int16_t* hme_l0_sc_y // output: Level0 yMV at (sr_w, sr_h) |
799 | 0 | ) { |
800 | | // round up the search region width to nearest multiple of 8 because the SAD calculation performance (for |
801 | | // intrinsic functions) is the same for search region width from 1 to 8 |
802 | 0 | sa_width = (int16_t)((sa_width + 7) & ~0x07); |
803 | 0 | int16_t pad_width = (int16_t)(sixteenth_ref_pic_ptr->border) - 1; |
804 | 0 | int16_t pad_height = (int16_t)(sixteenth_ref_pic_ptr->border) - 1; |
805 | |
|
806 | 0 | int16_t x_search_region_distance = sa_width * sr_w; |
807 | 0 | int16_t y_search_region_distance = sa_height * sr_h; |
808 | 0 | int16_t sa_origin_x = -(int16_t)((sa_width * me_ctx->num_hme_sa_w) >> 1) + x_search_region_distance; |
809 | 0 | int16_t sa_origin_y = -(int16_t)((sa_height * me_ctx->num_hme_sa_h) >> 1) + y_search_region_distance; |
810 | | // Correct the left edge of the Search Area if it is not on the reference picture |
811 | 0 | if (((org_x + sa_origin_x) < -pad_width)) { |
812 | 0 | sa_origin_x = -pad_width - org_x; |
813 | 0 | sa_width = sa_width - (-pad_width - (org_x + sa_origin_x)); |
814 | 0 | } |
815 | | |
816 | | // Correct the right edge of the Search Area if its not on the reference picture |
817 | 0 | if (((org_x + sa_origin_x) > (int16_t)sixteenth_ref_pic_ptr->width - 1)) { |
818 | 0 | sa_origin_x = sa_origin_x - ((org_x + sa_origin_x) - ((int16_t)sixteenth_ref_pic_ptr->width - 1)); |
819 | 0 | } |
820 | |
|
821 | 0 | if (((org_x + sa_origin_x + sa_width) > (int16_t)sixteenth_ref_pic_ptr->width)) { |
822 | 0 | sa_width = MAX(1, sa_width - ((org_x + sa_origin_x + sa_width) - (int16_t)sixteenth_ref_pic_ptr->width)); |
823 | 0 | } |
824 | | // Constrain x_HME_L1 to be a multiple of 8 (round down as cropping alrea performed) |
825 | 0 | sa_width = (sa_width < 8) ? sa_width : sa_width & ~0x07; |
826 | | // Correct the top edge of the Search Area if it is not on the reference picture |
827 | 0 | if (((org_y + sa_origin_y) < -pad_height)) { |
828 | 0 | sa_origin_y = -pad_height - org_y; |
829 | 0 | sa_height = sa_height - (-pad_height - (org_y + sa_origin_y)); |
830 | 0 | } |
831 | | |
832 | | // Correct the bottom edge of the Search Area if its not on the reference picture |
833 | 0 | if (((org_y + sa_origin_y) > (int16_t)sixteenth_ref_pic_ptr->height - 1)) { |
834 | 0 | sa_origin_y = sa_origin_y - ((org_y + sa_origin_y) - ((int16_t)sixteenth_ref_pic_ptr->height - 1)); |
835 | 0 | } |
836 | |
|
837 | 0 | if ((org_y + sa_origin_y + sa_height > (int16_t)sixteenth_ref_pic_ptr->height)) { |
838 | 0 | sa_height = MAX(1, sa_height - ((org_y + sa_origin_y + sa_height) - (int16_t)sixteenth_ref_pic_ptr->height)); |
839 | 0 | } |
840 | | |
841 | | // Move to the top left of the search region |
842 | 0 | int16_t x_top_left_search_region = (org_x) + sa_origin_x; |
843 | 0 | int16_t y_top_left_search_region = (org_y) + sa_origin_y; |
844 | 0 | int32_t search_region_index = x_top_left_search_region + y_top_left_search_region * sixteenth_ref_pic_ptr->y_stride; |
845 | | |
846 | | // Put the first search location into level0 results |
847 | 0 | svt_sad_loop_kernel(&me_ctx->sixteenth_b64_buffer[0], |
848 | 0 | (me_ctx->hme_search_method == FULL_SAD_SEARCH) ? me_ctx->sixteenth_b64_buffer_stride |
849 | 0 | : me_ctx->sixteenth_b64_buffer_stride * 2, |
850 | 0 | &sixteenth_ref_pic_ptr->y_buffer[search_region_index], |
851 | 0 | (me_ctx->hme_search_method == FULL_SAD_SEARCH) ? sixteenth_ref_pic_ptr->y_stride |
852 | 0 | : sixteenth_ref_pic_ptr->y_stride * 2, |
853 | 0 | (me_ctx->hme_search_method == FULL_SAD_SEARCH) ? block_height : block_height >> 1, |
854 | 0 | block_width, |
855 | | /* results */ |
856 | 0 | best_sad, |
857 | 0 | hme_l0_sc_x, |
858 | 0 | hme_l0_sc_y, |
859 | | /* range */ |
860 | 0 | sixteenth_ref_pic_ptr->y_stride, |
861 | 0 | 0, // skip search line |
862 | 0 | sa_width, |
863 | 0 | sa_height); |
864 | |
|
865 | 0 | *best_sad = (me_ctx->hme_search_method == FULL_SAD_SEARCH) |
866 | 0 | ? *best_sad |
867 | 0 | : *best_sad * 2; // Multiply by 2 because considered only ever other line |
868 | 0 | *hme_l0_sc_x += sa_origin_x; |
869 | 0 | *hme_l0_sc_x *= 4; // Multiply by 4 because operating on 1/4 resolution |
870 | 0 | *hme_l0_sc_y += sa_origin_y; |
871 | 0 | *hme_l0_sc_y *= 4; // Multiply by 4 because operating on 1/4 resolution |
872 | |
|
873 | 0 | return; |
874 | 0 | } |
875 | | |
876 | | // Perform HME Level 1 for one 64x64 block on the given picture |
877 | | static void hme_level_1(MeContext* me_ctx, // ME context Ptr, used to get/update ME results |
878 | | int16_t org_x, // Block position in the horizontal direction - quarter resolution |
879 | | int16_t org_y, // Block position in the vertical direction - quarter resolution |
880 | | uint32_t block_width, // Block width - quarter resolution |
881 | | uint32_t block_height, // Block height - quarter resolution |
882 | | EbPictureBufferDesc* quarter_ref_pic_ptr, // quarter reference picture |
883 | | int16_t sa_width, // hme level 1 search area in width |
884 | | int16_t sa_height, // hme level 1 search area in height |
885 | | int16_t hme_l0_sc_x, // input parameter, best Level0 xMV at (sr_w, sr_h) |
886 | | int16_t hme_l0_sc_y, // input parameter, best Level0 yMV at (sr_w, sr_h) |
887 | | uint64_t* best_sad, // output parameter, Level1 SAD at (sr_w, sr_h) |
888 | | int16_t* hme_l1_sc_x, // output parameter, Level1 xMV at (sr_w, sr_h) |
889 | | int16_t* hme_l1_sc_y // output parameter, Level1 yMV at (sr_w, sr_h) |
890 | 0 | ) { |
891 | | // round up the search region width to nearest multiple of 8 because the SAD calculation performance (for |
892 | | // intrinsic functions) is the same for search region width from 1 to 8 |
893 | 0 | sa_width = (int16_t)((sa_width + 7) & ~0x07); |
894 | |
|
895 | 0 | int16_t pad_width = (int16_t)(quarter_ref_pic_ptr->border) - 1; |
896 | 0 | int16_t pad_height = (int16_t)(quarter_ref_pic_ptr->border) - 1; |
897 | |
|
898 | 0 | int16_t sa_origin_x = -(sa_width >> 1) + hme_l0_sc_x; |
899 | 0 | int16_t sa_origin_y = -(sa_height >> 1) + hme_l0_sc_y; |
900 | | |
901 | | // Correct the left edge of the Search Area if it is not on the reference picture |
902 | 0 | if (((org_x + sa_origin_x) < -pad_width)) { |
903 | 0 | sa_origin_x = -pad_width - org_x; |
904 | 0 | sa_width = sa_width - (-pad_width - (org_x + sa_origin_x)); |
905 | 0 | } |
906 | | |
907 | | // Correct the right edge of the Search Area if its not on the reference picture |
908 | 0 | if (((org_x + sa_origin_x) > (int16_t)quarter_ref_pic_ptr->width - 1)) { |
909 | 0 | sa_origin_x = sa_origin_x - ((org_x + sa_origin_x) - ((int16_t)quarter_ref_pic_ptr->width - 1)); |
910 | 0 | } |
911 | |
|
912 | 0 | if (((org_x + sa_origin_x + sa_width) > (int16_t)quarter_ref_pic_ptr->width)) { |
913 | 0 | sa_width = MAX(1, sa_width - ((org_x + sa_origin_x + sa_width) - (int16_t)quarter_ref_pic_ptr->width)); |
914 | 0 | } |
915 | | |
916 | | // Constrain x_HME_L1 to be a multiple of 8 (round down as cropping alrea performed) |
917 | 0 | sa_width = (sa_width < 8) ? sa_width : sa_width & ~0x07; |
918 | | |
919 | | // Correct the top edge of the Search Area if it is not on the reference picture |
920 | 0 | if (((org_y + sa_origin_y) < -pad_height)) { |
921 | 0 | sa_origin_y = -pad_height - org_y; |
922 | 0 | sa_height = sa_height - (-pad_height - (org_y + sa_origin_y)); |
923 | 0 | } |
924 | | |
925 | | // Correct the bottom edge of the Search Area if its not on the reference picture |
926 | 0 | if (((org_y + sa_origin_y) > (int16_t)quarter_ref_pic_ptr->height - 1)) { |
927 | 0 | sa_origin_y = sa_origin_y - ((org_y + sa_origin_y) - ((int16_t)quarter_ref_pic_ptr->height - 1)); |
928 | 0 | } |
929 | |
|
930 | 0 | if ((org_y + sa_origin_y + sa_height > (int16_t)quarter_ref_pic_ptr->height)) { |
931 | 0 | sa_height = MAX(1, sa_height - ((org_y + sa_origin_y + sa_height) - (int16_t)quarter_ref_pic_ptr->height)); |
932 | 0 | } |
933 | | |
934 | | // Move to the top left of the search region |
935 | 0 | int16_t x_top_left_search_region = (org_x) + sa_origin_x; |
936 | 0 | int16_t y_top_left_search_region = (org_y) + sa_origin_y; |
937 | 0 | int32_t search_region_index = x_top_left_search_region + y_top_left_search_region * quarter_ref_pic_ptr->y_stride; |
938 | | |
939 | | // Put the first search location into level1 results |
940 | 0 | svt_sad_loop_kernel(&me_ctx->quarter_b64_buffer[0], |
941 | 0 | (me_ctx->hme_search_method == FULL_SAD_SEARCH) ? me_ctx->quarter_b64_buffer_stride |
942 | 0 | : me_ctx->quarter_b64_buffer_stride * 2, |
943 | 0 | &quarter_ref_pic_ptr->y_buffer[search_region_index], |
944 | 0 | (me_ctx->hme_search_method == FULL_SAD_SEARCH) ? quarter_ref_pic_ptr->y_stride |
945 | 0 | : quarter_ref_pic_ptr->y_stride * 2, |
946 | 0 | (me_ctx->hme_search_method == FULL_SAD_SEARCH) ? block_height : block_height >> 1, |
947 | 0 | block_width, |
948 | | /* results */ |
949 | 0 | best_sad, |
950 | 0 | hme_l1_sc_x, |
951 | 0 | hme_l1_sc_y, |
952 | | /* range */ |
953 | 0 | quarter_ref_pic_ptr->y_stride, |
954 | 0 | 0, // skip search line |
955 | 0 | sa_width, |
956 | 0 | sa_height); |
957 | |
|
958 | 0 | *best_sad = (me_ctx->hme_search_method == FULL_SAD_SEARCH) |
959 | 0 | ? *best_sad |
960 | 0 | : *best_sad * 2; // Multiply by 2 because considered only ever other line |
961 | 0 | *hme_l1_sc_x += sa_origin_x; |
962 | 0 | *hme_l1_sc_x *= 2; // Multiply by 2 because operating on 1/2 resolution |
963 | 0 | *hme_l1_sc_y += sa_origin_y; |
964 | 0 | *hme_l1_sc_y *= 2; // Multiply by 2 because operating on 1/2 resolution |
965 | |
|
966 | 0 | return; |
967 | 0 | } |
968 | | |
969 | | // Perform HME Level 2 for one 64x64 block on the given picture |
970 | | void hme_level_2(MeContext* me_ctx, // ME context Ptr, used to get/update ME results |
971 | | int16_t org_x, // Block position in the horizontal direction |
972 | | int16_t org_y, // Block position in the vertical direction |
973 | | uint32_t block_width, // Block pwidth - full resolution |
974 | | uint32_t block_height, // Block height - full resolution |
975 | | EbPictureBufferDesc* ref_pic_ptr, // reference picture |
976 | | int16_t sa_width, // hme level 1 search area in width |
977 | | int16_t sa_height, // hme level 1 search area in height |
978 | | int16_t hme_l1_sc_x, // best Level1 xMV at (sr_w, sr_h) |
979 | | int16_t hme_l1_sc_y, // best Level1 yMV at (sr_w, sr_h) |
980 | | uint64_t* best_sad, // Level2 SAD at (sr_w, sr_h) |
981 | | int16_t* hme_l2_sc_x, // Level2 xMV at (sr_w, sr_h) |
982 | | int16_t* hme_l2_sc_y // Level2 yMV at (sr_w, sr_h) |
983 | 0 | ) { |
984 | | // round up the search region width to nearest multiple of 8 because the SAD calculation performance (for |
985 | | // intrinsic functions) is the same for search region width from 1 to 8 |
986 | 0 | sa_width = (int16_t)((sa_width + 7) & ~0x07); |
987 | |
|
988 | 0 | int16_t pad_width = (int16_t)BLOCK_SIZE_64 - 1; |
989 | 0 | int16_t pad_height = (int16_t)BLOCK_SIZE_64 - 1; |
990 | |
|
991 | 0 | int16_t sa_origin_x = -(sa_width >> 1) + hme_l1_sc_x; |
992 | 0 | int16_t sa_origin_y = -(sa_height >> 1) + hme_l1_sc_y; |
993 | | |
994 | | // Correct the left edge of the Search Area if it is not on the reference picture |
995 | 0 | if (((org_x + sa_origin_x) < -pad_width)) { |
996 | 0 | sa_origin_x = -pad_width - org_x; |
997 | 0 | sa_width = sa_width - (-pad_width - (org_x + sa_origin_x)); |
998 | 0 | } |
999 | | |
1000 | | // Correct the right edge of the Search Area if its not on the reference picture |
1001 | 0 | if (((org_x + sa_origin_x) > (int16_t)ref_pic_ptr->width - 1)) { |
1002 | 0 | sa_origin_x = sa_origin_x - ((org_x + sa_origin_x) - ((int16_t)ref_pic_ptr->width - 1)); |
1003 | 0 | } |
1004 | |
|
1005 | 0 | if (((org_x + sa_origin_x + sa_width) > (int16_t)ref_pic_ptr->width)) { |
1006 | 0 | sa_width = MAX(1, sa_width - ((org_x + sa_origin_x + sa_width) - (int16_t)ref_pic_ptr->width)); |
1007 | 0 | } |
1008 | | |
1009 | | // Constrain x_HME_L1 to be a multiple of 8 (round down as cropping already performed) |
1010 | 0 | sa_width = (sa_width < 8) ? sa_width : sa_width & ~0x07; |
1011 | | |
1012 | | // Correct the top edge of the Search Area if it is not on the reference picture |
1013 | 0 | if (((org_y + sa_origin_y) < -pad_height)) { |
1014 | 0 | sa_origin_y = -pad_height - org_y; |
1015 | 0 | sa_height = sa_height - (-pad_height - (org_y + sa_origin_y)); |
1016 | 0 | } |
1017 | | |
1018 | | // Correct the bottom edge of the Search Area if its not on the reference picture |
1019 | 0 | if (((org_y + sa_origin_y) > (int16_t)ref_pic_ptr->height - 1)) { |
1020 | 0 | sa_origin_y = sa_origin_y - ((org_y + sa_origin_y) - ((int16_t)ref_pic_ptr->height - 1)); |
1021 | 0 | } |
1022 | |
|
1023 | 0 | if ((org_y + sa_origin_y + sa_height > (int16_t)ref_pic_ptr->height)) { |
1024 | 0 | sa_height = MAX(1, sa_height - ((org_y + sa_origin_y + sa_height) - (int16_t)ref_pic_ptr->height)); |
1025 | 0 | } |
1026 | | |
1027 | | // Move to the top left of the search region |
1028 | 0 | int16_t x_top_left_search_region = (org_x) + sa_origin_x; |
1029 | 0 | int16_t y_top_left_search_region = (org_y) + sa_origin_y; |
1030 | 0 | int32_t search_region_index = x_top_left_search_region + y_top_left_search_region * ref_pic_ptr->y_stride; |
1031 | | |
1032 | | // Put the first search location into level2 results |
1033 | 0 | svt_sad_loop_kernel( |
1034 | 0 | me_ctx->b64_src_ptr, |
1035 | 0 | (me_ctx->hme_search_method == FULL_SAD_SEARCH) ? me_ctx->b64_src_stride : me_ctx->b64_src_stride * 2, |
1036 | 0 | &ref_pic_ptr->y_buffer[search_region_index], |
1037 | 0 | (me_ctx->hme_search_method == FULL_SAD_SEARCH) ? ref_pic_ptr->y_stride : ref_pic_ptr->y_stride * 2, |
1038 | 0 | (me_ctx->hme_search_method == FULL_SAD_SEARCH) ? block_height : block_height >> 1, |
1039 | 0 | block_width, |
1040 | | /* results */ |
1041 | 0 | best_sad, |
1042 | 0 | hme_l2_sc_x, |
1043 | 0 | hme_l2_sc_y, |
1044 | | /* range */ |
1045 | 0 | ref_pic_ptr->y_stride, |
1046 | 0 | 0, // skip search line |
1047 | 0 | sa_width, |
1048 | 0 | sa_height); |
1049 | |
|
1050 | 0 | *best_sad = (me_ctx->hme_search_method == FULL_SAD_SEARCH) |
1051 | 0 | ? *best_sad |
1052 | 0 | : *best_sad * 2; // Multiply by 2 because considered only ever other line |
1053 | 0 | *hme_l2_sc_x += sa_origin_x; |
1054 | 0 | *hme_l2_sc_y += sa_origin_y; |
1055 | |
|
1056 | 0 | return; |
1057 | 0 | } |
1058 | | |
1059 | | uint32_t check_00_center(EbPictureBufferDesc* ref_pic_ptr, MeContext* me_ctx, uint32_t sb_origin_x, |
1060 | | uint32_t sb_origin_y, uint32_t sb_width, uint32_t sb_height, int16_t* x_search_center, |
1061 | | int16_t* y_search_center, uint32_t zz_sad) |
1062 | | |
1063 | 0 | { |
1064 | 0 | const int16_t org_x = (int16_t)sb_origin_x; |
1065 | 0 | const int16_t org_y = (int16_t)sb_origin_y; |
1066 | 0 | const int subsample_sad = 1; |
1067 | 0 | const int16_t pad_width = (int16_t)BLOCK_SIZE_64 - 1; |
1068 | 0 | const int16_t pad_height = (int16_t)BLOCK_SIZE_64 - 1; |
1069 | |
|
1070 | 0 | int32_t search_region_index = org_x + (org_y)*ref_pic_ptr->y_stride; |
1071 | 0 | uint64_t zero_mv_sad; |
1072 | 0 | if (me_ctx->me_early_exit_th) { |
1073 | 0 | zero_mv_sad = zz_sad; |
1074 | 0 | } else { |
1075 | 0 | zero_mv_sad = svt_nxm_sad_kernel(me_ctx->b64_src_ptr, |
1076 | 0 | me_ctx->b64_src_stride << subsample_sad, |
1077 | 0 | &(ref_pic_ptr->y_buffer[search_region_index]), |
1078 | 0 | ref_pic_ptr->y_stride << subsample_sad, |
1079 | 0 | sb_height >> subsample_sad, |
1080 | 0 | sb_width); |
1081 | 0 | } |
1082 | |
|
1083 | 0 | zero_mv_sad = zero_mv_sad << subsample_sad; |
1084 | | |
1085 | | // FIX |
1086 | | // Correct the left edge of the Search Area if it is not on the reference |
1087 | | // Picture |
1088 | 0 | *x_search_center = ((org_x + *x_search_center) < -pad_width) ? -pad_width - org_x : *x_search_center; |
1089 | | // Correct the right edge of the Search Area if its not on the reference |
1090 | | // Picture |
1091 | 0 | *x_search_center = ((org_x + *x_search_center) > (int16_t)ref_pic_ptr->width - 1) |
1092 | 0 | ? *x_search_center - ((org_x + *x_search_center) - ((int16_t)ref_pic_ptr->width - 1)) |
1093 | 0 | : *x_search_center; |
1094 | | // Correct the top edge of the Search Area if it is not on the reference |
1095 | | // Picture |
1096 | 0 | *y_search_center = ((org_y + *y_search_center) < -pad_height) ? -pad_height - org_y : *y_search_center; |
1097 | | // Correct the bottom edge of the Search Area if its not on the reference |
1098 | | // Picture |
1099 | 0 | *y_search_center = ((org_y + *y_search_center) > (int16_t)ref_pic_ptr->height - 1) |
1100 | 0 | ? *y_search_center - ((org_y + *y_search_center) - ((int16_t)ref_pic_ptr->height - 1)) |
1101 | 0 | : *y_search_center; |
1102 | | /// |
1103 | |
|
1104 | 0 | uint64_t zero_mv_cost = zero_mv_sad << COST_PRECISION; |
1105 | 0 | search_region_index = (int16_t)(org_x) + *x_search_center + |
1106 | 0 | ((int16_t)(org_y) + *y_search_center) * ref_pic_ptr->y_stride; |
1107 | |
|
1108 | 0 | uint64_t hme_mv_sad = svt_nxm_sad_kernel(me_ctx->b64_src_ptr, |
1109 | 0 | me_ctx->b64_src_stride << subsample_sad, |
1110 | 0 | &(ref_pic_ptr->y_buffer[search_region_index]), |
1111 | 0 | ref_pic_ptr->y_stride << subsample_sad, |
1112 | 0 | sb_height >> subsample_sad, |
1113 | 0 | sb_width); |
1114 | |
|
1115 | 0 | hme_mv_sad = hme_mv_sad << subsample_sad; |
1116 | 0 | uint64_t hme_mv_cost = hme_mv_sad << COST_PRECISION; |
1117 | 0 | uint64_t search_center_cost = MIN(zero_mv_cost, hme_mv_cost); |
1118 | |
|
1119 | 0 | *x_search_center = (search_center_cost == zero_mv_cost) ? 0 : *x_search_center; |
1120 | 0 | *y_search_center = (search_center_cost == zero_mv_cost) ? 0 : *y_search_center; |
1121 | 0 | return hme_mv_sad; |
1122 | 0 | } |
1123 | | |
1124 | | // get ME references based on level: |
1125 | | // level: 0 => sixteenth, 1 => quarter, 2 => original |
1126 | | |
1127 | | static EbPictureBufferDesc* get_me_reference(PictureParentControlSet* pcs, MeContext* me_ctx, uint8_t list_index, |
1128 | | uint8_t ref_pic_index, uint8_t level, uint16_t* dist, uint16_t input_width, |
1129 | 0 | uint16_t input_height) { |
1130 | 0 | EbPictureBufferDesc* ref_pic_ptr; |
1131 | 0 | ref_pic_ptr = level == 0 ? me_ctx->me_ds_ref_array[list_index][ref_pic_index].sixteenth_picture_ptr |
1132 | 0 | : level == 1 ? me_ctx->me_ds_ref_array[list_index][ref_pic_index].quarter_picture_ptr |
1133 | 0 | : me_ctx->me_ds_ref_array[list_index][ref_pic_index].picture_ptr; |
1134 | |
|
1135 | 0 | if ((input_width >> (2 - level)) != ref_pic_ptr->width || (input_height >> (2 - level)) != ref_pic_ptr->height) { |
1136 | 0 | SVT_WARN("picture %3llu: HME level%d resolution mismatch! input (%dx%d) != (%dx%d) pa ref. \n", |
1137 | 0 | pcs->picture_number, |
1138 | 0 | level, |
1139 | 0 | input_width >> (2 - level), |
1140 | 0 | input_height >> (2 - level), |
1141 | 0 | ref_pic_ptr->width, |
1142 | 0 | ref_pic_ptr->height); |
1143 | 0 | } |
1144 | |
|
1145 | 0 | *dist = (int16_t)ABS((int64_t)pcs->picture_number - |
1146 | 0 | (int64_t)me_ctx->me_ds_ref_array[list_index][ref_pic_index].picture_number); |
1147 | 0 | return ref_pic_ptr; |
1148 | 0 | } |
1149 | | |
// Factor used to slow down the growth of the ME search region towards its maximum:
// returns dist * 5 / 8 (integer division), plus 1 when dist is not a multiple of 8.
uint16_t svt_aom_get_scaled_picture_distance(uint16_t dist) {
    const int scaled = (dist * 5) / 8;

    if ((dist % 8) != 0) {
        return (uint16_t)(scaled + 1);
    }
    return (uint16_t)scaled;
}
1155 | | |
// Search-area scale factors, indexed as [sc_class_me_boost - 1][hme_sad bucket].
// Bucket 1 is not selected by apply_me_sa_boost() below.
static const double search_area_multipliers[3][5] = {
    {1.0, 1.0, 3.0, 4.0, 5.0}, /* boost=1 */
    {1.0, 1.0, 2.5, 3.5, 4.5}, /* boost=2 */
    {1.0, 1.0, 2.0, 2.5, 3.5} /* boost=3 */
};

// Scale the ME search area (*width, *height) in place according to the HME SAD and the boost level.
//
//   hme_sad            - SAD used to pick the bucket: > 4*64*64 => 4, > 3*64*64 => 3, > 2*64*64 => 2,
//                        otherwise 0.
//   sc_class_me_boost  - 1-based boost level selecting the table row. Values outside the table
//                        (e.g. 0 or > 3) leave the search area unchanged instead of reading out of bounds.
//
// The scaled dimensions are saturated to the int16_t range before being stored, because converting an
// out-of-range double to an integer type is undefined behaviour.
static void apply_me_sa_boost(int16_t* width, int16_t* height, uint64_t hme_sad, int sc_class_me_boost) {
    const int num_boost_levels = (int)(sizeof(search_area_multipliers) / sizeof(search_area_multipliers[0]));
    if (sc_class_me_boost < 1 || sc_class_me_boost > num_boost_levels) {
        return;
    }

    int index;
    if (hme_sad > 4 * 64 * 64) {
        index = 4;
    } else if (hme_sad > 3 * 64 * 64) {
        index = 3;
    } else if (hme_sad > 2 * 64 * 64) {
        index = 2;
    } else {
        index = 0;
    }

    const double mult = search_area_multipliers[sc_class_me_boost - 1][index];

    const double scaled_width  = *width * mult;
    const double scaled_height = *height * mult;

    *width  = (int16_t)(scaled_width > INT16_MAX ? INT16_MAX : scaled_width < INT16_MIN ? INT16_MIN : scaled_width);
    *height = (int16_t)(scaled_height > INT16_MAX ? INT16_MAX
                            : scaled_height < INT16_MIN ? INT16_MIN
                                                        : scaled_height);
}
1179 | | |
1180 | | /******************************************* |
1181 |       |  * performs integer search motion estimation for |
1182 |       |  all available reference frames |
1183 | | *******************************************/ |
1184 | | static void integer_search_b64(PictureParentControlSet* pcs, MeContext* me_ctx, uint32_t b64_origin_x, |
1185 | 0 | uint32_t b64_origin_y, EbPictureBufferDesc* input_ptr) { |
1186 | 0 | int16_t picture_width = pcs->aligned_width; |
1187 | 0 | int16_t picture_height = pcs->aligned_height; |
1188 | 0 | uint32_t b64_width = me_ctx->b64_width; |
1189 | 0 | uint32_t b64_height = me_ctx->b64_height; |
1190 | 0 | int16_t pad_width = (int16_t)BLOCK_SIZE_64 - 1; |
1191 | 0 | int16_t pad_height = (int16_t)BLOCK_SIZE_64 - 1; |
1192 | 0 | int16_t org_x = (int16_t)b64_origin_x; |
1193 | 0 | int16_t org_y = (int16_t)b64_origin_y; |
1194 | 0 | int16_t search_area_width; |
1195 | 0 | int16_t search_area_height; |
1196 | 0 | int16_t x_search_area_origin; |
1197 | 0 | int16_t y_search_area_origin; |
1198 | 0 | int16_t x_top_left_search_region; |
1199 | 0 | int16_t y_top_left_search_region; |
1200 | 0 | int32_t search_region_index; |
1201 | 0 | uint32_t num_of_list_to_search; |
1202 | 0 | uint32_t list_index; |
1203 | 0 | uint8_t ref_pic_index; |
1204 | | // Final ME Search Center |
1205 | 0 | int16_t x_search_center = 0; |
1206 | 0 | int16_t y_search_center = 0; |
1207 | 0 | EbPictureBufferDesc* ref_pic_ptr; |
1208 | 0 | num_of_list_to_search = me_ctx->num_of_list_to_search; |
1209 | | |
1210 | | // Uni-Prediction motion estimation loop |
1211 | | // List Loop |
1212 | 0 | for (list_index = REF_LIST_0; list_index < num_of_list_to_search; ++list_index) { |
1213 | 0 | uint8_t num_of_ref_pic_to_search = me_ctx->num_of_ref_pic_to_search[list_index]; |
1214 | | |
1215 | | // Ref Picture Loop |
1216 | 0 | for (ref_pic_index = 0; ref_pic_index < num_of_ref_pic_to_search; ++ref_pic_index) { |
1217 | 0 | uint16_t dist = 0; |
1218 | 0 | ref_pic_ptr = get_me_reference( |
1219 | 0 | pcs, me_ctx, list_index, ref_pic_index, 2, &dist, input_ptr->width, input_ptr->height); |
1220 | | // Get hme results |
1221 | 0 | if (me_ctx->search_results[list_index][ref_pic_index].do_ref == 0) { |
1222 | 0 | continue; //so will not get ME results for those references. |
1223 | 0 | } |
1224 | 0 | x_search_center = me_ctx->search_results[list_index][ref_pic_index].hme_sc_x; |
1225 | 0 | y_search_center = me_ctx->search_results[list_index][ref_pic_index].hme_sc_y; |
1226 | 0 | search_area_width = me_ctx->me_sa.sa_min.width; |
1227 | 0 | search_area_height = me_ctx->me_sa.sa_min.height; |
1228 | | |
1229 | | // factor to slowdown the ME search region growth to MAX |
1230 | 0 | if (me_ctx->me_type != ME_MCTF) { |
1231 | 0 | dist = svt_aom_get_scaled_picture_distance(dist); |
1232 | 0 | } |
1233 | 0 | search_area_width = MIN((search_area_width * dist), me_ctx->me_sa.sa_max.width); |
1234 | 0 | search_area_height = MIN((search_area_height * dist), me_ctx->me_sa.sa_max.height); |
1235 | 0 | if (me_ctx->mv_based_sa_adj.enabled && (!me_ctx->mv_based_sa_adj.nearest_ref_only || ref_pic_index == 0)) { |
1236 | 0 | if (ABS(x_search_center) > me_ctx->mv_based_sa_adj.mv_size_th) { |
1237 | 0 | search_area_width *= me_ctx->mv_based_sa_adj.sa_multiplier; |
1238 | 0 | } |
1239 | 0 | if (ABS(y_search_center) > me_ctx->mv_based_sa_adj.mv_size_th) { |
1240 | 0 | search_area_height *= me_ctx->mv_based_sa_adj.sa_multiplier; |
1241 | 0 | } |
1242 | 0 | } |
1243 | 0 | if (me_ctx->sc_class_me_boost && |
1244 | 0 | (pcs->ahd_error == (uint32_t)~0 || // Use ahd_error only when it is derived |
1245 | 0 | pcs->ahd_error < |
1246 | 0 | ((((20 * pcs->enhanced_pic->width * pcs->enhanced_pic->height) / 128)) * |
1247 | 0 | (uint32_t)(INPUT_SIZE_COUNT - |
1248 | 0 | pcs->input_resolution)))) { // Only if there are low temporal variations between frames |
1249 | 0 | const uint64_t hme_sad = me_ctx->search_results[list_index][ref_pic_index].hme_sad; |
1250 | 0 | apply_me_sa_boost(&search_area_width, &search_area_height, hme_sad, me_ctx->sc_class_me_boost); |
1251 | 0 | } |
1252 | | // Constrain x_ME to be a multiple of 8 (round up) |
1253 | | // Update ME search reagion size based on hme-data |
1254 | 0 | search_area_width = (MAX(1, (search_area_width / me_ctx->reduce_me_sr_divisor[list_index][ref_pic_index])) + |
1255 | 0 | 7) & |
1256 | 0 | ~0x07; |
1257 | 0 | search_area_height = MAX(3, (search_area_height / me_ctx->reduce_me_sr_divisor[list_index][ref_pic_index])); |
1258 | 0 | int16_t search_area_height_before_sr_reduction = search_area_height; |
1259 | 0 | uint64_t best_hme_sad = (uint64_t)~0; |
1260 | 0 | if (me_ctx->me_early_exit_th) { |
1261 | 0 | if (me_ctx->zz_sad[list_index][ref_pic_index] < (me_ctx->me_early_exit_th / 6)) { |
1262 | 0 | search_area_width = 1; |
1263 | 0 | search_area_height = 1; |
1264 | 0 | } |
1265 | 0 | } else { |
1266 | 0 | uint8_t hme_is_accuarte = 1; |
1267 | 0 | if ((x_search_center != 0 || y_search_center != 0) && (me_ctx->is_ref == true)) { |
1268 | 0 | best_hme_sad = check_00_center(ref_pic_ptr, |
1269 | 0 | me_ctx, |
1270 | 0 | b64_origin_x, |
1271 | 0 | b64_origin_y, |
1272 | 0 | b64_width, |
1273 | 0 | b64_height, |
1274 | 0 | &x_search_center, |
1275 | 0 | &y_search_center, |
1276 | 0 | me_ctx->zz_sad[list_index][ref_pic_index]); |
1277 | |
|
1278 | 0 | if (x_search_center == 0 && y_search_center == 0) { |
1279 | 0 | hme_is_accuarte = 0; |
1280 | 0 | } |
1281 | 0 | } |
1282 | 0 | if (me_ctx->me_sr_adjustment_ctrls.enable_me_sr_adjustment == 2) { |
1283 | 0 | if ((hme_is_accuarte && (best_hme_sad < (24 * 24))) || |
1284 | 0 | (me_ctx->is_ref && me_ctx->search_results[list_index][ref_pic_index].hme_sad < (24 * 24))) { |
1285 | 0 | search_area_height = search_area_height / 2; |
1286 | 0 | } |
1287 | 0 | if (list_index || ref_pic_index) { |
1288 | 0 | if (me_ctx->p_sb_best_sad[0][0][0] < 5000) { |
1289 | 0 | if (search_area_height == search_area_height_before_sr_reduction) { |
1290 | 0 | search_area_height = search_area_height >> 1; |
1291 | 0 | search_area_width = search_area_width >> 1; |
1292 | 0 | } |
1293 | 0 | } |
1294 | 0 | } |
1295 | 0 | } |
1296 | 0 | } |
1297 | 0 | svt_initialize_buffer_32bits(me_ctx->p_sb_best_sad[list_index][ref_pic_index], 21, 1, MAX_SAD_VALUE); |
1298 | 0 | me_ctx->p_best_sad_64x64 = &(me_ctx->p_sb_best_sad[list_index][ref_pic_index][ME_TIER_ZERO_PU_64x64]); |
1299 | 0 | me_ctx->p_best_sad_32x32 = &(me_ctx->p_sb_best_sad[list_index][ref_pic_index][ME_TIER_ZERO_PU_32x32_0]); |
1300 | 0 | me_ctx->p_best_sad_16x16 = &(me_ctx->p_sb_best_sad[list_index][ref_pic_index][ME_TIER_ZERO_PU_16x16_0]); |
1301 | 0 | me_ctx->p_best_sad_8x8 = &(me_ctx->p_sb_best_sad[list_index][ref_pic_index][ME_TIER_ZERO_PU_8x8_0]); |
1302 | |
|
1303 | 0 | me_ctx->p_best_mv64x64 = &(me_ctx->p_sb_best_mv[list_index][ref_pic_index][ME_TIER_ZERO_PU_64x64]); |
1304 | 0 | me_ctx->p_best_mv32x32 = &(me_ctx->p_sb_best_mv[list_index][ref_pic_index][ME_TIER_ZERO_PU_32x32_0]); |
1305 | 0 | me_ctx->p_best_mv16x16 = &(me_ctx->p_sb_best_mv[list_index][ref_pic_index][ME_TIER_ZERO_PU_16x16_0]); |
1306 | 0 | me_ctx->p_best_mv8x8 = &(me_ctx->p_sb_best_mv[list_index][ref_pic_index][ME_TIER_ZERO_PU_8x8_0]); |
1307 | | |
1308 | | /* If search area is large enough, check the ME 8x8 SAD variance, and if low, reduce search area |
1309 | | * (as the 64x64 MVs are likely good for all the 8x8 blocks that make it up). If the search area |
1310 | | * is already low, the overhead of searching one additional point will be high (and fruitless, since |
1311 | | * the minimum search size that will be set by the 8x8 SAD variance algorithm is 8x3. |
1312 | | */ |
1313 | 0 | if (me_ctx->me_8x8_var_ctrls.enabled && (search_area_width * search_area_height > 24)) { |
1314 | 0 | x_search_area_origin = x_search_center; |
1315 | 0 | y_search_area_origin = y_search_center; |
1316 | 0 | x_top_left_search_region = (int16_t)(b64_origin_x) - (ME_FILTER_TAP >> 1) + x_search_area_origin; |
1317 | 0 | y_top_left_search_region = (int16_t)(b64_origin_y) - (ME_FILTER_TAP >> 1) + y_search_area_origin; |
1318 | 0 | search_region_index = (x_top_left_search_region) + (y_top_left_search_region)*ref_pic_ptr->y_stride; |
1319 | 0 | me_ctx->integer_buffer_ptr[list_index][ref_pic_index] = &(ref_pic_ptr->y_buffer[search_region_index]); |
1320 | 0 | me_ctx->interpolated_full_stride[list_index][ref_pic_index] = ref_pic_ptr->y_stride; |
1321 | |
|
1322 | 0 | open_loop_me_fullpel_search_sblock( |
1323 | 0 | me_ctx, list_index, ref_pic_index, x_search_center, y_search_center, 1, 1); |
1324 | | |
1325 | | // Since only one point was searched, the 64x64 SAD will be the same as the sum of the 8x8 SADs |
1326 | 0 | const uint32_t mean_dist_8x8 = me_ctx->p_best_sad_64x64[0] / 64; |
1327 | 0 | uint32_t sum_ofsq_dist_8x8 = 0; |
1328 | 0 | for (unsigned i = 0; i < 64; i++) { |
1329 | 0 | const int32_t diff = ((int32_t)me_ctx->p_best_sad_8x8[i] - (int32_t)mean_dist_8x8); |
1330 | 0 | sum_ofsq_dist_8x8 += diff * diff; |
1331 | 0 | } |
1332 | |
|
1333 | 0 | uint32_t me_8x8_cost_var = (uint32_t)(sum_ofsq_dist_8x8 / 64); |
1334 | |
|
1335 | 0 | if (me_8x8_cost_var > me_ctx->me_8x8_var_ctrls.me_sr_mult2_th) { |
1336 | 0 | search_area_width = (MAX(1, search_area_width * 3 / 2) + 7) & ~0x7; |
1337 | 0 | search_area_height = MAX(1, search_area_height * 3 / 2); |
1338 | 0 | } |
1339 | |
|
1340 | 0 | if (me_8x8_cost_var < me_ctx->me_8x8_var_ctrls.me_sr_div4_th) { |
1341 | 0 | search_area_width = (MAX(1, search_area_width >> 2) + 7) & ~0x7; |
1342 | 0 | search_area_height = MAX(1, search_area_height >> 2); |
1343 | 0 | search_area_height = MAX(3, search_area_height); |
1344 | 0 | } else if (me_8x8_cost_var < me_ctx->me_8x8_var_ctrls.me_sr_div2_th) { |
1345 | 0 | search_area_width = (MIN(search_area_width, search_area_width >> 1) + 7) & ~0x7; |
1346 | 0 | search_area_height = MIN(search_area_height, search_area_height >> 1); |
1347 | 0 | search_area_height = MAX(3, search_area_height); |
1348 | 0 | } |
1349 | 0 | } |
1350 | 0 | x_search_area_origin = x_search_center - (search_area_width >> 1); |
1351 | 0 | y_search_area_origin = y_search_center - (search_area_height >> 1); |
1352 | | |
1353 | | // Correct the left edge of the Search Area if it is not on the |
1354 | | // reference Picture |
1355 | 0 | x_search_area_origin = ((org_x + x_search_area_origin) < -pad_width) ? -pad_width - org_x |
1356 | 0 | : x_search_area_origin; |
1357 | 0 | search_area_width = ((org_x + x_search_area_origin) < -pad_width) |
1358 | 0 | ? search_area_width - (-pad_width - (org_x + x_search_area_origin)) |
1359 | 0 | : search_area_width; |
1360 | | // Correct the right edge of the Search Area if its not on the |
1361 | | // reference Picture |
1362 | 0 | x_search_area_origin = ((org_x + x_search_area_origin) > picture_width - 1) |
1363 | 0 | ? x_search_area_origin - ((org_x + x_search_area_origin) - (picture_width - 1)) |
1364 | 0 | : x_search_area_origin; |
1365 | |
|
1366 | 0 | search_area_width = ((org_x + x_search_area_origin + search_area_width) > picture_width) |
1367 | 0 | ? MAX(1, search_area_width - ((org_x + x_search_area_origin + search_area_width) - picture_width)) |
1368 | 0 | : search_area_width; |
1369 | | |
1370 | | // Constrain x_ME to be a multiple of 8 (round down as cropping |
1371 | | // already performed) |
1372 | 0 | search_area_width = (search_area_width < 8) ? search_area_width : search_area_width & ~0x07; |
1373 | | |
1374 | | // Correct the top edge of the Search Area if it is not on the |
1375 | | // reference Picture |
1376 | 0 | y_search_area_origin = ((org_y + y_search_area_origin) < -pad_height) ? -pad_height - org_y |
1377 | 0 | : y_search_area_origin; |
1378 | 0 | search_area_height = ((org_y + y_search_area_origin) < -pad_height) |
1379 | 0 | ? search_area_height - (-pad_height - (org_y + y_search_area_origin)) |
1380 | 0 | : search_area_height; |
1381 | | // Correct the bottom edge of the Search Area if its not on the |
1382 | | // reference Picture |
1383 | 0 | y_search_area_origin = ((org_y + y_search_area_origin) > picture_height - 1) |
1384 | 0 | ? y_search_area_origin - ((org_y + y_search_area_origin) - (picture_height - 1)) |
1385 | 0 | : y_search_area_origin; |
1386 | 0 | search_area_height = (org_y + y_search_area_origin + search_area_height > picture_height) |
1387 | 0 | ? MAX(1, search_area_height - ((org_y + y_search_area_origin + search_area_height) - picture_height)) |
1388 | 0 | : search_area_height; |
1389 | |
|
1390 | 0 | x_top_left_search_region = (int16_t)(b64_origin_x) - (ME_FILTER_TAP >> 1) + x_search_area_origin; |
1391 | 0 | y_top_left_search_region = (int16_t)(b64_origin_y) - (ME_FILTER_TAP >> 1) + y_search_area_origin; |
1392 | 0 | search_region_index = (x_top_left_search_region) + (y_top_left_search_region)*ref_pic_ptr->y_stride; |
1393 | 0 | me_ctx->integer_buffer_ptr[list_index][ref_pic_index] = &(ref_pic_ptr->y_buffer[search_region_index]); |
1394 | 0 | me_ctx->interpolated_full_stride[list_index][ref_pic_index] = ref_pic_ptr->y_stride; |
1395 | | |
1396 | | // Move to the top left of the search region |
1397 | 0 | x_top_left_search_region = (int16_t)(b64_origin_x) + x_search_area_origin; |
1398 | 0 | y_top_left_search_region = (int16_t)(b64_origin_y) + y_search_area_origin; |
1399 | 0 | open_loop_me_fullpel_search_sblock(me_ctx, |
1400 | 0 | list_index, |
1401 | 0 | ref_pic_index, |
1402 | 0 | x_search_area_origin, |
1403 | 0 | y_search_area_origin, |
1404 | 0 | search_area_width, |
1405 | 0 | search_area_height); |
1406 | 0 | } |
1407 | 0 | } |
1408 | 0 | } |
1409 | | |
1410 | | /* |
1411 | | using previous stage ME results (Integer Search) for each reference |
1412 | | frame. keep only the references that are close to the best reference. |
1413 | | */ |
1414 | 0 | static void me_prune_ref(MeContext* me_ctx) { |
1415 | 0 | uint8_t num_of_list_to_search = me_ctx->num_of_list_to_search; |
1416 | 0 | for (uint8_t list_index = REF_LIST_0; list_index < num_of_list_to_search; ++list_index) { |
1417 | 0 | uint8_t num_of_ref_pic_to_search = me_ctx->num_of_ref_pic_to_search[list_index]; |
1418 | | // Ref Picture Loop |
1419 | 0 | for (uint8_t ref_pic_index = 0; ref_pic_index < num_of_ref_pic_to_search; ++ref_pic_index) { |
1420 | 0 | me_ctx->search_results[list_index][ref_pic_index].hme_sad = 0; |
1421 | | // Get hme results |
1422 | 0 | if (me_ctx->search_results[list_index][ref_pic_index].do_ref == 0) { |
1423 | 0 | me_ctx->search_results[list_index][ref_pic_index].hme_sad = MAX_SAD_VALUE * 64; |
1424 | 0 | continue; |
1425 | 0 | } |
1426 | 0 | me_ctx->p_best_sad_8x8 = &(me_ctx->p_sb_best_sad[list_index][ref_pic_index][ME_TIER_ZERO_PU_8x8_0]); |
1427 | | // 8x8 [64 partitions] |
1428 | 0 | for (uint32_t pu_index = 0; pu_index < 64; ++pu_index) { |
1429 | 0 | uint32_t idx = tab8x8[pu_index]; |
1430 | 0 | me_ctx->search_results[list_index][ref_pic_index].hme_sad += me_ctx->p_best_sad_8x8[idx]; |
1431 | 0 | } |
1432 | 0 | } |
1433 | 0 | } |
1434 | |
|
1435 | 0 | uint16_t prune_ref_th = me_ctx->me_hme_prune_ctrls.prune_ref_if_me_sad_dev_bigger_than_th; |
1436 | 0 | if (me_ctx->me_hme_prune_ctrls.enable_me_hme_ref_pruning && prune_ref_th != (uint16_t)~0) { |
1437 | 0 | uint64_t best = (uint64_t)~0; |
1438 | 0 | for (int i = 0; i < MAX_NUM_OF_REF_PIC_LIST; ++i) { |
1439 | 0 | for (int j = 0; j < REF_LIST_MAX_DEPTH; ++j) { |
1440 | 0 | if (me_ctx->search_results[i][j].hme_sad < best) { |
1441 | 0 | best = me_ctx->search_results[i][j].hme_sad; |
1442 | 0 | } |
1443 | 0 | } |
1444 | 0 | } |
1445 | 0 | for (uint32_t li = 0; li < MAX_NUM_OF_REF_PIC_LIST; li++) { |
1446 | 0 | for (uint32_t ri = 1; ri < REF_LIST_MAX_DEPTH; ri++) { |
1447 | | // Prune references based on ME sad |
1448 | 0 | if ((me_ctx->search_results[li][ri].hme_sad - best) * 100 > (prune_ref_th * best)) { |
1449 | 0 | me_ctx->search_results[li][ri].do_ref = 0; |
1450 | 0 | } |
1451 | 0 | } |
1452 | 0 | } |
1453 | 0 | } |
1454 | 0 | } |
1455 | | |
1456 | | /* perform motion search over a given search area*/ |
1457 | | static void prehme_core(MeContext* me_ctx, int16_t org_x, int16_t org_y, uint32_t sb_width, uint32_t sb_height, |
1458 | 0 | EbPictureBufferDesc* sixteenth_ref_pic_ptr, SearchInfo* prehme_data) { |
1459 | 0 | int16_t x_top_left_search_region; |
1460 | 0 | int16_t y_top_left_search_region; |
1461 | 0 | int32_t search_region_index; |
1462 | |
|
1463 | 0 | int16_t pad_width = (int16_t)(sixteenth_ref_pic_ptr->border) - 1; |
1464 | 0 | int16_t pad_height = (int16_t)(sixteenth_ref_pic_ptr->border) - 1; |
1465 | |
|
1466 | 0 | int16_t search_area_width = prehme_data->sa.width; |
1467 | 0 | int16_t search_area_height = prehme_data->sa.height; |
1468 | |
|
1469 | 0 | int16_t x_search_area_origin = -(int16_t)(search_area_width >> 1); |
1470 | 0 | int16_t y_search_area_origin = -(int16_t)(search_area_height >> 1); |
1471 | | |
1472 | | // Correct the left edge of the Search Area if it is not on the reference Picture |
1473 | 0 | x_search_area_origin = ((org_x + x_search_area_origin) < -pad_width) ? -pad_width - org_x : x_search_area_origin; |
1474 | |
|
1475 | 0 | search_area_width = ((org_x + x_search_area_origin) < -pad_width) |
1476 | 0 | ? search_area_width - (-pad_width - (org_x + x_search_area_origin)) |
1477 | 0 | : search_area_width; |
1478 | | |
1479 | | // Correct the right edge of the Search Area if its not on the reference Picture |
1480 | 0 | x_search_area_origin = ((org_x + x_search_area_origin) > (int16_t)sixteenth_ref_pic_ptr->width - 1) |
1481 | 0 | ? x_search_area_origin - ((org_x + x_search_area_origin) - ((int16_t)sixteenth_ref_pic_ptr->width - 1)) |
1482 | 0 | : x_search_area_origin; |
1483 | |
|
1484 | 0 | search_area_width = ((org_x + x_search_area_origin + search_area_width) > (int16_t)sixteenth_ref_pic_ptr->width) |
1485 | 0 | ? MAX(1, |
1486 | 0 | search_area_width - |
1487 | 0 | ((org_x + x_search_area_origin + search_area_width) - (int16_t)sixteenth_ref_pic_ptr->width)) |
1488 | 0 | : search_area_width; |
1489 | | |
1490 | | // Correct the top edge of the Search Area if it is not on the reference Picture |
1491 | 0 | y_search_area_origin = ((org_y + y_search_area_origin) < -pad_height) ? -pad_height - org_y : y_search_area_origin; |
1492 | |
|
1493 | 0 | search_area_height = ((org_y + y_search_area_origin) < -pad_height) |
1494 | 0 | ? search_area_height - (-pad_height - (org_y + y_search_area_origin)) |
1495 | 0 | : search_area_height; |
1496 | | |
1497 | | // Correct the bottom edge of the Search Area if its not on the reference Picture |
1498 | 0 | y_search_area_origin = ((org_y + y_search_area_origin) > (int16_t)sixteenth_ref_pic_ptr->height - 1) |
1499 | 0 | ? y_search_area_origin - ((org_y + y_search_area_origin) - ((int16_t)sixteenth_ref_pic_ptr->height - 1)) |
1500 | 0 | : y_search_area_origin; |
1501 | |
|
1502 | 0 | search_area_height = (org_y + y_search_area_origin + search_area_height > (int16_t)sixteenth_ref_pic_ptr->height) |
1503 | 0 | ? MAX(1, |
1504 | 0 | search_area_height - |
1505 | 0 | ((org_y + y_search_area_origin + search_area_height) - (int16_t)sixteenth_ref_pic_ptr->height)) |
1506 | 0 | : search_area_height; |
1507 | |
|
1508 | 0 | x_top_left_search_region = (org_x) + x_search_area_origin; |
1509 | 0 | y_top_left_search_region = (org_y) + y_search_area_origin; |
1510 | 0 | search_region_index = x_top_left_search_region + y_top_left_search_region * sixteenth_ref_pic_ptr->y_stride; |
1511 | |
|
1512 | 0 | svt_sad_loop_kernel(&me_ctx->sixteenth_b64_buffer[0], |
1513 | 0 | me_ctx->hme_search_method == FULL_SAD_SEARCH ? me_ctx->sixteenth_b64_buffer_stride |
1514 | 0 | : me_ctx->sixteenth_b64_buffer_stride * 2, |
1515 | 0 | &sixteenth_ref_pic_ptr->y_buffer[search_region_index], |
1516 | 0 | (me_ctx->hme_search_method == FULL_SAD_SEARCH) ? sixteenth_ref_pic_ptr->y_stride |
1517 | 0 | : sixteenth_ref_pic_ptr->y_stride * 2, |
1518 | 0 | (me_ctx->hme_search_method == FULL_SAD_SEARCH) ? sb_height : sb_height >> 1, |
1519 | 0 | sb_width, |
1520 | | /* results */ |
1521 | 0 | &prehme_data->sad, |
1522 | 0 | &prehme_data->best_mv.x, |
1523 | 0 | &prehme_data->best_mv.y, |
1524 | 0 | sixteenth_ref_pic_ptr->y_stride, |
1525 | 0 | me_ctx->prehme_ctrl.skip_search_line, |
1526 | 0 | search_area_width, |
1527 | 0 | search_area_height); |
1528 | |
|
1529 | 0 | prehme_data->sad = (me_ctx->hme_search_method == FULL_SAD_SEARCH) |
1530 | 0 | ? prehme_data->sad |
1531 | 0 | : prehme_data->sad * 2; // Multiply by 2 because considered only ever other line |
1532 | 0 | prehme_data->best_mv.x += x_search_area_origin; |
1533 | 0 | prehme_data->best_mv.x *= 4; // Multiply by 4 because operating on 1/4 resolution |
1534 | 0 | prehme_data->best_mv.y += y_search_area_origin; |
1535 | 0 | prehme_data->best_mv.y *= 4; // Multiply by 4 because operating on 1/4 resolution |
1536 | 0 | prehme_data->valid = 1; |
1537 | 0 | return; |
1538 | 0 | } |
1539 | | |
1540 | | static uint32_t get_zz_sad(EbPictureBufferDesc* ref_pic_ptr, MeContext* me_ctx, uint32_t sb_origin_x, |
1541 | | uint32_t sb_origin_y, uint32_t sb_width, uint32_t sb_height) |
1542 | | |
1543 | 0 | { |
1544 | 0 | uint32_t zero_mv_sad; |
1545 | 0 | int16_t org_x = (int16_t)sb_origin_x; |
1546 | 0 | int16_t org_y = (int16_t)sb_origin_y; |
1547 | 0 | uint32_t subsample_sad = 1; |
1548 | |
|
1549 | 0 | int32_t search_region_index = org_x + (org_y)*ref_pic_ptr->y_stride; |
1550 | |
|
1551 | 0 | zero_mv_sad = svt_nxm_sad_kernel(me_ctx->b64_src_ptr, |
1552 | 0 | me_ctx->b64_src_stride << subsample_sad, |
1553 | 0 | &(ref_pic_ptr->y_buffer[search_region_index]), |
1554 | 0 | ref_pic_ptr->y_stride << subsample_sad, |
1555 | 0 | sb_height >> subsample_sad, |
1556 | 0 | sb_width); |
1557 | |
|
1558 | 0 | zero_mv_sad = zero_mv_sad << subsample_sad; |
1559 | |
|
1560 | 0 | return zero_mv_sad; |
1561 | 0 | } |
1562 | | |
1563 | | // Determine if pre-HME for the current picture and search region should be skipped. |
1564 | | // Return 1 if can early exit (i.e. skip pre-hme for current frame and search region) |
1565 | | // Return 0 if can't skip |
1566 | 0 | static bool check_prehme_early_exit(MeContext* me_ctx, uint8_t list_i, uint8_t ref_i, uint8_t sr_i) { |
1567 | 0 | SearchInfo* prehme_data = &me_ctx->prehme_data[list_i][ref_i][sr_i]; |
1568 | |
|
1569 | 0 | if (me_ctx->me_early_exit_th) { |
1570 | 0 | if (me_ctx->zz_sad[list_i][ref_i] < me_ctx->me_early_exit_th) { |
1571 | 0 | prehme_data->best_mv.as_int = 0; |
1572 | 0 | prehme_data->sad = 0; |
1573 | 0 | prehme_data->valid = 1; |
1574 | 0 | return 1; |
1575 | 0 | } |
1576 | 0 | } |
1577 | | |
1578 | 0 | if (me_ctx->prehme_ctrl.l1_early_exit) { |
1579 | 0 | if (list_i == 1 && me_ctx->prehme_data[0][ref_i][sr_i].valid && |
1580 | 0 | ((me_ctx->prehme_data[0][ref_i][sr_i].sad < (32 * 32)) || |
1581 | 0 | ((ABS(me_ctx->prehme_data[0][ref_i][sr_i].best_mv.x) < 16) && |
1582 | 0 | (ABS(me_ctx->prehme_data[0][ref_i][sr_i].best_mv.y) < 16)))) { |
1583 | 0 | prehme_data->best_mv.x = -me_ctx->prehme_data[0][ref_i][sr_i].best_mv.x; |
1584 | 0 | prehme_data->best_mv.y = -me_ctx->prehme_data[0][ref_i][sr_i].best_mv.y; |
1585 | 0 | prehme_data->sad = me_ctx->prehme_data[0][ref_i][sr_i].sad; |
1586 | 0 | prehme_data->valid = 1; |
1587 | 0 | return 1; |
1588 | 0 | } |
1589 | 0 | } |
1590 | 0 | return 0; |
1591 | 0 | } |
1592 | | |
1593 | | /* Perform Pre-HME for one Block 64x64*/ |
1594 | | static void prehme_b64(PictureParentControlSet* pcs, uint32_t org_x, uint32_t org_y, MeContext* me_ctx, |
1595 | 0 | EbPictureBufferDesc* input_ptr) { |
1596 | 0 | const uint32_t block_width = me_ctx->b64_width; |
1597 | 0 | const uint32_t block_height = me_ctx->b64_height; |
1598 | 0 | uint32_t best_sad = MAX_U32; |
1599 | | // List Loop |
1600 | 0 | for (int list_i = REF_LIST_0; list_i < me_ctx->num_of_list_to_search; ++list_i) { |
1601 | | // Ref Picture Loop |
1602 | 0 | const uint8_t num_of_ref_pic_to_search = me_ctx->num_of_ref_pic_to_search[list_i]; |
1603 | 0 | for (uint8_t ref_i = 0; ref_i < num_of_ref_pic_to_search; ++ref_i) { |
1604 | 0 | uint16_t dist = 0; |
1605 | 0 | EbPictureBufferDesc* sixteenth_ref_pic = get_me_reference( |
1606 | 0 | pcs, me_ctx, list_i, ref_i, 0, &dist, input_ptr->width, input_ptr->height); |
1607 | |
|
1608 | 0 | if (me_ctx->temporal_layer_index > 0 || list_i == 0) { |
1609 | 0 | uint32_t hme_sr_factor = svt_aom_get_scaled_picture_distance(dist); |
1610 | |
|
1611 | 0 | for (uint8_t sr_i = 0; sr_i < SEARCH_REGION_COUNT; sr_i++) { |
1612 | 0 | if (check_prehme_early_exit(me_ctx, list_i, ref_i, sr_i)) { |
1613 | 0 | continue; |
1614 | 0 | } |
1615 | | |
1616 | 0 | SearchInfo* prehme_data = &me_ctx->prehme_data[list_i][ref_i][sr_i]; |
1617 | 0 | if (!me_ctx->search_results[list_i][ref_i].do_ref) { |
1618 | 0 | prehme_data->best_mv.as_int = 0; |
1619 | 0 | prehme_data->sad = MAX_U32; |
1620 | 0 | continue; |
1621 | 0 | } |
1622 | 0 | prehme_data->sa.width = MIN((me_ctx->prehme_ctrl.prehme_sa_cfg[sr_i].sa_min.width * hme_sr_factor), |
1623 | 0 | me_ctx->prehme_ctrl.prehme_sa_cfg[sr_i].sa_max.width); |
1624 | 0 | prehme_data->sa.height = MIN( |
1625 | 0 | (me_ctx->prehme_ctrl.prehme_sa_cfg[sr_i].sa_min.height * hme_sr_factor), |
1626 | 0 | me_ctx->prehme_ctrl.prehme_sa_cfg[sr_i].sa_max.height); |
1627 | |
|
1628 | 0 | prehme_core(me_ctx, |
1629 | 0 | ((int16_t)org_x) >> 2, |
1630 | 0 | ((int16_t)org_y) >> 2, |
1631 | 0 | block_width >> 2, |
1632 | 0 | block_height >> 2, |
1633 | 0 | sixteenth_ref_pic, |
1634 | 0 | prehme_data); |
1635 | 0 | me_ctx->performed_phme[list_i][ref_i][sr_i] = 1; |
1636 | 0 | } |
1637 | 0 | uint32_t min_sad = (uint32_t)MIN(me_ctx->prehme_data[list_i][ref_i][0].sad, |
1638 | 0 | me_ctx->prehme_data[list_i][ref_i][1].sad); |
1639 | 0 | best_sad = MIN(best_sad, min_sad); |
1640 | 0 | } else { |
1641 | | // PW: Does this account for base pictures |
1642 | 0 | for (uint8_t sr_i = 0; sr_i < SEARCH_REGION_COUNT; sr_i++) { |
1643 | 0 | me_ctx->prehme_data[1][ref_i][sr_i].best_mv.x = -me_ctx->prehme_data[0][ref_i][sr_i].best_mv.x; |
1644 | 0 | me_ctx->prehme_data[1][ref_i][sr_i].best_mv.y = -me_ctx->prehme_data[0][ref_i][sr_i].best_mv.y; |
1645 | 0 | me_ctx->prehme_data[1][ref_i][sr_i].sad = me_ctx->prehme_data[0][ref_i][sr_i].sad; |
1646 | 0 | } |
1647 | 0 | } |
1648 | 0 | } // End ref pic loop |
1649 | 0 | } // End list loop |
1650 | 0 | if (me_ctx->temporal_layer_index > 0 && best_sad < me_ctx->me_hme_prune_ctrls.phme_sad_th) { |
1651 | 0 | for (int list_i = REF_LIST_0; list_i < me_ctx->num_of_list_to_search; ++list_i) { |
1652 | 0 | for (uint8_t ref_i = 0; ref_i < me_ctx->num_of_ref_pic_to_search[list_i]; ++ref_i) { |
1653 | 0 | if (!me_ctx->search_results[list_i][ref_i].do_ref) { |
1654 | 0 | continue; |
1655 | 0 | } |
1656 | 0 | if (ref_i == 0) { |
1657 | 0 | continue; |
1658 | 0 | } |
1659 | | |
1660 | 0 | const uint32_t prhme_th = me_ctx->me_hme_prune_ctrls.phme_sad_pct; |
1661 | 0 | uint32_t prehme_sad = (uint32_t)MIN(me_ctx->prehme_data[list_i][ref_i][0].sad, |
1662 | 0 | me_ctx->prehme_data[list_i][ref_i][1].sad); |
1663 | 0 | if ((prehme_sad - best_sad) * 100 > (prhme_th * best_sad)) { |
1664 | 0 | me_ctx->search_results[list_i][ref_i].do_ref = 0; |
1665 | 0 | } |
1666 | 0 | } |
1667 | 0 | } |
1668 | 0 | } |
1669 | 0 | } |
1670 | | |
1671 | | // Set the HME L0 search area. Perform scaling based on list index and ref index. |
1672 | | // HME L0 search area should be the same for each search region |
1673 | | static void get_hme_l0_search_area(MeContext* me_ctx, uint8_t list_index, uint8_t ref_pic_index, uint16_t dist, |
1674 | 0 | int16_t* sa_width, int16_t* sa_height) { |
1675 | | // Reduce HME search area for higher ref indices |
1676 | 0 | if (me_ctx->me_sr_adjustment_ctrls.enable_me_sr_adjustment && |
1677 | 0 | me_ctx->me_sr_adjustment_ctrls.distance_based_hme_resizing) { |
1678 | 0 | uint8_t is_hor = 1; |
1679 | 0 | uint8_t is_ver = 1; |
1680 | 0 | uint8_t is_still = 0; |
1681 | |
|
1682 | 0 | if (me_ctx->reduce_hme_l0_sr_th_min && me_ctx->reduce_hme_l0_sr_th_max) { |
1683 | 0 | if (list_index || ref_pic_index) { |
1684 | 0 | int16_t l0_mvx = me_ctx->x_hme_level0_search_center[0][0][0 /*quadrant-x*/][0 /*quadrant-y*/]; |
1685 | 0 | int16_t l0_mvy = me_ctx->y_hme_level0_search_center[0][0][0 /*quadrant-x*/][0 /*quadrant-y*/]; |
1686 | | |
1687 | | // Determine whether the computed motion from list0/ref_index0 is in vertical or horizintal direction |
1688 | 0 | is_ver = ((ABS(l0_mvx) < me_ctx->reduce_hme_l0_sr_th_min) && |
1689 | 0 | (ABS(l0_mvy) > me_ctx->reduce_hme_l0_sr_th_max)); |
1690 | 0 | is_hor = ((ABS(l0_mvx) > me_ctx->reduce_hme_l0_sr_th_max) && |
1691 | 0 | (ABS(l0_mvy) < me_ctx->reduce_hme_l0_sr_th_min)); |
1692 | 0 | is_still = ((ABS(l0_mvx) < (me_ctx->reduce_hme_l0_sr_th_min * 3)) && |
1693 | 0 | (ABS(l0_mvy) < (me_ctx->reduce_hme_l0_sr_th_min * 3))); |
1694 | 0 | } |
1695 | 0 | } |
1696 | |
|
1697 | 0 | uint8_t x_offset = 1; |
1698 | 0 | uint8_t y_offset = 1; |
1699 | 0 | if (!is_ver) { |
1700 | 0 | y_offset = 2; |
1701 | 0 | } |
1702 | 0 | if (!is_hor) { |
1703 | 0 | x_offset = 2; |
1704 | 0 | } |
1705 | |
|
1706 | 0 | if (me_ctx->me_sr_adjustment_ctrls.enable_me_sr_adjustment == 2) { |
1707 | 0 | if (is_still) { |
1708 | 0 | x_offset = 4; |
1709 | 0 | y_offset = 4; |
1710 | 0 | } |
1711 | 0 | } |
1712 | |
|
1713 | 0 | me_ctx->hme_l0_sa.sa_min.width = me_ctx->hme_l0_sa.sa_min.width / (x_offset + ref_pic_index); |
1714 | 0 | me_ctx->hme_l0_sa.sa_min.height = me_ctx->hme_l0_sa.sa_min.height / (y_offset + ref_pic_index); |
1715 | 0 | me_ctx->hme_l0_sa.sa_max.width = me_ctx->hme_l0_sa.sa_max.width / (x_offset + ref_pic_index); |
1716 | 0 | me_ctx->hme_l0_sa.sa_max.height = me_ctx->hme_l0_sa.sa_max.height / (y_offset + ref_pic_index); |
1717 | 0 | } |
1718 | |
|
1719 | 0 | int32_t hme_sr_factor = svt_aom_get_scaled_picture_distance(dist); |
1720 | | |
1721 | | // Derive the search area width and height, rounding the width up to the nearest sixteenth |
1722 | 0 | int16_t search_area_width = me_ctx->hme_l0_sa.sa_min.width / me_ctx->num_hme_sa_w; |
1723 | 0 | search_area_width = (int16_t)MIN((((search_area_width * hme_sr_factor) + 15) & ~0x0F), |
1724 | 0 | (((me_ctx->hme_l0_sa.sa_max.width / me_ctx->num_hme_sa_w) + 15) & ~0x0F)); |
1725 | 0 | int16_t search_area_height = me_ctx->hme_l0_sa.sa_min.height / me_ctx->num_hme_sa_h; |
1726 | 0 | search_area_height = (int16_t)MIN((search_area_height * hme_sr_factor), |
1727 | 0 | me_ctx->hme_l0_sa.sa_max.height / me_ctx->num_hme_sa_h); |
1728 | |
|
1729 | 0 | *sa_width = search_area_width; |
1730 | 0 | *sa_height = search_area_height; |
1731 | 0 | } |
1732 | | |
1733 | | //this functions returns the worst quadrant in terms of sad. |
1734 | | //it is implemented w/o for loops to get away from a VS2022 compiler issue. |
1735 | | //it then assumes a fixed quadrant sizes of 2 each direction. |
1736 | | static void get_worst_quadrant(MeContext* me_ctx, uint32_t list_index, uint32_t ref_pic_index, uint8_t* best_w, |
1737 | 0 | uint8_t* best_h) { |
1738 | 0 | if (me_ctx->num_hme_sa_w != 2 || me_ctx->num_hme_sa_h != 2) { |
1739 | 0 | svt_aom_assert_err(0, "update other quadrant sizes"); |
1740 | 0 | return; |
1741 | 0 | } |
1742 | 0 | uint64_t max_sad = 0; |
1743 | |
|
1744 | 0 | if (me_ctx->hme_level0_sad[list_index][ref_pic_index][0][0] > max_sad) { |
1745 | 0 | max_sad = me_ctx->hme_level0_sad[list_index][ref_pic_index][0][0]; |
1746 | 0 | *best_w = 0; |
1747 | 0 | *best_h = 0; |
1748 | 0 | } |
1749 | 0 | if (me_ctx->hme_level0_sad[list_index][ref_pic_index][1][0] > max_sad) { |
1750 | 0 | max_sad = me_ctx->hme_level0_sad[list_index][ref_pic_index][1][0]; |
1751 | 0 | *best_w = 1; |
1752 | 0 | *best_h = 0; |
1753 | 0 | } |
1754 | 0 | if (me_ctx->hme_level0_sad[list_index][ref_pic_index][0][1] > max_sad) { |
1755 | 0 | max_sad = me_ctx->hme_level0_sad[list_index][ref_pic_index][0][1]; |
1756 | 0 | *best_w = 0; |
1757 | 0 | *best_h = 1; |
1758 | 0 | } |
1759 | 0 | if (me_ctx->hme_level0_sad[list_index][ref_pic_index][1][1] > max_sad) { |
1760 | 0 | *best_w = 1; |
1761 | 0 | *best_h = 1; |
1762 | 0 | } |
1763 | 0 | } |
1764 | | |
1765 | | /******************************************* |
1766 | | * performs hierarchical ME level 0 for one 64x64 block (uni-prediction only) |
1767 | | *******************************************/ |
1768 | | static void hme_level0_b64(PictureParentControlSet* pcs, uint32_t org_x, uint32_t org_y, MeContext* me_ctx, |
1769 | 0 | EbPictureBufferDesc* input_ptr) { |
1770 | 0 | const uint32_t block_width = me_ctx->b64_width; |
1771 | 0 | const uint32_t block_height = me_ctx->b64_height; |
1772 | | |
1773 | | // store base HME sizes, to be used if using ref-index based HME resizing |
1774 | 0 | SearchAreaMinMax base_hme_sa; |
1775 | 0 | base_hme_sa.sa_min = (SearchArea){me_ctx->hme_l0_sa.sa_min.width, me_ctx->hme_l0_sa.sa_min.height}; |
1776 | 0 | base_hme_sa.sa_max = (SearchArea){me_ctx->hme_l0_sa.sa_max.width, me_ctx->hme_l0_sa.sa_max.height}; |
1777 | | |
1778 | | // List Loop |
1779 | 0 | const uint8_t num_of_list_to_search = me_ctx->num_of_list_to_search; |
1780 | 0 | for (uint8_t list_index = REF_LIST_0; list_index < num_of_list_to_search; ++list_index) { |
1781 | | // Ref Picture Loop |
1782 | 0 | const uint8_t num_of_ref_pic_to_search = me_ctx->num_of_ref_pic_to_search[list_index]; |
1783 | 0 | for (uint8_t ref_pic_index = 0; ref_pic_index < num_of_ref_pic_to_search; ++ref_pic_index) { |
1784 | | // If me_early_exit_th is enabled, skip HME L0 for the current block if the zero-zero SAD is low |
1785 | 0 | if (me_ctx->me_early_exit_th) { |
1786 | 0 | if (me_ctx->zz_sad[list_index][ref_pic_index] < (me_ctx->me_early_exit_th >> 2)) { |
1787 | 0 | for (uint32_t sr_idx_y = 0; sr_idx_y < me_ctx->num_hme_sa_h; sr_idx_y++) { |
1788 | 0 | for (uint32_t sr_idx_x = 0; sr_idx_x < me_ctx->num_hme_sa_w; sr_idx_x++) { |
1789 | 0 | me_ctx->x_hme_level0_search_center[list_index][ref_pic_index][sr_idx_x][sr_idx_y] = 0; |
1790 | 0 | me_ctx->y_hme_level0_search_center[list_index][ref_pic_index][sr_idx_x][sr_idx_y] = 0; |
1791 | 0 | me_ctx->hme_level0_sad[list_index][ref_pic_index][sr_idx_x][sr_idx_y] = 0; |
1792 | 0 | } |
1793 | 0 | } |
1794 | 0 | continue; |
1795 | 0 | } |
1796 | 0 | } |
1797 | 0 | if (me_ctx->prev_me_stage_based_exit_th) { |
1798 | 0 | uint8_t sr_i = me_ctx->prehme_data[list_index][ref_pic_index][0].sad <= |
1799 | 0 | me_ctx->prehme_data[list_index][ref_pic_index][1].sad |
1800 | 0 | ? 0 |
1801 | 0 | : 1; |
1802 | 0 | if (me_ctx->performed_phme[list_index][ref_pic_index][sr_i]) { |
1803 | 0 | if (me_ctx->prehme_data[list_index][ref_pic_index][sr_i].sad < |
1804 | 0 | (me_ctx->prev_me_stage_based_exit_th >> 4)) { |
1805 | 0 | for (uint32_t sr_idx_y = 0; sr_idx_y < me_ctx->num_hme_sa_h; sr_idx_y++) { |
1806 | 0 | for (uint32_t sr_idx_x = 0; sr_idx_x < me_ctx->num_hme_sa_w; sr_idx_x++) { |
1807 | 0 | me_ctx->x_hme_level0_search_center[list_index][ref_pic_index][sr_idx_x][sr_idx_y] = |
1808 | 0 | me_ctx->prehme_data[list_index][ref_pic_index][sr_i].best_mv.x; |
1809 | 0 | me_ctx->y_hme_level0_search_center[list_index][ref_pic_index][sr_idx_x][sr_idx_y] = |
1810 | 0 | me_ctx->prehme_data[list_index][ref_pic_index][sr_i].best_mv.y; |
1811 | 0 | me_ctx->hme_level0_sad[list_index][ref_pic_index][sr_idx_x][sr_idx_y] = |
1812 | 0 | me_ctx->prehme_data[list_index][ref_pic_index][sr_i].sad; |
1813 | 0 | } |
1814 | 0 | } |
1815 | 0 | continue; |
1816 | 0 | } |
1817 | 0 | } |
1818 | 0 | } |
1819 | | |
1820 | 0 | if (!me_ctx->search_results[list_index][ref_pic_index].do_ref) { |
1821 | 0 | for (uint32_t sr_idx_y = 0; sr_idx_y < me_ctx->num_hme_sa_h; sr_idx_y++) { |
1822 | 0 | for (uint32_t sr_idx_x = 0; sr_idx_x < me_ctx->num_hme_sa_w; sr_idx_x++) { |
1823 | 0 | me_ctx->x_hme_level0_search_center[list_index][ref_pic_index][sr_idx_x][sr_idx_y] = 0; |
1824 | 0 | me_ctx->y_hme_level0_search_center[list_index][ref_pic_index][sr_idx_x][sr_idx_y] = 0; |
1825 | 0 | me_ctx->hme_level0_sad[list_index][ref_pic_index][sr_idx_x][sr_idx_y] = MAX_U32; |
1826 | 0 | } |
1827 | 0 | } |
1828 | 0 | continue; |
1829 | 0 | } |
1830 | | // Get the sixteenth downsampled reference picture |
1831 | 0 | uint16_t dist = 0; |
1832 | 0 | EbPictureBufferDesc* sixteenth_ref_pic = get_me_reference( |
1833 | 0 | pcs, me_ctx, list_index, ref_pic_index, 0, &dist, input_ptr->width, input_ptr->height); |
1834 | |
|
1835 | 0 | if (me_ctx->temporal_layer_index > 0 || list_index == 0) { |
1836 | | // Get the HME L0 search dimensions for the current frame |
1837 | 0 | int16_t sa_width = 0, sa_height = 0; |
1838 | 0 | get_hme_l0_search_area(me_ctx, list_index, ref_pic_index, dist, &sa_width, &sa_height); |
1839 | 0 | for (uint8_t sr_h = 0; sr_h < me_ctx->num_hme_sa_h; sr_h++) { |
1840 | 0 | for (uint8_t sr_w = 0; sr_w < me_ctx->num_hme_sa_w; sr_w++) { |
1841 | 0 | hme_level_0(me_ctx, |
1842 | 0 | ((int16_t)org_x) >> 2, |
1843 | 0 | ((int16_t)org_y) >> 2, |
1844 | 0 | block_width >> 2, |
1845 | 0 | block_height >> 2, |
1846 | 0 | sa_width, |
1847 | 0 | sa_height, |
1848 | 0 | sixteenth_ref_pic, |
1849 | 0 | sr_w, |
1850 | 0 | sr_h, |
1851 | 0 | &(me_ctx->hme_level0_sad[list_index][ref_pic_index][sr_w][sr_h]), |
1852 | 0 | &(me_ctx->x_hme_level0_search_center[list_index][ref_pic_index][sr_w][sr_h]), |
1853 | 0 | &(me_ctx->y_hme_level0_search_center[list_index][ref_pic_index][sr_w][sr_h])); |
1854 | 0 | } |
1855 | 0 | } |
1856 | | |
1857 | | // reset base HME area |
1858 | 0 | if (me_ctx->me_sr_adjustment_ctrls.enable_me_sr_adjustment && |
1859 | 0 | me_ctx->me_sr_adjustment_ctrls.distance_based_hme_resizing) { |
1860 | 0 | me_ctx->hme_l0_sa.sa_min = base_hme_sa.sa_min; |
1861 | 0 | me_ctx->hme_l0_sa.sa_max = base_hme_sa.sa_max; |
1862 | 0 | } |
1863 | |
|
1864 | 0 | if (me_ctx->prehme_ctrl.enable) { |
1865 | | //get the worst quadrant |
1866 | 0 | uint8_t sr_h_max = 0, sr_w_max = 0; |
1867 | 0 | get_worst_quadrant(me_ctx, list_index, ref_pic_index, &sr_w_max, &sr_h_max); |
1868 | |
|
1869 | 0 | uint8_t sr_i = me_ctx->prehme_data[list_index][ref_pic_index][0].sad <= |
1870 | 0 | me_ctx->prehme_data[list_index][ref_pic_index][1].sad |
1871 | 0 | ? 0 |
1872 | 0 | : 1; |
1873 | | //replace worst with pre-hme |
1874 | 0 | if (me_ctx->prehme_data[list_index][ref_pic_index][sr_i].sad < |
1875 | 0 | me_ctx->hme_level0_sad[list_index][ref_pic_index][sr_w_max][sr_h_max]) { |
1876 | 0 | me_ctx->hme_level0_sad[list_index][ref_pic_index][sr_w_max][sr_h_max] = |
1877 | 0 | me_ctx->prehme_data[list_index][ref_pic_index][sr_i].sad; |
1878 | |
|
1879 | 0 | me_ctx->x_hme_level0_search_center[list_index][ref_pic_index][sr_w_max][sr_h_max] = |
1880 | 0 | me_ctx->prehme_data[list_index][ref_pic_index][sr_i].best_mv.x; |
1881 | |
|
1882 | 0 | me_ctx->y_hme_level0_search_center[list_index][ref_pic_index][sr_w_max][sr_h_max] = |
1883 | 0 | me_ctx->prehme_data[list_index][ref_pic_index][sr_i].best_mv.y; |
1884 | 0 | } |
1885 | 0 | } |
1886 | 0 | } |
1887 | 0 | } // End ref pic loop |
1888 | 0 | } // End list loop |
1889 | 0 | } |
1890 | | |
1891 | | /******************************************* |
1892 | | * performs hierarchical ME level 1 for one 64x64 block (uni-prediction only) |
1893 | | *******************************************/ |
1894 | | static void hme_level1_b64(PictureParentControlSet* pcs, uint32_t org_x, uint32_t org_y, MeContext* me_ctx, |
1895 | 0 | EbPictureBufferDesc* input_ptr) { |
1896 | 0 | const uint32_t block_width = me_ctx->b64_width; |
1897 | 0 | const uint32_t block_height = me_ctx->b64_height; |
1898 | | |
1899 | | // List Loop |
1900 | 0 | const uint8_t num_of_list_to_search = me_ctx->num_of_list_to_search; |
1901 | 0 | for (uint32_t list_index = REF_LIST_0; list_index < num_of_list_to_search; ++list_index) { |
1902 | | // Ref Picture Loop |
1903 | 0 | const uint8_t num_of_ref_pic_to_search = me_ctx->num_of_ref_pic_to_search[list_index]; |
1904 | 0 | for (uint8_t ref_pic_index = 0; ref_pic_index < num_of_ref_pic_to_search; ++ref_pic_index) { |
1905 | 0 | uint16_t dist = 0; |
1906 | 0 | EbPictureBufferDesc* quarter_ref_pic = get_me_reference( |
1907 | 0 | pcs, me_ctx, list_index, ref_pic_index, 1, &dist, input_ptr->width, input_ptr->height); |
1908 | |
|
1909 | 0 | if (me_ctx->temporal_layer_index > 0 || list_index == 0) { |
1910 | | // If me_early_exit_th is enabled, skip HME L0 for the current block if the zero-zero SAD is low |
1911 | 0 | if (me_ctx->me_early_exit_th) { |
1912 | 0 | if (me_ctx->zz_sad[list_index][ref_pic_index] < (me_ctx->me_early_exit_th >> 2)) { |
1913 | 0 | for (uint32_t sr_idx_y = 0; sr_idx_y < me_ctx->num_hme_sa_h; sr_idx_y++) { |
1914 | 0 | for (uint32_t sr_idx_x = 0; sr_idx_x < me_ctx->num_hme_sa_w; sr_idx_x++) { |
1915 | 0 | me_ctx->x_hme_level1_search_center[list_index][ref_pic_index][sr_idx_x][sr_idx_y] = 0; |
1916 | 0 | me_ctx->y_hme_level1_search_center[list_index][ref_pic_index][sr_idx_x][sr_idx_y] = 0; |
1917 | 0 | me_ctx->hme_level1_sad[list_index][ref_pic_index][sr_idx_x][sr_idx_y] = 0; |
1918 | 0 | } |
1919 | 0 | } |
1920 | 0 | continue; |
1921 | 0 | } |
1922 | 0 | } |
1923 | 0 | if (!me_ctx->search_results[list_index][ref_pic_index].do_ref) { |
1924 | 0 | for (uint32_t sr_idx_y = 0; sr_idx_y < me_ctx->num_hme_sa_h; sr_idx_y++) { |
1925 | 0 | for (uint32_t sr_idx_x = 0; sr_idx_x < me_ctx->num_hme_sa_w; sr_idx_x++) { |
1926 | 0 | me_ctx->x_hme_level1_search_center[list_index][ref_pic_index][sr_idx_x][sr_idx_y] = 0; |
1927 | 0 | me_ctx->y_hme_level1_search_center[list_index][ref_pic_index][sr_idx_x][sr_idx_y] = 0; |
1928 | 0 | me_ctx->hme_level1_sad[list_index][ref_pic_index][sr_idx_x][sr_idx_y] = MAX_U32; |
1929 | 0 | } |
1930 | 0 | } |
1931 | 0 | continue; |
1932 | 0 | } |
1933 | 0 | for (uint8_t sr_h = 0; sr_h < me_ctx->num_hme_sa_h; sr_h++) { |
1934 | 0 | for (uint8_t sr_w = 0; sr_w < me_ctx->num_hme_sa_w; sr_w++) { |
1935 | 0 | if (me_ctx->prev_me_stage_based_exit_th) { |
1936 | 0 | if (me_ctx->hme_level0_sad[list_index][ref_pic_index][sr_w][sr_h] < |
1937 | 0 | (me_ctx->prev_me_stage_based_exit_th >> 5)) { |
1938 | 0 | me_ctx->x_hme_level1_search_center[list_index][ref_pic_index][sr_w][sr_h] = |
1939 | 0 | me_ctx->x_hme_level0_search_center[list_index][ref_pic_index][sr_w][sr_h]; |
1940 | 0 | me_ctx->y_hme_level1_search_center[list_index][ref_pic_index][sr_w][sr_h] = |
1941 | 0 | me_ctx->y_hme_level0_search_center[list_index][ref_pic_index][sr_w][sr_h]; |
1942 | 0 | me_ctx->hme_level1_sad[list_index][ref_pic_index][sr_w][sr_h] = |
1943 | 0 | me_ctx->hme_level0_sad[list_index][ref_pic_index][sr_w][sr_h]; |
1944 | 0 | continue; |
1945 | 0 | } |
1946 | 0 | } |
1947 | | |
1948 | 0 | hme_level_1(me_ctx, |
1949 | 0 | ((int16_t)org_x) >> 1, |
1950 | 0 | ((int16_t)org_y) >> 1, |
1951 | 0 | block_width >> 1, |
1952 | 0 | block_height >> 1, |
1953 | 0 | quarter_ref_pic, |
1954 | 0 | (int16_t)me_ctx->hme_l1_sa.width, |
1955 | 0 | (int16_t)me_ctx->hme_l1_sa.height, |
1956 | 0 | me_ctx->x_hme_level0_search_center[list_index][ref_pic_index][sr_w][sr_h] >> 1, |
1957 | 0 | me_ctx->y_hme_level0_search_center[list_index][ref_pic_index][sr_w][sr_h] >> 1, |
1958 | 0 | &(me_ctx->hme_level1_sad[list_index][ref_pic_index][sr_w][sr_h]), |
1959 | 0 | &(me_ctx->x_hme_level1_search_center[list_index][ref_pic_index][sr_w][sr_h]), |
1960 | 0 | &(me_ctx->y_hme_level1_search_center[list_index][ref_pic_index][sr_w][sr_h])); |
1961 | 0 | } |
1962 | 0 | } |
1963 | 0 | } |
1964 | 0 | } // End ref pic loop |
1965 | 0 | } // End list loop |
1966 | 0 | } |
1967 | | |
1968 | | /******************************************* |
1969 | | * performs hierarchical ME level 2 for one 64x64 block (uni-prediction only) |
1970 | | *******************************************/ |
1971 | | static void hme_level2_b64(PictureParentControlSet* pcs, uint32_t org_x, uint32_t org_y, MeContext* me_ctx, |
1972 | 0 | EbPictureBufferDesc* input_ptr) { |
1973 | 0 | const uint32_t block_width = me_ctx->b64_width; |
1974 | 0 | const uint32_t block_height = me_ctx->b64_height; |
1975 | | // List Loop |
1976 | 0 | const uint8_t num_of_list_to_search = me_ctx->num_of_list_to_search; |
1977 | 0 | for (int list_index = REF_LIST_0; list_index < num_of_list_to_search; ++list_index) { |
1978 | | // Ref Picture Loop |
1979 | 0 | const uint8_t num_of_ref_pic_to_search = me_ctx->num_of_ref_pic_to_search[list_index]; |
1980 | 0 | for (uint8_t ref_pic_index = 0; ref_pic_index < num_of_ref_pic_to_search; ++ref_pic_index) { |
1981 | 0 | uint16_t dist = 0; |
1982 | 0 | EbPictureBufferDesc* ref_pic = get_me_reference( |
1983 | 0 | pcs, me_ctx, list_index, ref_pic_index, 2, &dist, input_ptr->width, input_ptr->height); |
1984 | |
|
1985 | 0 | if (me_ctx->temporal_layer_index > 0 || list_index == 0) { |
1986 | 0 | for (uint8_t sr_h = 0; sr_h < me_ctx->num_hme_sa_h; sr_h++) { |
1987 | 0 | for (uint8_t sr_w = 0; sr_w < me_ctx->num_hme_sa_w; sr_w++) { |
1988 | 0 | if (me_ctx->prev_me_stage_based_exit_th) { |
1989 | 0 | if (me_ctx->hme_level1_sad[list_index][ref_pic_index][sr_w][sr_h] < |
1990 | 0 | (me_ctx->prev_me_stage_based_exit_th >> 2)) { |
1991 | 0 | me_ctx->x_hme_level2_search_center[list_index][ref_pic_index][sr_w][sr_h] = |
1992 | 0 | me_ctx->x_hme_level1_search_center[list_index][ref_pic_index][sr_w][sr_h]; |
1993 | 0 | me_ctx->y_hme_level2_search_center[list_index][ref_pic_index][sr_w][sr_h] = |
1994 | 0 | me_ctx->y_hme_level1_search_center[list_index][ref_pic_index][sr_w][sr_h]; |
1995 | 0 | me_ctx->hme_level2_sad[list_index][ref_pic_index][sr_w][sr_h] = |
1996 | 0 | me_ctx->hme_level1_sad[list_index][ref_pic_index][sr_w][sr_h]; |
1997 | 0 | continue; |
1998 | 0 | } |
1999 | 0 | } |
2000 | | |
2001 | 0 | hme_level_2(me_ctx, |
2002 | 0 | (int16_t)org_x, |
2003 | 0 | (int16_t)org_y, |
2004 | 0 | block_width, |
2005 | 0 | block_height, |
2006 | 0 | ref_pic, |
2007 | 0 | (int16_t)me_ctx->hme_l2_sa.width, |
2008 | 0 | (int16_t)me_ctx->hme_l2_sa.height, |
2009 | 0 | me_ctx->x_hme_level1_search_center[list_index][ref_pic_index][sr_w][sr_h], |
2010 | 0 | me_ctx->y_hme_level1_search_center[list_index][ref_pic_index][sr_w][sr_h], |
2011 | 0 | &(me_ctx->hme_level2_sad[list_index][ref_pic_index][sr_w][sr_h]), |
2012 | 0 | &(me_ctx->x_hme_level2_search_center[list_index][ref_pic_index][sr_w][sr_h]), |
2013 | 0 | &(me_ctx->y_hme_level2_search_center[list_index][ref_pic_index][sr_w][sr_h])); |
2014 | 0 | } |
2015 | 0 | } |
2016 | 0 | } |
2017 | 0 | } // End ref pic loop |
2018 | 0 | } // End list loop |
2019 | 0 | } |
2020 | | |
2021 | | /******************************************* |
2022 | | * Set the final search centre |
2023 | | *******************************************/ |
2024 | | |
2025 | 0 | void set_final_search_centre_sb(PictureParentControlSet* pcs, MeContext* me_ctx) { |
2026 | 0 | UNUSED(pcs); |
2027 | | // Hierarchical ME Search Center |
2028 | 0 | int16_t xHmeSearchCenter = 0; |
2029 | 0 | int16_t yHmeSearchCenter = 0; |
2030 | | |
2031 | | // Final ME Search Center |
2032 | 0 | int16_t x_search_center = 0; |
2033 | 0 | int16_t y_search_center = 0; |
2034 | | |
2035 | | // Search Center SADs |
2036 | 0 | uint64_t hmeMvSad = 0; |
2037 | 0 | uint32_t num_of_list_to_search; |
2038 | 0 | uint32_t list_index; |
2039 | 0 | uint8_t ref_pic_index; |
2040 | | // Configure HME level 0, level 1 and level 2 from static config parameters |
2041 | 0 | bool enable_hme_level0_flag = me_ctx->enable_hme_level0_flag; |
2042 | 0 | bool enable_hme_level1_flag = me_ctx->enable_hme_level1_flag; |
2043 | 0 | bool enable_hme_level2_flag = me_ctx->enable_hme_level2_flag; |
2044 | |
|
2045 | 0 | uint64_t best_cost = (uint64_t)~0; |
2046 | 0 | me_ctx->best_list_idx = 0; |
2047 | 0 | me_ctx->best_ref_idx = 0; |
2048 | 0 | num_of_list_to_search = me_ctx->num_of_list_to_search; |
2049 | | |
2050 | | // Uni-Prediction motion estimation loop |
2051 | | // List Loop |
2052 | 0 | for (list_index = REF_LIST_0; list_index < num_of_list_to_search; ++list_index) { |
2053 | 0 | uint8_t num_of_ref_pic_to_search = me_ctx->num_of_ref_pic_to_search[list_index]; |
2054 | | // Ref Picture Loop |
2055 | 0 | for (ref_pic_index = 0; ref_pic_index < num_of_ref_pic_to_search; ++ref_pic_index) { |
2056 | 0 | if (me_ctx->temporal_layer_index > 0 || list_index == 0) { |
2057 | 0 | if (me_ctx->enable_hme_flag) { |
2058 | | // Hierarchical ME - Search Center |
2059 | 0 | if (enable_hme_level0_flag && !enable_hme_level1_flag && !enable_hme_level2_flag) { |
2060 | 0 | xHmeSearchCenter = me_ctx->x_hme_level0_search_center[list_index][ref_pic_index][0][0]; |
2061 | 0 | yHmeSearchCenter = me_ctx->y_hme_level0_search_center[list_index][ref_pic_index][0][0]; |
2062 | 0 | hmeMvSad = me_ctx->hme_level0_sad[list_index][ref_pic_index][0][0]; |
2063 | |
|
2064 | 0 | uint32_t search_region_number_in_width = 1; |
2065 | 0 | uint32_t search_region_number_in_height = 0; |
2066 | 0 | while (search_region_number_in_height < me_ctx->num_hme_sa_h) { |
2067 | 0 | while (search_region_number_in_width < me_ctx->num_hme_sa_w) { |
2068 | 0 | xHmeSearchCenter = |
2069 | 0 | (me_ctx->hme_level0_sad[list_index][ref_pic_index][search_region_number_in_width] |
2070 | 0 | [search_region_number_in_height] < hmeMvSad) |
2071 | 0 | ? me_ctx->x_hme_level0_search_center[list_index][ref_pic_index] |
2072 | 0 | [search_region_number_in_width] |
2073 | 0 | [search_region_number_in_height] |
2074 | 0 | : xHmeSearchCenter; |
2075 | 0 | yHmeSearchCenter = |
2076 | 0 | (me_ctx->hme_level0_sad[list_index][ref_pic_index][search_region_number_in_width] |
2077 | 0 | [search_region_number_in_height] < hmeMvSad) |
2078 | 0 | ? me_ctx->y_hme_level0_search_center[list_index][ref_pic_index] |
2079 | 0 | [search_region_number_in_width] |
2080 | 0 | [search_region_number_in_height] |
2081 | 0 | : yHmeSearchCenter; |
2082 | 0 | hmeMvSad = |
2083 | 0 | (me_ctx->hme_level0_sad[list_index][ref_pic_index][search_region_number_in_width] |
2084 | 0 | [search_region_number_in_height] < hmeMvSad) |
2085 | 0 | ? me_ctx->hme_level0_sad[list_index][ref_pic_index][search_region_number_in_width] |
2086 | 0 | [search_region_number_in_height] |
2087 | 0 | : hmeMvSad; |
2088 | 0 | search_region_number_in_width++; |
2089 | 0 | } |
2090 | 0 | search_region_number_in_width = 0; |
2091 | 0 | search_region_number_in_height++; |
2092 | 0 | } |
2093 | 0 | } |
2094 | |
|
2095 | 0 | if (enable_hme_level1_flag && !enable_hme_level2_flag) { |
2096 | 0 | xHmeSearchCenter = me_ctx->x_hme_level1_search_center[list_index][ref_pic_index][0][0]; |
2097 | 0 | yHmeSearchCenter = me_ctx->y_hme_level1_search_center[list_index][ref_pic_index][0][0]; |
2098 | 0 | hmeMvSad = me_ctx->hme_level1_sad[list_index][ref_pic_index][0][0]; |
2099 | |
|
2100 | 0 | uint32_t search_region_number_in_width = 1; |
2101 | 0 | uint32_t search_region_number_in_height = 0; |
2102 | 0 | while (search_region_number_in_height < me_ctx->num_hme_sa_h) { |
2103 | 0 | while (search_region_number_in_width < me_ctx->num_hme_sa_w) { |
2104 | 0 | xHmeSearchCenter = |
2105 | 0 | (me_ctx->hme_level1_sad[list_index][ref_pic_index][search_region_number_in_width] |
2106 | 0 | [search_region_number_in_height] < hmeMvSad) |
2107 | 0 | ? me_ctx->x_hme_level1_search_center[list_index][ref_pic_index] |
2108 | 0 | [search_region_number_in_width] |
2109 | 0 | [search_region_number_in_height] |
2110 | 0 | : xHmeSearchCenter; |
2111 | 0 | yHmeSearchCenter = |
2112 | 0 | (me_ctx->hme_level1_sad[list_index][ref_pic_index][search_region_number_in_width] |
2113 | 0 | [search_region_number_in_height] < hmeMvSad) |
2114 | 0 | ? me_ctx->y_hme_level1_search_center[list_index][ref_pic_index] |
2115 | 0 | [search_region_number_in_width] |
2116 | 0 | [search_region_number_in_height] |
2117 | 0 | : yHmeSearchCenter; |
2118 | 0 | hmeMvSad = |
2119 | 0 | (me_ctx->hme_level1_sad[list_index][ref_pic_index][search_region_number_in_width] |
2120 | 0 | [search_region_number_in_height] < hmeMvSad) |
2121 | 0 | ? me_ctx->hme_level1_sad[list_index][ref_pic_index][search_region_number_in_width] |
2122 | 0 | [search_region_number_in_height] |
2123 | 0 | : hmeMvSad; |
2124 | 0 | search_region_number_in_width++; |
2125 | 0 | } |
2126 | 0 | search_region_number_in_width = 0; |
2127 | 0 | search_region_number_in_height++; |
2128 | 0 | } |
2129 | 0 | } |
2130 | |
|
2131 | 0 | if (enable_hme_level2_flag) { |
2132 | 0 | xHmeSearchCenter = me_ctx->x_hme_level2_search_center[list_index][ref_pic_index][0][0]; |
2133 | 0 | yHmeSearchCenter = me_ctx->y_hme_level2_search_center[list_index][ref_pic_index][0][0]; |
2134 | 0 | hmeMvSad = me_ctx->hme_level2_sad[list_index][ref_pic_index][0][0]; |
2135 | |
|
2136 | 0 | uint32_t search_region_number_in_width = 1; |
2137 | 0 | uint32_t search_region_number_in_height = 0; |
2138 | 0 | while (search_region_number_in_height < me_ctx->num_hme_sa_h) { |
2139 | 0 | while (search_region_number_in_width < me_ctx->num_hme_sa_w) { |
2140 | 0 | xHmeSearchCenter = |
2141 | 0 | (me_ctx->hme_level2_sad[list_index][ref_pic_index][search_region_number_in_width] |
2142 | 0 | [search_region_number_in_height] < hmeMvSad) |
2143 | 0 | ? me_ctx->x_hme_level2_search_center[list_index][ref_pic_index] |
2144 | 0 | [search_region_number_in_width] |
2145 | 0 | [search_region_number_in_height] |
2146 | 0 | : xHmeSearchCenter; |
2147 | 0 | yHmeSearchCenter = |
2148 | 0 | (me_ctx->hme_level2_sad[list_index][ref_pic_index][search_region_number_in_width] |
2149 | 0 | [search_region_number_in_height] < hmeMvSad) |
2150 | 0 | ? me_ctx->y_hme_level2_search_center[list_index][ref_pic_index] |
2151 | 0 | [search_region_number_in_width] |
2152 | 0 | [search_region_number_in_height] |
2153 | 0 | : yHmeSearchCenter; |
2154 | 0 | hmeMvSad = |
2155 | 0 | (me_ctx->hme_level2_sad[list_index][ref_pic_index][search_region_number_in_width] |
2156 | 0 | [search_region_number_in_height] < hmeMvSad) |
2157 | 0 | ? me_ctx->hme_level2_sad[list_index][ref_pic_index][search_region_number_in_width] |
2158 | 0 | [search_region_number_in_height] |
2159 | 0 | : hmeMvSad; |
2160 | 0 | search_region_number_in_width++; |
2161 | 0 | } |
2162 | 0 | search_region_number_in_width = 0; |
2163 | 0 | search_region_number_in_height++; |
2164 | 0 | } |
2165 | 0 | } |
2166 | |
|
2167 | 0 | x_search_center = xHmeSearchCenter; |
2168 | 0 | y_search_center = yHmeSearchCenter; |
2169 | 0 | } |
2170 | 0 | } else { |
2171 | 0 | x_search_center = 0; |
2172 | 0 | y_search_center = 0; |
2173 | 0 | } |
2174 | | |
2175 | | //sc valid for all cases. 0,0 if hme not done. |
2176 | 0 | me_ctx->search_results[list_index][ref_pic_index].hme_sc_x = x_search_center; |
2177 | 0 | me_ctx->search_results[list_index][ref_pic_index].hme_sc_y = y_search_center; |
2178 | |
|
2179 | 0 | me_ctx->search_results[list_index][ref_pic_index].hme_sad = |
2180 | 0 | hmeMvSad; //this is not valid in all cases. only when HME is done, and when HMELevel2 is done |
2181 | | //also for base layer some references are redundant!! |
2182 | 0 | if (hmeMvSad < best_cost) { |
2183 | 0 | best_cost = hmeMvSad; |
2184 | 0 | me_ctx->best_list_idx = list_index; |
2185 | 0 | me_ctx->best_ref_idx = ref_pic_index; |
2186 | 0 | } |
2187 | 0 | } |
2188 | 0 | } |
2189 | 0 | } |
2190 | | |
2191 | | // Initialize zz SAD array |
2192 | 0 | static void init_zz_sad(PictureParentControlSet* pcs, MeContext* me_ctx, uint32_t org_x, uint32_t org_y) { |
2193 | 0 | const uint32_t block_width = me_ctx->b64_width; |
2194 | 0 | const uint32_t block_height = me_ctx->b64_height; |
2195 | 0 | uint32_t best_zz_sad = MAX_U32; |
2196 | | // List Loop |
2197 | 0 | for (int list_i = REF_LIST_0; list_i < me_ctx->num_of_list_to_search; ++list_i) { |
2198 | | // Ref Picture Loop |
2199 | 0 | for (uint8_t ref_i = 0; ref_i < me_ctx->num_of_ref_pic_to_search[list_i]; ++ref_i) { |
2200 | 0 | if (me_ctx->temporal_layer_index > 0 || list_i == 0) { |
2201 | 0 | EbPictureBufferDesc* ref_pic = me_ctx->me_ds_ref_array[list_i][ref_i].picture_ptr; |
2202 | 0 | uint32_t zz_sad = get_zz_sad(ref_pic, me_ctx, org_x, org_y, block_width, block_height); |
2203 | | //normalize for incomplete b64 |
2204 | 0 | zz_sad = (zz_sad * 64 * 64) / (block_width * block_height); |
2205 | 0 | me_ctx->zz_sad[list_i][ref_i] = zz_sad; |
2206 | 0 | best_zz_sad = MIN(best_zz_sad, zz_sad); |
2207 | 0 | } |
2208 | 0 | } |
2209 | 0 | } |
2210 | 0 | const uint32_t zz_th = me_ctx->me_hme_prune_ctrls.zz_sad_th; |
2211 | 0 | if (me_ctx->temporal_layer_index > 0 && best_zz_sad < zz_th) { |
2212 | 0 | for (int list_i = REF_LIST_0; list_i < me_ctx->num_of_list_to_search; ++list_i) { |
2213 | 0 | for (uint8_t ref_i = 0; ref_i < me_ctx->num_of_ref_pic_to_search[list_i]; ++ref_i) { |
2214 | 0 | if (ref_i == 0) { |
2215 | 0 | continue; |
2216 | 0 | } |
2217 | | |
2218 | 0 | const uint32_t zz_sad_pct = me_ctx->me_hme_prune_ctrls.zz_sad_pct; |
2219 | 0 | if ((me_ctx->zz_sad[list_i][ref_i] - best_zz_sad) * 100 > (zz_sad_pct * best_zz_sad)) { |
2220 | 0 | me_ctx->search_results[list_i][ref_i].do_ref = 0; |
2221 | 0 | } |
2222 | 0 | } |
2223 | 0 | } |
2224 | 0 | } |
2225 | |
|
2226 | 0 | const uint32_t safe_limit_zz_th = me_ctx->me_safe_limit_zz_th; |
2227 | 0 | if (safe_limit_zz_th) { |
2228 | 0 | bool me_safe_limit_refs = false; |
2229 | 0 | if (pcs->hierarchical_levels > 0 && me_ctx->num_of_list_to_search == 2 && |
2230 | 0 | pcs->temporal_layer_index >= pcs->hierarchical_levels && pcs->similar_brightness_refs && |
2231 | 0 | me_ctx->zz_sad[0][0] < safe_limit_zz_th && me_ctx->zz_sad[1][0] < safe_limit_zz_th) { |
2232 | 0 | me_safe_limit_refs = true; |
2233 | 0 | } |
2234 | |
|
2235 | 0 | for (int list_i = REF_LIST_0; list_i < me_ctx->num_of_list_to_search; ++list_i) { |
2236 | 0 | for (uint8_t ref_i = 0; ref_i < me_ctx->num_of_ref_pic_to_search[list_i]; ++ref_i) { |
2237 | 0 | if (me_safe_limit_refs && ref_i > 0) { |
2238 | 0 | me_ctx->search_results[list_i][ref_i].do_ref = 0; |
2239 | 0 | } |
2240 | 0 | } |
2241 | 0 | } |
2242 | 0 | } |
2243 | 0 | } |
2244 | | |
2245 | | /******************************************* |
2246 | | * performs hierarchical ME for a 64x64 block for every ref frame |
2247 | | *******************************************/ |
2248 | | static void hme_b64(PictureParentControlSet* pcs, uint32_t org_x, uint32_t org_y, MeContext* me_ctx, |
2249 | 0 | EbPictureBufferDesc* input_ptr) { |
2250 | | // If needed, initialize the zz sad array |
2251 | 0 | if (me_ctx->me_early_exit_th || me_ctx->me_safe_limit_zz_th) { |
2252 | 0 | init_zz_sad(pcs, me_ctx, org_x, org_y); |
2253 | 0 | } |
2254 | |
|
2255 | 0 | if (me_ctx->prehme_ctrl.enable) { |
2256 | | // perform pre-HME |
2257 | 0 | prehme_b64(pcs, org_x, org_y, me_ctx, input_ptr); |
2258 | 0 | } |
2259 | |
|
2260 | 0 | if (me_ctx->enable_hme_flag) { |
2261 | | // perform hierarchical ME level 0 |
2262 | 0 | if (me_ctx->enable_hme_level0_flag) { |
2263 | 0 | hme_level0_b64(pcs, org_x, org_y, me_ctx, input_ptr); |
2264 | 0 | } |
2265 | | |
2266 | | // perform hierarchical ME level 1 |
2267 | 0 | if (me_ctx->enable_hme_level1_flag) { |
2268 | 0 | hme_level1_b64(pcs, org_x, org_y, me_ctx, input_ptr); |
2269 | 0 | } |
2270 | | |
2271 | | // perform hierarchical ME level 2 |
2272 | 0 | if (me_ctx->enable_hme_level2_flag) { |
2273 | 0 | hme_level2_b64(pcs, org_x, org_y, me_ctx, input_ptr); |
2274 | 0 | } |
2275 | 0 | } |
2276 | | |
2277 | | // Set final MV centre |
2278 | 0 | set_final_search_centre_sb(pcs, me_ctx); |
2279 | |
|
2280 | 0 | if (me_ctx->me_type == ME_MCTF) { |
2281 | 0 | if (ABS(me_ctx->search_results[0][0].hme_sc_x) > ABS(me_ctx->search_results[0][0].hme_sc_y)) { |
2282 | 0 | me_ctx->tf_tot_horz_blks++; |
2283 | 0 | } else { |
2284 | 0 | me_ctx->tf_tot_vert_blks++; |
2285 | 0 | } |
2286 | 0 | } |
2287 | 0 | } |
2288 | | |
2289 | 0 | static void hme_prune_ref_and_adjust_sr(MeContext* me_ctx) { |
2290 | 0 | uint16_t prune_ref_th = me_ctx->me_hme_prune_ctrls.prune_ref_if_hme_sad_dev_bigger_than_th; |
2291 | 0 | if (me_ctx->me_hme_prune_ctrls.enable_me_hme_ref_pruning && (prune_ref_th != (uint16_t)~0)) { |
2292 | 0 | uint64_t best = (uint64_t)~0; |
2293 | 0 | for (int i = 0; i < MAX_NUM_OF_REF_PIC_LIST; ++i) { |
2294 | 0 | for (int j = 0; j < REF_LIST_MAX_DEPTH; ++j) { |
2295 | 0 | if (me_ctx->search_results[i][j].hme_sad < best) { |
2296 | 0 | best = me_ctx->search_results[i][j].hme_sad; |
2297 | 0 | } |
2298 | 0 | } |
2299 | 0 | } |
2300 | | // Prune references based on HME sad |
2301 | 0 | for (uint32_t li = 0; li < MAX_NUM_OF_REF_PIC_LIST; li++) { |
2302 | 0 | for (uint32_t ri = 1; ri < REF_LIST_MAX_DEPTH; ri++) { |
2303 | 0 | if ((me_ctx->search_results[li][ri].hme_sad - best) * 100 > (prune_ref_th * best)) { |
2304 | 0 | me_ctx->search_results[li][ri].do_ref = 0; |
2305 | 0 | } |
2306 | 0 | } |
2307 | 0 | } |
2308 | 0 | } |
2309 | 0 | if (me_ctx->me_sr_adjustment_ctrls.enable_me_sr_adjustment) { |
2310 | 0 | uint16_t mv_length_th = me_ctx->me_sr_adjustment_ctrls.reduce_me_sr_based_on_mv_length_th; |
2311 | 0 | uint16_t stationary_hme_sad_abs_th = me_ctx->me_sr_adjustment_ctrls.stationary_hme_sad_abs_th; |
2312 | 0 | uint16_t reduce_me_sr_based_on_hme_sad_abs_th = |
2313 | 0 | me_ctx->me_sr_adjustment_ctrls.reduce_me_sr_based_on_hme_sad_abs_th; |
2314 | | // Reduce the ME search region if the hme sad is low |
2315 | 0 | for (uint32_t li = 0; li < MAX_NUM_OF_REF_PIC_LIST; li++) { |
2316 | 0 | for (uint32_t ri = 0; ri < REF_LIST_MAX_DEPTH; ri++) { |
2317 | 0 | if (ABS(me_ctx->search_results[li][ri].hme_sc_x) <= mv_length_th && |
2318 | 0 | ABS(me_ctx->search_results[li][ri].hme_sc_y) <= mv_length_th && |
2319 | 0 | me_ctx->search_results[li][ri].hme_sad < stationary_hme_sad_abs_th) { |
2320 | 0 | me_ctx->reduce_me_sr_divisor[li][ri] = me_ctx->me_sr_adjustment_ctrls.stationary_me_sr_divisor; |
2321 | 0 | } else if (me_ctx->search_results[li][ri].hme_sad < reduce_me_sr_based_on_hme_sad_abs_th) { |
2322 | 0 | me_ctx->reduce_me_sr_divisor[li][ri] = me_ctx->me_sr_adjustment_ctrls.me_sr_divisor_for_low_hme_sad; |
2323 | 0 | } |
2324 | 0 | } |
2325 | 0 | } |
2326 | 0 | } |
2327 | 0 | } |
2328 | | |
// Maps a z-scan index to the matching raster-scan index for a 4-level quadtree
// holding 1 + 4 + 16 + 64 = 85 entries. Each group of four below is one quad
// (top-left, top-right, bottom-left, bottom-right) of its level.
// NOTE(review): the levels presumably are the 64x64, 32x32, 16x16 and 8x8
// square PUs of a 64x64 block - confirm against the PU index definition.
static const uint8_t z_to_raster[85] = {
    // level 0: 1 entry
    0,
    // level 1: 4 entries
    1, 2, 3, 4,
    // level 2: 16 entries, raster grid is 4 wide
    5, 6, 9, 10,
    7, 8, 11, 12,
    13, 14, 17, 18,
    15, 16, 19, 20,
    // level 3: 64 entries, raster grid is 8 wide
    21, 22, 29, 30, 23, 24, 31, 32, 37, 38, 45, 46, 39, 40, 47, 48,
    25, 26, 33, 34, 27, 28, 35, 36, 41, 42, 49, 50, 43, 44, 51, 52,
    53, 54, 61, 62, 55, 56, 63, 64, 69, 70, 77, 78, 71, 72, 79, 80,
    57, 58, 65, 66, 59, 60, 67, 68, 73, 74, 81, 82, 75, 76, 83, 84};
2333 | | |
2334 | | static void construct_me_candidate_array_mrp_off(PictureParentControlSet* pcs, MeContext* me_ctx, |
2335 | 0 | uint32_t num_of_list_to_search, uint32_t sb_index) { |
2336 | | // This function should only be called if there is one ref frame in each list |
2337 | 0 | assert(me_ctx->num_of_ref_pic_to_search[0] == 1); |
2338 | 0 | assert(me_ctx->num_of_ref_pic_to_search[1] == 1); |
2339 | 0 | const uint8_t ref_pic_idx = 0; |
2340 | | |
2341 | | // Set whether the reference from each list is allowed |
2342 | 0 | uint8_t blk_do_ref_org[MAX_NUM_OF_REF_PIC_LIST]; |
2343 | 0 | blk_do_ref_org[REF_LIST_0] = me_ctx->search_results[REF_LIST_0][0].do_ref; |
2344 | 0 | blk_do_ref_org[REF_LIST_1] = (num_of_list_to_search == 1) ? 0 : me_ctx->search_results[REF_LIST_1][0].do_ref; |
2345 | |
|
2346 | 0 | if (num_of_list_to_search < 2 || !me_ctx->search_results[REF_LIST_1][0].do_ref) { |
2347 | 0 | num_of_list_to_search = 1; |
2348 | 0 | } |
2349 | 0 | const uint32_t me_prune_th = (blk_do_ref_org[0] && blk_do_ref_org[1]) ? me_ctx->prune_me_candidates_th : 0; |
2350 | | |
2351 | | // Set the count to 1 for all PUs using memset, which is faster than setting at the end of each loop. The count will only need |
2352 | | // to be updated if both reference frames are allowed. |
2353 | 0 | uint8_t number_of_pus = pcs->enable_me_16x16 |
2354 | 0 | ? pcs->enable_me_8x8 ? pcs->max_number_of_pus_per_sb : MAX_SB64_PU_COUNT_NO_8X8 |
2355 | 0 | : MAX_SB64_PU_COUNT_WO_16X16; |
2356 | 0 | memset(pcs->pa_me_data->me_results[sb_index]->total_me_candidate_index, 1, number_of_pus); |
2357 | |
|
2358 | 0 | for (uint8_t n_idx = 0; n_idx < pcs->max_number_of_pus_per_sb; ++n_idx) { |
2359 | 0 | const uint8_t pu_index = z_to_raster[n_idx]; |
2360 | 0 | uint8_t me_cand_offset = 0; |
2361 | |
|
2362 | 0 | uint8_t use_me_pu = pcs->enable_me_16x16 ? pcs->enable_me_8x8 || n_idx < MAX_SB64_PU_COUNT_NO_8X8 |
2363 | 0 | : n_idx < MAX_SB64_PU_COUNT_WO_16X16; |
2364 | 0 | MeCandidate* me_candidate_array = NULL; |
2365 | 0 | if (use_me_pu) { |
2366 | 0 | me_candidate_array = |
2367 | 0 | &pcs->pa_me_data->me_results[sb_index]->me_candidate_array[pu_index * pcs->pa_me_data->max_cand]; |
2368 | 0 | } |
2369 | 0 | uint8_t blk_do_ref[MAX_NUM_OF_REF_PIC_LIST] = {blk_do_ref_org[REF_LIST_0], blk_do_ref_org[REF_LIST_1]}; |
2370 | 0 | const uint32_t best_me_dist = blk_do_ref_org[REF_LIST_0] && blk_do_ref_org[REF_LIST_1] |
2371 | 0 | ? MIN(me_ctx->p_sb_best_sad[REF_LIST_0][ref_pic_idx][n_idx], |
2372 | 0 | me_ctx->p_sb_best_sad[REF_LIST_1][ref_pic_idx][n_idx]) |
2373 | 0 | : blk_do_ref_org[REF_LIST_0] ? me_ctx->p_sb_best_sad[REF_LIST_0][ref_pic_idx][n_idx] |
2374 | 0 | : me_ctx->p_sb_best_sad[REF_LIST_1][ref_pic_idx][n_idx]; |
2375 | |
|
2376 | 0 | me_ctx->me_distortion[pu_index] = best_me_dist; |
2377 | 0 | int8_t min_dist_list = -1; |
2378 | | // If both refs have a candidate, use only the best one for unipred |
2379 | 0 | if (me_ctx->use_best_unipred_cand_only && blk_do_ref[REF_LIST_0] && blk_do_ref[REF_LIST_1]) { |
2380 | 0 | min_dist_list = me_ctx->p_sb_best_sad[REF_LIST_0][ref_pic_idx][n_idx] < |
2381 | 0 | me_ctx->p_sb_best_sad[REF_LIST_1][ref_pic_idx][n_idx] |
2382 | 0 | ? 0 |
2383 | 0 | : 1; |
2384 | 0 | } |
2385 | | // Unipred candidates |
2386 | 0 | for (int list_index = REF_LIST_0; |
2387 | 0 | (uint32_t)list_index < num_of_list_to_search && (use_me_pu || me_cand_offset == 0); |
2388 | 0 | ++list_index) { |
2389 | | //ME was skipped, so do not add this Unipred candidate |
2390 | 0 | if (blk_do_ref[list_index] == 0) { |
2391 | 0 | continue; |
2392 | 0 | } |
2393 | | |
2394 | 0 | if (me_prune_th > 0) { |
2395 | 0 | uint32_t current_to_best_dist_distance = (me_ctx->p_sb_best_sad[list_index][ref_pic_idx][n_idx] - |
2396 | 0 | best_me_dist) * |
2397 | 0 | 100; |
2398 | 0 | if (current_to_best_dist_distance > (best_me_dist * me_prune_th)) { |
2399 | 0 | blk_do_ref[list_index] = 0; |
2400 | 0 | continue; |
2401 | 0 | } |
2402 | 0 | } |
2403 | 0 | if (min_dist_list != -1 && min_dist_list != list_index) { |
2404 | | // Need to save the MV in case bipred is injected |
2405 | 0 | if (use_me_pu) { |
2406 | 0 | pcs->pa_me_data->me_results[sb_index] |
2407 | 0 | ->me_mv_array[pu_index * pcs->pa_me_data->max_refs + |
2408 | 0 | (list_index ? pcs->pa_me_data->max_l0 : 0) + ref_pic_idx] |
2409 | 0 | .as_int = me_ctx->p_sb_best_mv[list_index][ref_pic_idx][n_idx]; |
2410 | 0 | } |
2411 | 0 | continue; |
2412 | 0 | } |
2413 | 0 | if (use_me_pu) { |
2414 | 0 | me_candidate_array[me_cand_offset].direction = list_index; |
2415 | 0 | me_candidate_array[me_cand_offset].ref_idx_l0 = ref_pic_idx; |
2416 | 0 | me_candidate_array[me_cand_offset].ref_idx_l1 = ref_pic_idx; |
2417 | 0 | me_candidate_array[me_cand_offset].ref0_list = list_index == 0 ? list_index : 24; |
2418 | 0 | me_candidate_array[me_cand_offset].ref1_list = list_index == 1 ? list_index : 24; |
2419 | |
|
2420 | 0 | pcs->pa_me_data->me_results[sb_index] |
2421 | 0 | ->me_mv_array[pu_index * pcs->pa_me_data->max_refs + (list_index ? pcs->pa_me_data->max_l0 : 0) + |
2422 | 0 | ref_pic_idx] |
2423 | 0 | .as_int = me_ctx->p_sb_best_mv[list_index][ref_pic_idx][n_idx]; |
2424 | 0 | } |
2425 | |
|
2426 | 0 | me_cand_offset++; |
2427 | 0 | } |
2428 | | |
2429 | | // Can have up to one bipred cand (LAST ,BWD) |
2430 | 0 | if (blk_do_ref[REF_LIST_0] && blk_do_ref[REF_LIST_1] && use_me_pu) { |
2431 | | // If get here, will have 3 candidates, since both unipred directions are valid |
2432 | 0 | assert(num_of_list_to_search == 2); |
2433 | 0 | me_candidate_array[me_cand_offset].direction = BI_PRED; |
2434 | 0 | me_candidate_array[me_cand_offset].ref_idx_l0 = ref_pic_idx; |
2435 | 0 | me_candidate_array[me_cand_offset].ref_idx_l1 = ref_pic_idx; |
2436 | 0 | me_candidate_array[me_cand_offset].ref0_list = REFERENCE_PIC_LIST_0; |
2437 | 0 | me_candidate_array[me_cand_offset].ref1_list = REFERENCE_PIC_LIST_1; |
2438 | | |
2439 | | // store total me candidate count |
2440 | 0 | pcs->pa_me_data->me_results[sb_index]->total_me_candidate_index[pu_index] = me_cand_offset + 1; |
2441 | 0 | } |
2442 | 0 | } |
2443 | 0 | } |
2444 | | |
2445 | | static void construct_me_candidate_array_single_ref(PictureParentControlSet* pcs, MeContext* ctx, |
2446 | 0 | uint32_t num_of_list_to_search, uint32_t sb_index) { |
2447 | | // This function should only be called if there is one ref frame in list 0 |
2448 | 0 | assert(ctx->num_of_ref_pic_to_search[0] == 1); |
2449 | 0 | assert(ctx->num_of_ref_pic_to_search[1] == 0); |
2450 | 0 | const uint8_t ref_pic_idx = 0; |
2451 | | |
2452 | | // Set whether the reference from each list is allowed |
2453 | 0 | uint8_t blk_do_ref = ctx->search_results[REF_LIST_0][0].do_ref; |
2454 | |
|
2455 | 0 | if (num_of_list_to_search < 2 || !ctx->search_results[REF_LIST_1][0].do_ref) { |
2456 | 0 | num_of_list_to_search = 1; |
2457 | 0 | } |
2458 | | |
2459 | | // Set the count to 1 for all PUs using memset, which is faster than setting at the end of each loop. The count will only need |
2460 | | // to be updated if both reference frames are allowed. |
2461 | 0 | uint8_t number_of_pus = pcs->enable_me_16x16 |
2462 | 0 | ? pcs->enable_me_8x8 ? pcs->max_number_of_pus_per_sb : MAX_SB64_PU_COUNT_NO_8X8 |
2463 | 0 | : MAX_SB64_PU_COUNT_WO_16X16; |
2464 | 0 | memset(pcs->pa_me_data->me_results[sb_index]->total_me_candidate_index, 1, number_of_pus); |
2465 | |
|
2466 | 0 | for (uint8_t n_idx = 0; n_idx < pcs->max_number_of_pus_per_sb; ++n_idx) { |
2467 | 0 | const uint8_t pu_index = z_to_raster[n_idx]; |
2468 | |
|
2469 | 0 | uint8_t use_me_pu = pcs->enable_me_16x16 ? pcs->enable_me_8x8 || n_idx < MAX_SB64_PU_COUNT_NO_8X8 |
2470 | 0 | : n_idx < MAX_SB64_PU_COUNT_WO_16X16; |
2471 | 0 | MeCandidate* me_candidate_array = NULL; |
2472 | 0 | if (use_me_pu) { |
2473 | 0 | me_candidate_array = |
2474 | 0 | &pcs->pa_me_data->me_results[sb_index]->me_candidate_array[pu_index * pcs->pa_me_data->max_cand]; |
2475 | 0 | } |
2476 | 0 | ctx->me_distortion[pu_index] = ctx->p_sb_best_sad[REF_LIST_0][ref_pic_idx][n_idx]; |
2477 | 0 | ; |
2478 | | |
2479 | | //ME was skipped, so do not add this Unipred candidate |
2480 | 0 | if (blk_do_ref == 0) { |
2481 | 0 | continue; |
2482 | 0 | } |
2483 | | |
2484 | 0 | if (use_me_pu) { |
2485 | 0 | me_candidate_array[0].direction = REF_LIST_0; |
2486 | 0 | me_candidate_array[0].ref_idx_l0 = ref_pic_idx; |
2487 | 0 | me_candidate_array[0].ref_idx_l1 = ref_pic_idx; |
2488 | 0 | me_candidate_array[0].ref0_list = 0; |
2489 | 0 | me_candidate_array[0].ref1_list = 0; |
2490 | |
|
2491 | 0 | pcs->pa_me_data->me_results[sb_index] |
2492 | 0 | ->me_mv_array[pu_index * pcs->pa_me_data->max_refs + ref_pic_idx] |
2493 | 0 | .as_int = ctx->p_sb_best_mv[0][ref_pic_idx][n_idx]; |
2494 | 0 | } |
2495 | 0 | } |
2496 | 0 | } |
2497 | | |
2498 | | static void construct_me_candidate_array(PictureParentControlSet* pcs, MeContext* me_ctx, |
2499 | 0 | uint32_t num_of_list_to_search, uint32_t sb_index) { |
2500 | 0 | for (uint32_t n_idx = 0; n_idx < pcs->max_number_of_pus_per_sb; ++n_idx) { |
2501 | 0 | uint8_t pu_index = (n_idx > 4) ? z_to_raster[n_idx] : n_idx; |
2502 | 0 | uint8_t me_cand_offset = 0; |
2503 | |
|
2504 | 0 | uint8_t use_me_pu = pcs->enable_me_16x16 ? pcs->enable_me_8x8 || n_idx < MAX_SB64_PU_COUNT_NO_8X8 |
2505 | 0 | : n_idx < MAX_SB64_PU_COUNT_WO_16X16; |
2506 | 0 | MeCandidate* me_candidate_array = NULL; |
2507 | 0 | if (use_me_pu) { |
2508 | 0 | me_candidate_array = |
2509 | 0 | &pcs->pa_me_data->me_results[sb_index]->me_candidate_array[pu_index * pcs->pa_me_data->max_cand]; |
2510 | 0 | } |
2511 | 0 | uint8_t blk_do_ref[MAX_NUM_OF_REF_PIC_LIST][MAX_REF_IDX]; |
2512 | 0 | uint32_t current_to_best_dist_distance; |
2513 | 0 | const uint32_t me_prune_th = me_ctx->prune_me_candidates_th; //to change to 32bit |
2514 | 0 | uint32_t best_me_dist = (uint32_t)~0; |
2515 | | |
2516 | | // Determine the best ME distortion |
2517 | 0 | for (uint32_t list_index = REF_LIST_0; list_index < num_of_list_to_search; list_index++) { |
2518 | 0 | const uint8_t num_of_ref_pic_to_search = me_ctx->num_of_ref_pic_to_search[list_index]; |
2519 | 0 | for (uint32_t ref_pic = 0; ref_pic < num_of_ref_pic_to_search; ref_pic++) { |
2520 | 0 | blk_do_ref[list_index][ref_pic] = me_ctx->search_results[list_index][ref_pic].do_ref; |
2521 | 0 | if (blk_do_ref[list_index][ref_pic] == 0) { |
2522 | 0 | continue; |
2523 | 0 | } |
2524 | | |
2525 | 0 | best_me_dist = me_ctx->p_sb_best_sad[list_index][ref_pic][n_idx] < best_me_dist |
2526 | 0 | ? me_ctx->p_sb_best_sad[list_index][ref_pic][n_idx] |
2527 | 0 | : best_me_dist; |
2528 | 0 | } |
2529 | 0 | } |
2530 | |
|
2531 | 0 | me_ctx->me_distortion[pu_index] = best_me_dist; |
2532 | | // Unipred candidates |
2533 | 0 | for (uint32_t list_index = REF_LIST_0; list_index < num_of_list_to_search && (use_me_pu || me_cand_offset == 0); |
2534 | 0 | ++list_index) { |
2535 | 0 | const uint8_t num_of_ref_pic_to_search = me_ctx->num_of_ref_pic_to_search[list_index]; |
2536 | |
|
2537 | 0 | for (uint32_t ref_pic_index = 0; |
2538 | 0 | (ref_pic_index < num_of_ref_pic_to_search) && (use_me_pu || (me_cand_offset == 0)); |
2539 | 0 | ++ref_pic_index) { |
2540 | | //ME was skipped, so do not add this Unipred candidate |
2541 | 0 | if (blk_do_ref[list_index][ref_pic_index] == 0) { |
2542 | 0 | continue; |
2543 | 0 | } |
2544 | | |
2545 | 0 | if (me_prune_th > 0) { |
2546 | 0 | current_to_best_dist_distance = (me_ctx->p_sb_best_sad[list_index][ref_pic_index][n_idx] - |
2547 | 0 | best_me_dist) * |
2548 | 0 | 100; |
2549 | 0 | if (current_to_best_dist_distance > (best_me_dist * me_prune_th)) { |
2550 | 0 | blk_do_ref[list_index][ref_pic_index] = 0; |
2551 | 0 | continue; |
2552 | 0 | } |
2553 | 0 | } |
2554 | 0 | if (use_me_pu) { |
2555 | 0 | me_candidate_array[me_cand_offset].direction = list_index; |
2556 | 0 | me_candidate_array[me_cand_offset].ref_idx_l0 = ref_pic_index; |
2557 | 0 | me_candidate_array[me_cand_offset].ref_idx_l1 = ref_pic_index; |
2558 | 0 | me_candidate_array[me_cand_offset].ref0_list = list_index == 0 ? list_index : 24; |
2559 | 0 | me_candidate_array[me_cand_offset].ref1_list = list_index == 1 ? list_index : 24; |
2560 | |
|
2561 | 0 | pcs->pa_me_data->me_results[sb_index] |
2562 | 0 | ->me_mv_array[pu_index * pcs->pa_me_data->max_refs + |
2563 | 0 | (list_index ? pcs->pa_me_data->max_l0 : 0) + ref_pic_index] |
2564 | 0 | .as_int = me_ctx->p_sb_best_mv[list_index][ref_pic_index][n_idx]; |
2565 | 0 | } |
2566 | 0 | me_cand_offset++; |
2567 | 0 | } |
2568 | 0 | } |
2569 | 0 | if (num_of_list_to_search == 2 && use_me_pu) { |
2570 | | // 1st set of BIPRED cand |
2571 | | // (LAST ,BWD), (LAST,ALT ), (LAST,ALT2 ) |
2572 | | // (LAST2,BWD), (LAST2,ALT), (LAST2,ALT2) |
2573 | | // (LAST3,BWD), (LAST3,ALT), (LAST3,ALT2) |
2574 | | // (GOLD ,BWD), (GOLD,ALT ), (GOLD,ALT2 ) |
2575 | 0 | for (uint32_t first_list_ref_pict_idx = 0; |
2576 | 0 | first_list_ref_pict_idx < me_ctx->num_of_ref_pic_to_search[REF_LIST_0]; |
2577 | 0 | first_list_ref_pict_idx++) { |
2578 | 0 | for (uint32_t second_list_ref_pict_idx = 0; |
2579 | 0 | second_list_ref_pict_idx < me_ctx->num_of_ref_pic_to_search[REF_LIST_1]; |
2580 | 0 | second_list_ref_pict_idx++) { |
2581 | 0 | if (pcs->scs->mrp_ctrls.only_l_bwd && |
2582 | 0 | (first_list_ref_pict_idx > 0 || second_list_ref_pict_idx > 0)) { |
2583 | 0 | continue; |
2584 | 0 | } |
2585 | 0 | if (blk_do_ref[REF_LIST_0][first_list_ref_pict_idx] && |
2586 | 0 | blk_do_ref[REF_LIST_1][second_list_ref_pict_idx]) { |
2587 | 0 | me_candidate_array[me_cand_offset].direction = BI_PRED; |
2588 | 0 | me_candidate_array[me_cand_offset].ref_idx_l0 = first_list_ref_pict_idx; |
2589 | 0 | me_candidate_array[me_cand_offset].ref_idx_l1 = second_list_ref_pict_idx; |
2590 | 0 | me_candidate_array[me_cand_offset].ref0_list = REFERENCE_PIC_LIST_0; |
2591 | 0 | me_candidate_array[me_cand_offset].ref1_list = REFERENCE_PIC_LIST_1; |
2592 | 0 | me_cand_offset++; |
2593 | 0 | } |
2594 | 0 | } |
2595 | 0 | } |
2596 | 0 | if (!pcs->scs->mrp_ctrls.only_l_bwd) { |
2597 | | // 2nd set of BIPRED cand: (LAST,LAST2) (LAST,LAST3) (LAST,GOLD) |
2598 | 0 | for (uint32_t first_list_ref_pict_idx = 1; |
2599 | 0 | first_list_ref_pict_idx < me_ctx->num_of_ref_pic_to_search[REF_LIST_0]; |
2600 | 0 | first_list_ref_pict_idx++) { |
2601 | 0 | if (blk_do_ref[REF_LIST_0][0] && blk_do_ref[REF_LIST_0][first_list_ref_pict_idx]) { |
2602 | 0 | me_candidate_array[me_cand_offset].direction = BI_PRED; |
2603 | 0 | me_candidate_array[me_cand_offset].ref_idx_l0 = 0; |
2604 | 0 | me_candidate_array[me_cand_offset].ref_idx_l1 = first_list_ref_pict_idx; |
2605 | 0 | me_candidate_array[me_cand_offset].ref0_list = REFERENCE_PIC_LIST_0; |
2606 | 0 | me_candidate_array[me_cand_offset].ref1_list = REFERENCE_PIC_LIST_0; |
2607 | 0 | me_cand_offset++; |
2608 | 0 | } |
2609 | 0 | } |
2610 | 0 | } |
2611 | | |
2612 | | // 3rd set of BIPRED cand: (BWD, ALT) |
2613 | 0 | if (!pcs->scs->mrp_ctrls.only_l_bwd) { |
2614 | 0 | if (me_ctx->num_of_ref_pic_to_search[REF_LIST_1] == 3 && blk_do_ref[REF_LIST_1][0] && |
2615 | 0 | blk_do_ref[REF_LIST_1][2]) { |
2616 | 0 | { |
2617 | 0 | me_candidate_array[me_cand_offset].direction = BI_PRED; |
2618 | 0 | me_candidate_array[me_cand_offset].ref_idx_l0 = 0; |
2619 | 0 | me_candidate_array[me_cand_offset].ref_idx_l1 = 2; |
2620 | 0 | me_candidate_array[me_cand_offset].ref0_list = REFERENCE_PIC_LIST_1; |
2621 | 0 | me_candidate_array[me_cand_offset].ref1_list = REFERENCE_PIC_LIST_1; |
2622 | 0 | me_cand_offset++; |
2623 | 0 | } |
2624 | 0 | } |
2625 | 0 | } |
2626 | 0 | } |
2627 | | |
2628 | | // store total me candidate count |
2629 | 0 | if (use_me_pu) { |
2630 | 0 | pcs->pa_me_data->me_results[sb_index]->total_me_candidate_index[pu_index] = me_cand_offset; |
2631 | 0 | } |
2632 | 0 | } |
2633 | 0 | } |
2634 | | |
2635 | | // Active and stationary detection for global motion |
2636 | | static void perform_gm_detection( |
2637 | | PictureParentControlSet* pcs, // input parameter, Picture Control Set Ptr |
2638 | | uint32_t sb_index, // input parameter, SB Index |
2639 | | MeContext* me_ctx // input parameter, ME Context Ptr, used to store decimated/interpolated SB/SR |
2640 | 0 | ) { |
2641 | 0 | SequenceControlSet* scs = pcs->scs; |
2642 | 0 | uint64_t per_sig_cnt[MAX_NUM_OF_REF_PIC_LIST][REF_LIST_MAX_DEPTH][NUM_MV_COMPONENTS][NUM_MV_HIST]; |
2643 | 0 | uint64_t tot_cnt = 0; |
2644 | 0 | svt_memset(per_sig_cnt, 0, sizeof(per_sig_cnt)); |
2645 | |
|
2646 | 0 | if (scs->input_resolution <= INPUT_SIZE_480p_RANGE) { |
2647 | 0 | for (unsigned i = 0; i < 64; i++) { |
2648 | 0 | uint8_t n_idx = 21 + i; |
2649 | 0 | if (!pcs->enable_me_8x8) { |
2650 | 0 | if (n_idx >= MAX_SB64_PU_COUNT_NO_8X8) { |
2651 | 0 | n_idx = me_idx_85_8x8_to_16x16_conversion[n_idx - MAX_SB64_PU_COUNT_NO_8X8]; |
2652 | 0 | } |
2653 | 0 | if (!pcs->enable_me_16x16) { |
2654 | 0 | if (n_idx >= MAX_SB64_PU_COUNT_WO_16X16) { |
2655 | 0 | n_idx = me_idx_16x16_to_parent_32x32_conversion[n_idx - MAX_SB64_PU_COUNT_WO_16X16]; |
2656 | 0 | } |
2657 | 0 | } |
2658 | 0 | } |
2659 | 0 | MeCandidate* me_candidate = &( |
2660 | 0 | pcs->pa_me_data->me_results[sb_index]->me_candidate_array[n_idx * pcs->pa_me_data->max_cand]); |
2661 | |
|
2662 | 0 | uint32_t list_index = (me_candidate->direction == 0 || me_candidate->direction == 2) |
2663 | 0 | ? me_candidate->ref0_list |
2664 | 0 | : me_candidate->ref1_list; |
2665 | 0 | uint32_t ref_pic_index = (me_candidate->direction == 0 || me_candidate->direction == 2) |
2666 | 0 | ? me_candidate->ref_idx_l0 |
2667 | 0 | : me_candidate->ref_idx_l1; |
2668 | | |
2669 | | // Active block detection |
2670 | 0 | const int active_th = 4; |
2671 | 0 | int mx = _MVXT(me_ctx->p_sb_best_mv[list_index][ref_pic_index][n_idx]) << 2; |
2672 | 0 | if (mx < -active_th) { |
2673 | 0 | per_sig_cnt[list_index][ref_pic_index][0][0]++; |
2674 | 0 | } else if (mx > active_th) { |
2675 | 0 | per_sig_cnt[list_index][ref_pic_index][0][1]++; |
2676 | 0 | } |
2677 | 0 | int my = _MVYT(me_ctx->p_sb_best_mv[list_index][ref_pic_index][n_idx]) << 2; |
2678 | 0 | if (my < -active_th) { |
2679 | 0 | per_sig_cnt[list_index][ref_pic_index][1][0]++; |
2680 | 0 | } else if (my > active_th) { |
2681 | 0 | per_sig_cnt[list_index][ref_pic_index][1][1]++; |
2682 | 0 | } |
2683 | |
|
2684 | 0 | tot_cnt++; |
2685 | 0 | } |
2686 | 0 | } else { |
2687 | 0 | for (unsigned i = 0; i < 16; i++) { |
2688 | 0 | uint8_t n_idx = 5 + i; |
2689 | 0 | if (!pcs->enable_me_16x16) { |
2690 | 0 | if (n_idx >= MAX_SB64_PU_COUNT_WO_16X16) { |
2691 | 0 | n_idx = me_idx_16x16_to_parent_32x32_conversion[n_idx - MAX_SB64_PU_COUNT_WO_16X16]; |
2692 | 0 | } |
2693 | 0 | } |
2694 | 0 | MeCandidate* me_candidate = &( |
2695 | 0 | pcs->pa_me_data->me_results[sb_index]->me_candidate_array[n_idx * pcs->pa_me_data->max_cand]); |
2696 | |
|
2697 | 0 | uint32_t list_index = (me_candidate->direction == 0 || me_candidate->direction == 2) |
2698 | 0 | ? me_candidate->ref0_list |
2699 | 0 | : me_candidate->ref1_list; |
2700 | 0 | uint32_t ref_pic_index = (me_candidate->direction == 0 || me_candidate->direction == 2) |
2701 | 0 | ? me_candidate->ref_idx_l0 |
2702 | 0 | : me_candidate->ref_idx_l1; |
2703 | | |
2704 | | // Active block detection |
2705 | 0 | const int active_th = 32; |
2706 | 0 | int mx = _MVXT(me_ctx->p_sb_best_mv[list_index][ref_pic_index][n_idx]) << 2; |
2707 | 0 | if (mx < -active_th) { |
2708 | 0 | per_sig_cnt[list_index][ref_pic_index][0][0]++; |
2709 | 0 | } else if (mx > active_th) { |
2710 | 0 | per_sig_cnt[list_index][ref_pic_index][0][1]++; |
2711 | 0 | } |
2712 | 0 | int my = _MVYT(me_ctx->p_sb_best_mv[list_index][ref_pic_index][n_idx]) << 2; |
2713 | 0 | if (my < -active_th) { |
2714 | 0 | per_sig_cnt[list_index][ref_pic_index][1][0]++; |
2715 | 0 | } else if (my > active_th) { |
2716 | 0 | per_sig_cnt[list_index][ref_pic_index][1][1]++; |
2717 | 0 | } |
2718 | |
|
2719 | 0 | tot_cnt++; |
2720 | 0 | } |
2721 | 0 | } |
2722 | |
|
2723 | 0 | for (int l = 0; l < MAX_NUM_OF_REF_PIC_LIST; l++) { |
2724 | 0 | for (int r = 0; r < REF_LIST_MAX_DEPTH; r++) { |
2725 | 0 | for (int c = 0; c < NUM_MV_COMPONENTS; c++) { |
2726 | 0 | for (int s = 0; s < NUM_MV_HIST; s++) { |
2727 | 0 | if (per_sig_cnt[l][r][c][s] > (tot_cnt / 2)) { |
2728 | 0 | pcs->rc_me_allow_gm[sb_index] = 1; |
2729 | 0 | break; |
2730 | 0 | } |
2731 | 0 | } |
2732 | 0 | } |
2733 | 0 | } |
2734 | 0 | } |
2735 | 0 | } |
2736 | | |
2737 | | // Compute the distortion per block size based on the ME results |
2738 | | static void compute_distortion( |
2739 | | PictureParentControlSet* pcs, // input parameter, Picture Control Set Ptr |
2740 | | uint32_t b64_index, // input parameter, B64 Index |
2741 | | MeContext* me_ctx // input parameter, ME Context Ptr, used to store decimated/interpolated SB/SR |
2742 | 0 | ) { |
2743 | 0 | SequenceControlSet* scs = pcs->scs; |
2744 | | // Determine sb_64x64_me_class |
2745 | 0 | B64Geom* b64_geom = &pcs->b64_geom[b64_index]; |
2746 | 0 | uint32_t b64_size = 64 * 64; |
2747 | 0 | uint32_t dist_64x64 = 0, dist_32x32 = 0, dist_16x16 = 0, dist_8x8 = 0; |
2748 | | |
2749 | | // 64x64 |
2750 | 0 | { dist_64x64 = me_ctx->me_distortion[0]; } |
2751 | | |
2752 | | // 32x32 |
2753 | 0 | for (unsigned i = 0; i < 4; i++) { |
2754 | 0 | dist_32x32 += me_ctx->me_distortion[1 + i]; |
2755 | 0 | } |
2756 | | |
2757 | | // 16x16 |
2758 | 0 | for (unsigned i = 0; i < 16; i++) { |
2759 | 0 | dist_16x16 += me_ctx->me_distortion[5 + i]; |
2760 | 0 | } |
2761 | | |
2762 | | // 8x8 |
2763 | 0 | for (unsigned i = 0; i < 64; i++) { |
2764 | 0 | dist_8x8 += me_ctx->me_distortion[21 + i]; |
2765 | 0 | } |
2766 | |
|
2767 | 0 | uint64_t mean_dist_8x8 = dist_8x8 / 64; |
2768 | 0 | uint64_t sum_ofsq_dist_8x8 = 0; |
2769 | 0 | for (unsigned i = 0; i < 64; i++) { |
2770 | 0 | const int64_t diff = ((int64_t)me_ctx->me_distortion[21 + i] - (int64_t)mean_dist_8x8); |
2771 | 0 | sum_ofsq_dist_8x8 += diff * diff; |
2772 | 0 | } |
2773 | |
|
2774 | 0 | pcs->me_8x8_cost_variance[b64_index] = (uint32_t)(sum_ofsq_dist_8x8 / 64); |
2775 | | // Compute the sum of the distortion of all 16 16x16 (720 and above) and |
2776 | | // 64 8x8 (for lower resolutions) blocks in the SB |
2777 | 0 | pcs->rc_me_distortion[b64_index] = (scs->input_resolution <= INPUT_SIZE_480p_RANGE) ? dist_8x8 : dist_16x16; |
2778 | 0 | const uint32_t pix_num = b64_geom->width * b64_geom->height; |
2779 | | // Normalize |
2780 | 0 | pcs->me_64x64_distortion[b64_index] = (dist_64x64 * b64_size) / (pix_num); |
2781 | 0 | pcs->me_32x32_distortion[b64_index] = (dist_32x32 * b64_size) / (pix_num); |
2782 | 0 | pcs->me_16x16_distortion[b64_index] = (dist_16x16 * b64_size) / (pix_num); |
2783 | 0 | pcs->me_8x8_distortion[b64_index] = (dist_8x8 * b64_size) / (pix_num); |
2784 | 0 | } |
2785 | | |
2786 | | // Initialize data used in ME/HME |
2787 | 0 | static INLINE void init_me_hme_data(MeContext* me_ctx) { |
2788 | | // Initialize HME search centres to 0 |
2789 | 0 | if (me_ctx->enable_hme_flag) { |
2790 | 0 | svt_memset(me_ctx->x_hme_level0_search_center, 0, sizeof(me_ctx->x_hme_level0_search_center)); |
2791 | 0 | svt_memset(me_ctx->y_hme_level0_search_center, 0, sizeof(me_ctx->y_hme_level0_search_center)); |
2792 | |
|
2793 | 0 | svt_memset(me_ctx->x_hme_level1_search_center, 0, sizeof(me_ctx->x_hme_level1_search_center)); |
2794 | 0 | svt_memset(me_ctx->y_hme_level1_search_center, 0, sizeof(me_ctx->y_hme_level1_search_center)); |
2795 | |
|
2796 | 0 | svt_memset(me_ctx->x_hme_level2_search_center, 0, sizeof(me_ctx->x_hme_level2_search_center)); |
2797 | 0 | svt_memset(me_ctx->y_hme_level2_search_center, 0, sizeof(me_ctx->y_hme_level2_search_center)); |
2798 | 0 | } |
2799 | | |
2800 | | // R2R FIX: no winner integer MV is set in special case like initial p_sb_best_mv for overlay case, |
2801 | | // then it sends dirty p_sb_best_mv to MD, initializing it is necessary |
2802 | 0 | svt_memset(me_ctx->p_sb_best_mv, 0, sizeof(me_ctx->p_sb_best_mv)); |
2803 | | |
2804 | | //init hme results buffer |
2805 | 0 | for (uint32_t li = 0; li < MAX_NUM_OF_REF_PIC_LIST; li++) { |
2806 | 0 | for (uint32_t ri = 0; ri < REF_LIST_MAX_DEPTH; ri++) { |
2807 | 0 | if (me_ctx->me_type != ME_MCTF) { |
2808 | 0 | me_ctx->search_results[li][ri].list_i = li; |
2809 | 0 | } |
2810 | 0 | me_ctx->search_results[li][ri].ref_i = ri; |
2811 | 0 | me_ctx->search_results[li][ri].do_ref = 1; |
2812 | 0 | me_ctx->search_results[li][ri].hme_sad = MAX_U32; |
2813 | 0 | me_ctx->reduce_me_sr_divisor[li][ri] = 1; |
2814 | 0 | me_ctx->zz_sad[li][ri] = (uint32_t)~0; |
2815 | 0 | me_ctx->prehme_data[li][ri][0].valid = 0; |
2816 | 0 | me_ctx->prehme_data[li][ri][1].valid = 0; |
2817 | 0 | } |
2818 | 0 | } |
2819 | 0 | svt_memset(me_ctx->performed_phme, 0, sizeof(me_ctx->performed_phme)); |
2820 | 0 | } |
2821 | | |
2822 | | /******************************************* |
2823 | | * motion_estimation |
2824 | | * performs ME on 64x64 blocks |
2825 | | *******************************************/ |
2826 | | |
2827 | | EbErrorType svt_aom_motion_estimation_b64( |
2828 | | PictureParentControlSet* pcs, // input parameter, Picture Control Set Ptr |
2829 | | uint32_t b64_index, // input parameter, SB Index |
2830 | | uint32_t b64_origin_x, // input parameter, SB Origin X |
2831 | | uint32_t b64_origin_y, // input parameter, SB Origin X |
2832 | | MeContext* me_ctx, // input parameter, ME Context Ptr, used to store decimated/interpolated SB/SR |
2833 | | EbPictureBufferDesc* input_ptr) // input parameter, source Picture Ptr |
2834 | | |
2835 | 0 | { |
2836 | 0 | EbErrorType return_error = EB_ErrorNone; |
2837 | |
|
2838 | 0 | uint32_t num_of_list_to_search = me_ctx->num_of_list_to_search; |
2839 | | |
2840 | | // input picture width and height might be disaligned after resizing |
2841 | | // we use aligned width and height to avoid disalignment of calculation |
2842 | | // of block size |
2843 | 0 | uint16_t aligned_width = (uint16_t)ALIGN_POWER_OF_TWO(input_ptr->width, 3); |
2844 | 0 | uint16_t aligned_height = (uint16_t)ALIGN_POWER_OF_TWO(input_ptr->height, 3); |
2845 | 0 | me_ctx->b64_width = (aligned_width - b64_origin_x) < BLOCK_SIZE_64 ? aligned_width - b64_origin_x : BLOCK_SIZE_64; |
2846 | 0 | me_ctx->b64_height = (aligned_height - b64_origin_y) < BLOCK_SIZE_64 ? aligned_height - b64_origin_y |
2847 | 0 | : BLOCK_SIZE_64; |
2848 | | |
2849 | | //pruning of the references is not done for alt-ref / when HMeLevel2 not done |
2850 | 0 | uint8_t prune_ref = me_ctx->enable_hme_flag && me_ctx->me_type != ME_MCTF; |
2851 | | // Initialize ME/HME buffers |
2852 | 0 | init_me_hme_data(me_ctx); |
2853 | | // HME: Perform Hierarchical Motion Estimation for all reference frames for the current 64x64 block. |
2854 | 0 | hme_b64(pcs, b64_origin_x, b64_origin_y, me_ctx, input_ptr); |
2855 | |
|
2856 | 0 | if (me_ctx->me_type == ME_MCTF && me_ctx->search_results[0][0].hme_sad < me_ctx->tf_me_exit_th) { |
2857 | 0 | me_ctx->tf_use_pred_64x64_only_th = (uint8_t)~0; |
2858 | 0 | return return_error; |
2859 | 0 | } |
2860 | | // prune the reference frames based on the HME outputs. |
2861 | 0 | if (prune_ref) { |
2862 | 0 | hme_prune_ref_and_adjust_sr(me_ctx); |
2863 | 0 | } |
2864 | | // Full pel: Perform the Integer Motion Estimation on the allowed reference frames. |
2865 | 0 | integer_search_b64(pcs, me_ctx, b64_origin_x, b64_origin_y, input_ptr); |
2866 | | |
2867 | | // prune the reference frames |
2868 | 0 | if (prune_ref && me_ctx->me_hme_prune_ctrls.enable_me_hme_ref_pruning) { |
2869 | 0 | me_prune_ref(me_ctx); |
2870 | 0 | } |
2871 | |
|
2872 | 0 | if (me_ctx->me_type != ME_MCTF) { |
2873 | 0 | { |
2874 | 0 | if (me_ctx->num_of_ref_pic_to_search[REF_LIST_0] == 1 && |
2875 | 0 | me_ctx->num_of_ref_pic_to_search[REF_LIST_1] == 0) { |
2876 | 0 | construct_me_candidate_array_single_ref(pcs, me_ctx, num_of_list_to_search, b64_index); |
2877 | 0 | } else if (me_ctx->num_of_ref_pic_to_search[REF_LIST_0] == 1 && |
2878 | 0 | me_ctx->num_of_ref_pic_to_search[REF_LIST_1] == 1) { |
2879 | 0 | construct_me_candidate_array_mrp_off(pcs, me_ctx, num_of_list_to_search, b64_index); |
2880 | 0 | } else { |
2881 | 0 | construct_me_candidate_array(pcs, me_ctx, num_of_list_to_search, b64_index); |
2882 | 0 | } |
2883 | 0 | } |
2884 | | // Save the distortion per block size |
2885 | 0 | compute_distortion(pcs, b64_index, me_ctx); |
2886 | | |
2887 | | // Perform GM detection if GM is enabled |
2888 | 0 | pcs->rc_me_allow_gm[b64_index] = 0; |
2889 | |
|
2890 | 0 | if (pcs->gm_ctrls.enabled) { |
2891 | 0 | perform_gm_detection(pcs, b64_index, me_ctx); |
2892 | 0 | } |
2893 | 0 | } |
2894 | 0 | return return_error; |
2895 | 0 | } |