/src/aom/av1/encoder/var_based_part.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2019, Alliance for Open Media. All rights reserved. |
3 | | * |
4 | | * This source code is subject to the terms of the BSD 2 Clause License and |
5 | | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
6 | | * was not distributed with this source code in the LICENSE file, you can |
7 | | * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
8 | | * Media Patent License 1.0 was not distributed with this source code in the |
9 | | * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
10 | | */ |
11 | | |
12 | | #include <limits.h> |
13 | | #include <math.h> |
14 | | #include <stdbool.h> |
15 | | #include <stdio.h> |
16 | | |
17 | | #include "config/aom_config.h" |
18 | | #include "config/aom_dsp_rtcd.h" |
19 | | #include "config/av1_rtcd.h" |
20 | | |
21 | | #include "aom_dsp/aom_dsp_common.h" |
22 | | #include "aom_dsp/binary_codes_writer.h" |
23 | | #include "aom_ports/mem.h" |
24 | | #include "aom_ports/aom_timer.h" |
25 | | |
26 | | #include "av1/common/reconinter.h" |
27 | | #include "av1/common/blockd.h" |
28 | | #include "av1/common/quant_common.h" |
29 | | |
30 | | #include "av1/encoder/encodeframe.h" |
31 | | #include "av1/encoder/encodeframe_utils.h" |
32 | | #include "av1/encoder/var_based_part.h" |
33 | | #include "av1/encoder/reconinter_enc.h" |
34 | | #include "av1/encoder/rdopt_utils.h" |
35 | | |
36 | | // Possible values for the force_split variable while evaluating variance based |
37 | | // partitioning. |
enum {
  // Evaluate all partition types
  PART_EVAL_ALL = 0,
  // Force PARTITION_SPLIT
  PART_EVAL_ONLY_SPLIT = 1,
  // Force PARTITION_NONE
  PART_EVAL_ONLY_NONE = 2
} UENUM1BYTE(PART_EVAL_STATUS);  // UENUM1BYTE presumably declares a 1-byte
                                 // enum typedef — see the project's enum
                                 // helper macros.
46 | | |
// Scratch view over one level of the variance tree: the node's own partition
// variances plus pointers to the "none" accumulators of its four children.
typedef struct {
  VPVariance *part_variances;  // variances for this node's partition choices
  VPartVar *split[4];          // per-child accumulators
} variance_node;
51 | | |
52 | | static inline void tree_to_node(void *data, BLOCK_SIZE bsize, |
53 | 0 | variance_node *node) { |
54 | 0 | node->part_variances = NULL; |
55 | 0 | switch (bsize) { |
56 | 0 | case BLOCK_128X128: { |
57 | 0 | VP128x128 *vt = (VP128x128 *)data; |
58 | 0 | node->part_variances = &vt->part_variances; |
59 | 0 | for (int split_idx = 0; split_idx < 4; split_idx++) |
60 | 0 | node->split[split_idx] = &vt->split[split_idx].part_variances.none; |
61 | 0 | break; |
62 | 0 | } |
63 | 0 | case BLOCK_64X64: { |
64 | 0 | VP64x64 *vt = (VP64x64 *)data; |
65 | 0 | node->part_variances = &vt->part_variances; |
66 | 0 | for (int split_idx = 0; split_idx < 4; split_idx++) |
67 | 0 | node->split[split_idx] = &vt->split[split_idx].part_variances.none; |
68 | 0 | break; |
69 | 0 | } |
70 | 0 | case BLOCK_32X32: { |
71 | 0 | VP32x32 *vt = (VP32x32 *)data; |
72 | 0 | node->part_variances = &vt->part_variances; |
73 | 0 | for (int split_idx = 0; split_idx < 4; split_idx++) |
74 | 0 | node->split[split_idx] = &vt->split[split_idx].part_variances.none; |
75 | 0 | break; |
76 | 0 | } |
77 | 0 | case BLOCK_16X16: { |
78 | 0 | VP16x16 *vt = (VP16x16 *)data; |
79 | 0 | node->part_variances = &vt->part_variances; |
80 | 0 | for (int split_idx = 0; split_idx < 4; split_idx++) |
81 | 0 | node->split[split_idx] = &vt->split[split_idx].part_variances.none; |
82 | 0 | break; |
83 | 0 | } |
84 | 0 | case BLOCK_8X8: { |
85 | 0 | VP8x8 *vt = (VP8x8 *)data; |
86 | 0 | node->part_variances = &vt->part_variances; |
87 | 0 | for (int split_idx = 0; split_idx < 4; split_idx++) |
88 | 0 | node->split[split_idx] = &vt->split[split_idx].part_variances.none; |
89 | 0 | break; |
90 | 0 | } |
91 | 0 | default: { |
92 | 0 | VP4x4 *vt = (VP4x4 *)data; |
93 | 0 | assert(bsize == BLOCK_4X4); |
94 | 0 | node->part_variances = &vt->part_variances; |
95 | 0 | for (int split_idx = 0; split_idx < 4; split_idx++) |
96 | 0 | node->split[split_idx] = &vt->split[split_idx]; |
97 | 0 | break; |
98 | 0 | } |
99 | 0 | } |
100 | 0 | } |
101 | | |
102 | | // Set variance values given sum square error, sum error, count. |
103 | 0 | static inline void fill_variance(uint32_t s2, int32_t s, int c, VPartVar *v) { |
104 | 0 | v->sum_square_error = s2; |
105 | 0 | v->sum_error = s; |
106 | 0 | v->log2_count = c; |
107 | 0 | } |
108 | | |
109 | 0 | static inline void get_variance(VPartVar *v) { |
110 | 0 | v->variance = |
111 | 0 | (int)(256 * (v->sum_square_error - |
112 | 0 | (uint32_t)(((int64_t)v->sum_error * v->sum_error) >> |
113 | 0 | v->log2_count)) >> |
114 | 0 | v->log2_count); |
115 | 0 | } |
116 | | |
117 | | static inline void sum_2_variances(const VPartVar *a, const VPartVar *b, |
118 | 0 | VPartVar *r) { |
119 | 0 | assert(a->log2_count == b->log2_count); |
120 | 0 | fill_variance(a->sum_square_error + b->sum_square_error, |
121 | 0 | a->sum_error + b->sum_error, a->log2_count + 1, r); |
122 | 0 | } |
123 | | |
124 | 0 | static inline void fill_variance_tree(void *data, BLOCK_SIZE bsize) { |
125 | 0 | variance_node node; |
126 | 0 | memset(&node, 0, sizeof(node)); |
127 | 0 | tree_to_node(data, bsize, &node); |
128 | 0 | sum_2_variances(node.split[0], node.split[1], &node.part_variances->horz[0]); |
129 | 0 | sum_2_variances(node.split[2], node.split[3], &node.part_variances->horz[1]); |
130 | 0 | sum_2_variances(node.split[0], node.split[2], &node.part_variances->vert[0]); |
131 | 0 | sum_2_variances(node.split[1], node.split[3], &node.part_variances->vert[1]); |
132 | 0 | sum_2_variances(&node.part_variances->vert[0], &node.part_variances->vert[1], |
133 | 0 | &node.part_variances->none); |
134 | 0 | } |
135 | | |
136 | | static inline void set_block_size(AV1_COMP *const cpi, int mi_row, int mi_col, |
137 | 0 | BLOCK_SIZE bsize) { |
138 | 0 | if (cpi->common.mi_params.mi_cols > mi_col && |
139 | 0 | cpi->common.mi_params.mi_rows > mi_row) { |
140 | 0 | CommonModeInfoParams *mi_params = &cpi->common.mi_params; |
141 | 0 | const int mi_grid_idx = get_mi_grid_idx(mi_params, mi_row, mi_col); |
142 | 0 | const int mi_alloc_idx = get_alloc_mi_idx(mi_params, mi_row, mi_col); |
143 | 0 | MB_MODE_INFO *mi = mi_params->mi_grid_base[mi_grid_idx] = |
144 | 0 | &mi_params->mi_alloc[mi_alloc_idx]; |
145 | 0 | mi->bsize = bsize; |
146 | 0 | } |
147 | 0 | } |
148 | | |
// Try to terminate the partition search for the block of size 'bsize' at
// (mi_row, mi_col) using the variances gathered in 'data'. Sets the chosen
// block size(s) in the mi grid and returns 1 when the block is settled as
// NONE / HORZ / VERT; returns 0 when the caller should split further.
// 'threshold' is the variance cut-off for taking 'bsize'; 'bsize_min' is the
// smallest size this pass considers; 'force_split' can force NONE or SPLIT.
static int set_vt_partitioning(AV1_COMP *cpi, MACROBLOCKD *const xd,
                               const TileInfo *const tile, void *data,
                               BLOCK_SIZE bsize, int mi_row, int mi_col,
                               int64_t threshold, BLOCK_SIZE bsize_min,
                               PART_EVAL_STATUS force_split) {
  AV1_COMMON *const cm = &cpi->common;
  variance_node vt;
  const int block_width = mi_size_wide[bsize];
  const int block_height = mi_size_high[bsize];
  // How much of the block must fit inside the tile for each partition type.
  int bs_width_check = block_width;
  int bs_height_check = block_height;
  int bs_width_vert_check = block_width >> 1;
  int bs_height_horiz_check = block_height >> 1;
  // On the right and bottom boundary we only need to check
  // if half the bsize fits, because boundary is extended
  // up to 64. So do this check only for sb_size = 64X64.
  if (cm->seq_params->sb_size == BLOCK_64X64) {
    if (tile->mi_col_end == cm->mi_params.mi_cols) {
      bs_width_check = (block_width >> 1) + 1;
      bs_width_vert_check = (block_width >> 2) + 1;
    }
    if (tile->mi_row_end == cm->mi_params.mi_rows) {
      bs_height_check = (block_height >> 1) + 1;
      bs_height_horiz_check = (block_height >> 2) + 1;
    }
  }

  assert(block_height == block_width);
  tree_to_node(data, bsize, &vt);

  // Forced PARTITION_NONE: take bsize if it fits in the tile.
  if (mi_col + bs_width_check <= tile->mi_col_end &&
      mi_row + bs_height_check <= tile->mi_row_end &&
      force_split == PART_EVAL_ONLY_NONE) {
    set_block_size(cpi, mi_row, mi_col, bsize);
    return 1;
  }
  if (force_split == PART_EVAL_ONLY_SPLIT) return 0;

  // For bsize=bsize_min (16x16/8x8 for 8x8/4x4 downsampling), select if
  // variance is below threshold, otherwise split will be selected.
  // No check for vert/horiz split as too few samples for variance.
  if (bsize == bsize_min) {
    // Variance already computed to set the force_split.
    if (frame_is_intra_only(cm)) get_variance(&vt.part_variances->none);
    if (mi_col + bs_width_check <= tile->mi_col_end &&
        mi_row + bs_height_check <= tile->mi_row_end &&
        vt.part_variances->none.variance < threshold) {
      set_block_size(cpi, mi_row, mi_col, bsize);
      return 1;
    }
    return 0;
  } else if (bsize > bsize_min) {
    // Variance already computed to set the force_split.
    if (frame_is_intra_only(cm)) get_variance(&vt.part_variances->none);
    // For key frame: take split for bsize above 32X32 or very high variance.
    if (frame_is_intra_only(cm) &&
        (bsize > BLOCK_32X32 ||
         vt.part_variances->none.variance > (threshold << 4))) {
      return 0;
    }
    // If variance is low, take the bsize (no split).
    if (mi_col + bs_width_check <= tile->mi_col_end &&
        mi_row + bs_height_check <= tile->mi_row_end &&
        vt.part_variances->none.variance < threshold) {
      set_block_size(cpi, mi_row, mi_col, bsize);
      return 1;
    }
    // Check vertical split: both halves must be low-variance and the
    // chroma subsize must be valid.
    if (mi_row + bs_height_check <= tile->mi_row_end &&
        mi_col + bs_width_vert_check <= tile->mi_col_end) {
      BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_VERT);
      BLOCK_SIZE plane_bsize =
          get_plane_block_size(subsize, xd->plane[AOM_PLANE_U].subsampling_x,
                               xd->plane[AOM_PLANE_U].subsampling_y);
      get_variance(&vt.part_variances->vert[0]);
      get_variance(&vt.part_variances->vert[1]);
      if (vt.part_variances->vert[0].variance < threshold &&
          vt.part_variances->vert[1].variance < threshold &&
          plane_bsize < BLOCK_INVALID) {
        set_block_size(cpi, mi_row, mi_col, subsize);
        set_block_size(cpi, mi_row, mi_col + block_width / 2, subsize);
        return 1;
      }
    }
    // Check horizontal split, under the same conditions as above.
    if (mi_col + bs_width_check <= tile->mi_col_end &&
        mi_row + bs_height_horiz_check <= tile->mi_row_end) {
      BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_HORZ);
      BLOCK_SIZE plane_bsize =
          get_plane_block_size(subsize, xd->plane[AOM_PLANE_U].subsampling_x,
                               xd->plane[AOM_PLANE_U].subsampling_y);
      get_variance(&vt.part_variances->horz[0]);
      get_variance(&vt.part_variances->horz[1]);
      if (vt.part_variances->horz[0].variance < threshold &&
          vt.part_variances->horz[1].variance < threshold &&
          plane_bsize < BLOCK_INVALID) {
        set_block_size(cpi, mi_row, mi_col, subsize);
        set_block_size(cpi, mi_row + block_height / 2, mi_col, subsize);
        return 1;
      }
    }
    return 0;
  }
  return 0;
}
254 | | |
// Returns 1 iff all four 8x8 sub-blocks of the 16x16 block at
// (x16_idx, y16_idx) lie inside the visible pixel area.
static inline int all_blks_inside(int x16_idx, int y16_idx, int pixels_wide,
                                  int pixels_high) {
  for (int blk = 0; blk < 4; ++blk) {
    if (x16_idx + GET_BLK_IDX_X(blk, 3) >= pixels_wide ||
        y16_idx + GET_BLK_IDX_Y(blk, 3) >= pixels_high) {
      return 0;
    }
  }
  return 1;
}
264 | | |
265 | | #if CONFIG_AV1_HIGHBITDEPTH |
266 | | // TODO(yunqingwang): Perform average of four 8x8 blocks similar to lowbd |
267 | | static inline void fill_variance_8x8avg_highbd( |
268 | | const uint8_t *src_buf, int src_stride, const uint8_t *dst_buf, |
269 | | int dst_stride, int x16_idx, int y16_idx, VP16x16 *vst, int pixels_wide, |
270 | 0 | int pixels_high) { |
271 | 0 | for (int idx = 0; idx < 4; idx++) { |
272 | 0 | const int x8_idx = x16_idx + GET_BLK_IDX_X(idx, 3); |
273 | 0 | const int y8_idx = y16_idx + GET_BLK_IDX_Y(idx, 3); |
274 | 0 | unsigned int sse = 0; |
275 | 0 | int sum = 0; |
276 | 0 | if (x8_idx < pixels_wide && y8_idx < pixels_high) { |
277 | 0 | int src_avg = aom_highbd_avg_8x8(src_buf + y8_idx * src_stride + x8_idx, |
278 | 0 | src_stride); |
279 | 0 | int dst_avg = aom_highbd_avg_8x8(dst_buf + y8_idx * dst_stride + x8_idx, |
280 | 0 | dst_stride); |
281 | |
|
282 | 0 | sum = src_avg - dst_avg; |
283 | 0 | sse = sum * sum; |
284 | 0 | } |
285 | 0 | fill_variance(sse, sum, 0, &vst->split[idx].part_variances.none); |
286 | 0 | } |
287 | 0 | } |
288 | | #endif |
289 | | |
290 | | static inline void fill_variance_8x8avg_lowbd( |
291 | | const uint8_t *src_buf, int src_stride, const uint8_t *dst_buf, |
292 | | int dst_stride, int x16_idx, int y16_idx, VP16x16 *vst, int pixels_wide, |
293 | 0 | int pixels_high) { |
294 | 0 | unsigned int sse[4] = { 0 }; |
295 | 0 | int sum[4] = { 0 }; |
296 | |
|
297 | 0 | if (all_blks_inside(x16_idx, y16_idx, pixels_wide, pixels_high)) { |
298 | 0 | int src_avg[4]; |
299 | 0 | int dst_avg[4]; |
300 | 0 | aom_avg_8x8_quad(src_buf, src_stride, x16_idx, y16_idx, src_avg); |
301 | 0 | aom_avg_8x8_quad(dst_buf, dst_stride, x16_idx, y16_idx, dst_avg); |
302 | 0 | for (int idx = 0; idx < 4; idx++) { |
303 | 0 | sum[idx] = src_avg[idx] - dst_avg[idx]; |
304 | 0 | sse[idx] = sum[idx] * sum[idx]; |
305 | 0 | } |
306 | 0 | } else { |
307 | 0 | for (int idx = 0; idx < 4; idx++) { |
308 | 0 | const int x8_idx = x16_idx + GET_BLK_IDX_X(idx, 3); |
309 | 0 | const int y8_idx = y16_idx + GET_BLK_IDX_Y(idx, 3); |
310 | 0 | if (x8_idx < pixels_wide && y8_idx < pixels_high) { |
311 | 0 | int src_avg = |
312 | 0 | aom_avg_8x8(src_buf + y8_idx * src_stride + x8_idx, src_stride); |
313 | 0 | int dst_avg = |
314 | 0 | aom_avg_8x8(dst_buf + y8_idx * dst_stride + x8_idx, dst_stride); |
315 | 0 | sum[idx] = src_avg - dst_avg; |
316 | 0 | sse[idx] = sum[idx] * sum[idx]; |
317 | 0 | } |
318 | 0 | } |
319 | 0 | } |
320 | |
|
321 | 0 | for (int idx = 0; idx < 4; idx++) { |
322 | 0 | fill_variance(sse[idx], sum[idx], 0, &vst->split[idx].part_variances.none); |
323 | 0 | } |
324 | 0 | } |
325 | | |
326 | | // Obtain parameters required to calculate variance (such as sum, sse, etc,.) |
327 | | // at 8x8 sub-block level for a given 16x16 block. |
328 | | // The function can be called only when is_key_frame is false since sum is |
329 | | // computed between source and reference frames. |
static inline void fill_variance_8x8avg(const uint8_t *src_buf, int src_stride,
                                        const uint8_t *dst_buf, int dst_stride,
                                        int x16_idx, int y16_idx, VP16x16 *vst,
                                        int highbd_flag, int pixels_wide,
                                        int pixels_high) {
#if CONFIG_AV1_HIGHBITDEPTH
  // Dispatch to the high bit-depth path when the flag is set.
  if (highbd_flag) {
    fill_variance_8x8avg_highbd(src_buf, src_stride, dst_buf, dst_stride,
                                x16_idx, y16_idx, vst, pixels_wide,
                                pixels_high);
    return;
  }
#else
  (void)highbd_flag;
#endif  // CONFIG_AV1_HIGHBITDEPTH
  fill_variance_8x8avg_lowbd(src_buf, src_stride, dst_buf, dst_stride, x16_idx,
                             y16_idx, vst, pixels_wide, pixels_high);
}
348 | | |
// Over the four 8x8 sub-blocks of a 16x16 block, compute the (max - min)
// spread of source/reference differences in each sub-block and return the
// difference between the largest and smallest spread.
static int compute_minmax_8x8(const uint8_t *src_buf, int src_stride,
                              const uint8_t *dst_buf, int dst_stride,
                              int x16_idx, int y16_idx,
#if CONFIG_AV1_HIGHBITDEPTH
                              int highbd_flag,
#endif
                              int pixels_wide, int pixels_high) {
  int spread_max = 0;
  int spread_min = 255;
  // Loop over the 4 8x8 subblocks, skipping those outside the frame.
  for (int blk = 0; blk < 4; ++blk) {
    const int x8 = x16_idx + GET_BLK_IDX_X(blk, 3);
    const int y8 = y16_idx + GET_BLK_IDX_Y(blk, 3);
    if (x8 >= pixels_wide || y8 >= pixels_high) continue;
    int min = 0;
    int max = 0;
#if CONFIG_AV1_HIGHBITDEPTH
    if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
      aom_highbd_minmax_8x8(src_buf + y8 * src_stride + x8, src_stride,
                            dst_buf + y8 * dst_stride + x8, dst_stride, &min,
                            &max);
    } else {
      aom_minmax_8x8(src_buf + y8 * src_stride + x8, src_stride,
                     dst_buf + y8 * dst_stride + x8, dst_stride, &min, &max);
    }
#else
    aom_minmax_8x8(src_buf + y8 * src_stride + x8, src_stride,
                   dst_buf + y8 * dst_stride + x8, dst_stride, &min, &max);
#endif
    const int spread = max - min;
    if (spread > spread_max) spread_max = spread;
    if (spread < spread_min) spread_min = spread;
  }
  return (spread_max - spread_min);
}
386 | | |
387 | | // Function to compute average and variance of 4x4 sub-block. |
388 | | // The function can be called only when is_key_frame is true since sum is |
389 | | // computed using source frame only. |
390 | | static inline void fill_variance_4x4avg(const uint8_t *src_buf, int src_stride, |
391 | | int x8_idx, int y8_idx, VP8x8 *vst, |
392 | | #if CONFIG_AV1_HIGHBITDEPTH |
393 | | int highbd_flag, |
394 | | #endif |
395 | | int pixels_wide, int pixels_high, |
396 | 0 | int border_offset_4x4) { |
397 | 0 | for (int idx = 0; idx < 4; idx++) { |
398 | 0 | const int x4_idx = x8_idx + GET_BLK_IDX_X(idx, 2); |
399 | 0 | const int y4_idx = y8_idx + GET_BLK_IDX_Y(idx, 2); |
400 | 0 | unsigned int sse = 0; |
401 | 0 | int sum = 0; |
402 | 0 | if (x4_idx < pixels_wide - border_offset_4x4 && |
403 | 0 | y4_idx < pixels_high - border_offset_4x4) { |
404 | 0 | int src_avg; |
405 | 0 | int dst_avg = 128; |
406 | 0 | #if CONFIG_AV1_HIGHBITDEPTH |
407 | 0 | if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) { |
408 | 0 | src_avg = aom_highbd_avg_4x4(src_buf + y4_idx * src_stride + x4_idx, |
409 | 0 | src_stride); |
410 | 0 | } else { |
411 | 0 | src_avg = |
412 | 0 | aom_avg_4x4(src_buf + y4_idx * src_stride + x4_idx, src_stride); |
413 | 0 | } |
414 | | #else |
415 | | src_avg = aom_avg_4x4(src_buf + y4_idx * src_stride + x4_idx, src_stride); |
416 | | #endif |
417 | |
|
418 | 0 | sum = src_avg - dst_avg; |
419 | 0 | sse = sum * sum; |
420 | 0 | } |
421 | 0 | fill_variance(sse, sum, 0, &vst->split[idx].part_variances.none); |
422 | 0 | } |
423 | 0 | } |
424 | | |
// Scale the partition threshold for content: raise it by 1.5x for
// non-reference, non-static frames, and by a further 1.25x at speed >= 8.
static int64_t scale_part_thresh_content(int64_t threshold_base, int speed,
                                         int non_reference_frame,
                                         int is_static) {
  int64_t th = threshold_base;
  if (non_reference_frame && !is_static) th += th >> 1;  // th *= 1.5
  if (speed >= 8) th += th >> 2;                         // th *= 1.25
  return th;
}
435 | | |
436 | | // Tune thresholds less or more aggressively to prefer larger partitions |
437 | | static inline void tune_thresh_based_on_qindex( |
438 | | AV1_COMP *cpi, int64_t thresholds[], uint64_t block_sad, int current_qindex, |
439 | | int num_pixels, bool is_segment_id_boosted, int source_sad_nonrd, |
440 | 0 | int lighting_change) { |
441 | 0 | double weight; |
442 | 0 | if (cpi->sf.rt_sf.prefer_large_partition_blocks >= 3) { |
443 | 0 | const int win = 20; |
444 | 0 | if (current_qindex < QINDEX_LARGE_BLOCK_THR - win) |
445 | 0 | weight = 1.0; |
446 | 0 | else if (current_qindex > QINDEX_LARGE_BLOCK_THR + win) |
447 | 0 | weight = 0.0; |
448 | 0 | else |
449 | 0 | weight = |
450 | 0 | 1.0 - (current_qindex - QINDEX_LARGE_BLOCK_THR + win) / (2 * win); |
451 | 0 | if (num_pixels > RESOLUTION_480P) { |
452 | 0 | for (int i = 0; i < 4; i++) { |
453 | 0 | thresholds[i] <<= 1; |
454 | 0 | } |
455 | 0 | } |
456 | 0 | if (num_pixels <= RESOLUTION_288P) { |
457 | 0 | thresholds[3] = INT64_MAX; |
458 | 0 | if (is_segment_id_boosted == false) { |
459 | 0 | thresholds[1] <<= 2; |
460 | 0 | thresholds[2] <<= (source_sad_nonrd <= kLowSad) ? 5 : 4; |
461 | 0 | } else { |
462 | 0 | thresholds[1] <<= 1; |
463 | 0 | thresholds[2] <<= 3; |
464 | 0 | } |
465 | | // Allow for split to 8x8 for superblocks where part of it has |
466 | | // moving boundary. So allow for sb with source_sad above threshold, |
467 | | // and avoid very large source_sad or high source content, to avoid |
468 | | // too many 8x8 within superblock. |
469 | 0 | uint64_t avg_source_sad_thresh = 25000; |
470 | 0 | uint64_t block_sad_low = 25000; |
471 | 0 | uint64_t block_sad_high = 50000; |
472 | 0 | if (cpi->svc.temporal_layer_id == 0 && |
473 | 0 | cpi->svc.number_temporal_layers > 1) { |
474 | | // Increase the sad thresholds for base TL0, as reference/LAST is |
475 | | // 2/4 frames behind (for 2/3 #TL). |
476 | 0 | avg_source_sad_thresh = 40000; |
477 | 0 | block_sad_high = 70000; |
478 | 0 | } |
479 | 0 | if (is_segment_id_boosted == false && |
480 | 0 | cpi->rc.avg_source_sad < avg_source_sad_thresh && |
481 | 0 | block_sad > block_sad_low && block_sad < block_sad_high && |
482 | 0 | !lighting_change) { |
483 | 0 | thresholds[2] = (3 * thresholds[2]) >> 2; |
484 | 0 | thresholds[3] = thresholds[2] << 3; |
485 | 0 | } |
486 | | // Condition the increase of partition thresholds on the segment |
487 | | // and the content. Avoid the increase for superblocks which have |
488 | | // high source sad, unless the whole frame has very high motion |
489 | | // (i.e, cpi->rc.avg_source_sad is very large, in which case all blocks |
490 | | // have high source sad). |
491 | 0 | } else if (num_pixels > RESOLUTION_480P && is_segment_id_boosted == false && |
492 | 0 | (source_sad_nonrd != kHighSad || |
493 | 0 | cpi->rc.avg_source_sad > 50000)) { |
494 | 0 | thresholds[0] = (3 * thresholds[0]) >> 1; |
495 | 0 | thresholds[3] = INT64_MAX; |
496 | 0 | if (current_qindex > QINDEX_LARGE_BLOCK_THR) { |
497 | 0 | thresholds[1] = |
498 | 0 | (int)((1 - weight) * (thresholds[1] << 1) + weight * thresholds[1]); |
499 | 0 | thresholds[2] = |
500 | 0 | (int)((1 - weight) * (thresholds[2] << 1) + weight * thresholds[2]); |
501 | 0 | } |
502 | 0 | } else if (current_qindex > QINDEX_LARGE_BLOCK_THR && |
503 | 0 | is_segment_id_boosted == false && |
504 | 0 | (source_sad_nonrd != kHighSad || |
505 | 0 | cpi->rc.avg_source_sad > 50000)) { |
506 | 0 | thresholds[1] = |
507 | 0 | (int)((1 - weight) * (thresholds[1] << 2) + weight * thresholds[1]); |
508 | 0 | thresholds[2] = |
509 | 0 | (int)((1 - weight) * (thresholds[2] << 4) + weight * thresholds[2]); |
510 | 0 | thresholds[3] = INT64_MAX; |
511 | 0 | } |
512 | 0 | } else if (cpi->sf.rt_sf.prefer_large_partition_blocks >= 2) { |
513 | 0 | thresholds[1] <<= (source_sad_nonrd <= kLowSad) ? 2 : 0; |
514 | 0 | thresholds[2] = |
515 | 0 | (source_sad_nonrd <= kLowSad) ? (3 * thresholds[2]) : thresholds[2]; |
516 | 0 | } else if (cpi->sf.rt_sf.prefer_large_partition_blocks >= 1) { |
517 | 0 | const int fac = (source_sad_nonrd <= kLowSad) ? 2 : 1; |
518 | 0 | if (current_qindex < QINDEX_LARGE_BLOCK_THR - 45) |
519 | 0 | weight = 1.0; |
520 | 0 | else if (current_qindex > QINDEX_LARGE_BLOCK_THR + 45) |
521 | 0 | weight = 0.0; |
522 | 0 | else |
523 | 0 | weight = 1.0 - (current_qindex - QINDEX_LARGE_BLOCK_THR + 45) / (2 * 45); |
524 | 0 | thresholds[1] = |
525 | 0 | (int)((1 - weight) * (thresholds[1] << 1) + weight * thresholds[1]); |
526 | 0 | thresholds[2] = |
527 | 0 | (int)((1 - weight) * (thresholds[2] << 1) + weight * thresholds[2]); |
528 | 0 | thresholds[3] = |
529 | 0 | (int)((1 - weight) * (thresholds[3] << fac) + weight * thresholds[3]); |
530 | 0 | } |
531 | 0 | if (cpi->sf.part_sf.disable_8x8_part_based_on_qidx && (current_qindex < 128)) |
532 | 0 | thresholds[3] = INT64_MAX; |
533 | 0 | } |
534 | | |
535 | | static void set_vbp_thresholds_key_frame(AV1_COMP *cpi, int64_t thresholds[], |
536 | | int64_t threshold_base, |
537 | | int threshold_left_shift, |
538 | 0 | int num_pixels) { |
539 | 0 | if (cpi->sf.rt_sf.force_large_partition_blocks_intra) { |
540 | 0 | const int shift_steps = |
541 | 0 | threshold_left_shift - (cpi->oxcf.mode == ALLINTRA ? 7 : 8); |
542 | 0 | assert(shift_steps >= 0); |
543 | 0 | threshold_base <<= shift_steps; |
544 | 0 | } |
545 | 0 | thresholds[0] = threshold_base; |
546 | 0 | thresholds[1] = threshold_base; |
547 | 0 | if (num_pixels < RESOLUTION_720P) { |
548 | 0 | thresholds[2] = threshold_base / 3; |
549 | 0 | thresholds[3] = threshold_base >> 1; |
550 | 0 | } else { |
551 | 0 | int shift_val = 2; |
552 | 0 | if (cpi->sf.rt_sf.force_large_partition_blocks_intra) { |
553 | 0 | shift_val = (cpi->oxcf.mode == ALLINTRA ? 1 : 0); |
554 | 0 | } |
555 | |
|
556 | 0 | thresholds[2] = threshold_base >> shift_val; |
557 | 0 | thresholds[3] = threshold_base >> shift_val; |
558 | 0 | } |
559 | 0 | thresholds[4] = threshold_base << 2; |
560 | 0 | } |
561 | | |
// Adjust the partition thresholds for the frame resolution. For very small
// resolutions (<= 288P) the thresholds are additionally interpolated in
// qindex between a low and a high breakpoint chosen by the
// var_part_based_on_qidx speed feature.
static inline void tune_thresh_based_on_resolution(
    AV1_COMP *cpi, int64_t thresholds[], int64_t threshold_base,
    int current_qindex, int source_sad_rd, int num_pixels) {
  if (num_pixels >= RESOLUTION_720P) thresholds[3] = thresholds[3] << 1;
  if (num_pixels <= RESOLUTION_288P) {
    // {low, high} qindex breakpoints, indexed by var_part_based_on_qidx.
    const int qindex_thr[5][2] = {
      { 200, 220 }, { 140, 170 }, { 120, 150 }, { 200, 210 }, { 170, 220 },
    };
    int th_idx = 0;
    if (cpi->sf.rt_sf.var_part_based_on_qidx >= 1)
      th_idx =
          (source_sad_rd <= kLowSad) ? cpi->sf.rt_sf.var_part_based_on_qidx : 0;
    if (cpi->sf.rt_sf.var_part_based_on_qidx >= 3)
      th_idx = cpi->sf.rt_sf.var_part_based_on_qidx;
    const int qindex_low_thr = qindex_thr[th_idx][0];
    const int qindex_high_thr = qindex_thr[th_idx][1];
    if (current_qindex >= qindex_high_thr) {
      threshold_base = (5 * threshold_base) >> 1;
      thresholds[1] = threshold_base >> 3;
      thresholds[2] = threshold_base << 2;
      thresholds[3] = threshold_base << 5;
    } else if (current_qindex < qindex_low_thr) {
      thresholds[1] = threshold_base >> 3;
      thresholds[2] = threshold_base >> 1;
      thresholds[3] = threshold_base << 3;
    } else {
      // Between the breakpoints: linearly interpolate in qindex between the
      // low-qindex and high-qindex threshold sets.
      int64_t qi_diff_low = current_qindex - qindex_low_thr;
      int64_t qi_diff_high = qindex_high_thr - current_qindex;
      int64_t threshold_diff = qindex_high_thr - qindex_low_thr;
      int64_t threshold_base_high = (5 * threshold_base) >> 1;

      // Guard against a zero-width interval.
      threshold_diff = threshold_diff > 0 ? threshold_diff : 1;
      threshold_base =
          (qi_diff_low * threshold_base_high + qi_diff_high * threshold_base) /
          threshold_diff;
      thresholds[1] = threshold_base >> 3;
      thresholds[2] = ((qi_diff_low * threshold_base) +
                       qi_diff_high * (threshold_base >> 1)) /
                      threshold_diff;
      thresholds[3] = ((qi_diff_low * (threshold_base << 5)) +
                       qi_diff_high * (threshold_base << 3)) /
                      threshold_diff;
    }
  } else if (num_pixels < RESOLUTION_720P) {
    thresholds[2] = (5 * threshold_base) >> 2;
  } else if (num_pixels < RESOLUTION_1080P) {
    thresholds[2] = threshold_base << 1;
  } else {
    // num_pixels >= RESOLUTION_1080P
    if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN) {
      if (num_pixels < RESOLUTION_1440P) {
        thresholds[2] = (5 * threshold_base) >> 1;
      } else {
        thresholds[2] = (7 * threshold_base) >> 1;
      }
    } else {
      if (cpi->oxcf.speed > 7) {
        thresholds[2] = 6 * threshold_base;
      } else {
        thresholds[2] = 3 * threshold_base;
      }
    }
  }
}
626 | | |
627 | | // Increase the base partition threshold, based on content and noise level. |
628 | | static inline int64_t tune_base_thresh_content(AV1_COMP *cpi, |
629 | | int64_t threshold_base, |
630 | | int content_lowsumdiff, |
631 | | int source_sad_nonrd, |
632 | 0 | int num_pixels) { |
633 | 0 | AV1_COMMON *const cm = &cpi->common; |
634 | 0 | int64_t updated_thresh_base = threshold_base; |
635 | 0 | if (cpi->noise_estimate.enabled && content_lowsumdiff && |
636 | 0 | num_pixels > RESOLUTION_480P && cm->current_frame.frame_number > 60) { |
637 | 0 | NOISE_LEVEL noise_level = |
638 | 0 | av1_noise_estimate_extract_level(&cpi->noise_estimate); |
639 | 0 | if (noise_level == kHigh) |
640 | 0 | updated_thresh_base = (5 * updated_thresh_base) >> 1; |
641 | 0 | else if (noise_level == kMedium && |
642 | 0 | !cpi->sf.rt_sf.prefer_large_partition_blocks) |
643 | 0 | updated_thresh_base = (5 * updated_thresh_base) >> 2; |
644 | 0 | } |
645 | 0 | updated_thresh_base = scale_part_thresh_content( |
646 | 0 | updated_thresh_base, cpi->oxcf.speed, |
647 | 0 | cpi->ppi->rtc_ref.non_reference_frame, cpi->rc.frame_source_sad == 0); |
648 | 0 | if (cpi->oxcf.speed >= 11 && source_sad_nonrd > kLowSad && |
649 | 0 | cpi->rc.high_motion_content_screen_rtc) |
650 | 0 | updated_thresh_base = updated_thresh_base << 4; |
651 | 0 | return updated_thresh_base; |
652 | 0 | } |
653 | | |
654 | | static inline void set_vbp_thresholds(AV1_COMP *cpi, int64_t thresholds[], |
655 | | uint64_t blk_sad, int qindex, |
656 | | int content_lowsumdiff, |
657 | | int source_sad_nonrd, int source_sad_rd, |
658 | | bool is_segment_id_boosted, |
659 | 0 | int lighting_change) { |
660 | 0 | AV1_COMMON *const cm = &cpi->common; |
661 | 0 | const int is_key_frame = frame_is_intra_only(cm); |
662 | 0 | const int threshold_multiplier = is_key_frame ? 120 : 1; |
663 | 0 | const int ac_q = av1_ac_quant_QTX(qindex, 0, cm->seq_params->bit_depth); |
664 | 0 | int64_t threshold_base = (int64_t)(threshold_multiplier * ac_q); |
665 | 0 | const int current_qindex = cm->quant_params.base_qindex; |
666 | 0 | const int threshold_left_shift = cpi->sf.rt_sf.var_part_split_threshold_shift; |
667 | 0 | const int num_pixels = cm->width * cm->height; |
668 | |
|
669 | 0 | if (is_key_frame) { |
670 | 0 | set_vbp_thresholds_key_frame(cpi, thresholds, threshold_base, |
671 | 0 | threshold_left_shift, num_pixels); |
672 | 0 | return; |
673 | 0 | } |
674 | | |
675 | 0 | threshold_base = tune_base_thresh_content( |
676 | 0 | cpi, threshold_base, content_lowsumdiff, source_sad_nonrd, num_pixels); |
677 | 0 | thresholds[0] = threshold_base >> 1; |
678 | 0 | thresholds[1] = threshold_base; |
679 | 0 | thresholds[3] = threshold_base << threshold_left_shift; |
680 | |
|
681 | 0 | tune_thresh_based_on_resolution(cpi, thresholds, threshold_base, |
682 | 0 | current_qindex, source_sad_rd, num_pixels); |
683 | |
|
684 | 0 | tune_thresh_based_on_qindex(cpi, thresholds, blk_sad, current_qindex, |
685 | 0 | num_pixels, is_segment_id_boosted, |
686 | 0 | source_sad_nonrd, lighting_change); |
687 | 0 | } |
688 | | |
689 | | // Set temporal variance low flag for superblock 64x64. |
690 | | // Only first 25 in the array are used in this case. |
static inline void set_low_temp_var_flag_64x64(CommonModeInfoParams *mi_params,
                                               PartitionSearchInfo *part_info,
                                               MACROBLOCKD *xd, VP64x64 *vt,
                                               const int64_t thresholds[],
                                               int mi_col, int mi_row) {
  if (xd->mi[0]->bsize == BLOCK_64X64) {
    // Flag index 0: the whole 64x64 superblock.
    if ((vt->part_variances).none.variance < (thresholds[0] >> 1))
      part_info->variance_low[0] = 1;
  } else if (xd->mi[0]->bsize == BLOCK_64X32) {
    // Flag indices 1..2: the two 64x32 halves.
    for (int part_idx = 0; part_idx < 2; part_idx++) {
      if (vt->part_variances.horz[part_idx].variance < (thresholds[0] >> 2))
        part_info->variance_low[part_idx + 1] = 1;
    }
  } else if (xd->mi[0]->bsize == BLOCK_32X64) {
    // Flag indices 3..4: the two 32x64 halves.
    for (int part_idx = 0; part_idx < 2; part_idx++) {
      if (vt->part_variances.vert[part_idx].variance < (thresholds[0] >> 2))
        part_info->variance_low[part_idx + 3] = 1;
    }
  } else {
    // mi-unit offsets of the four 32x32 quadrants within the 64x64 block.
    static const int idx[4][2] = { { 0, 0 }, { 0, 8 }, { 8, 0 }, { 8, 8 } };
    for (int lvl1_idx = 0; lvl1_idx < 4; lvl1_idx++) {
      const int idx_str = mi_params->mi_stride * (mi_row + idx[lvl1_idx][0]) +
                          mi_col + idx[lvl1_idx][1];
      MB_MODE_INFO **this_mi = mi_params->mi_grid_base + idx_str;

      // Skip quadrants that fall outside the frame.
      if (mi_params->mi_cols <= mi_col + idx[lvl1_idx][1] ||
          mi_params->mi_rows <= mi_row + idx[lvl1_idx][0])
        continue;

      if (*this_mi == NULL) continue;

      if ((*this_mi)->bsize == BLOCK_32X32) {
        // Flag indices 5..8: the four 32x32 quadrants.
        int64_t threshold_32x32 = (5 * thresholds[1]) >> 3;
        if (vt->split[lvl1_idx].part_variances.none.variance < threshold_32x32)
          part_info->variance_low[lvl1_idx + 5] = 1;
      } else {
        // For 32x16 and 16x32 blocks, the flag is set on each 16x16 block
        // inside.
        if ((*this_mi)->bsize == BLOCK_16X16 ||
            (*this_mi)->bsize == BLOCK_32X16 ||
            (*this_mi)->bsize == BLOCK_16X32) {
          // Flag indices 9..24: the sixteen 16x16 blocks.
          for (int lvl2_idx = 0; lvl2_idx < 4; lvl2_idx++) {
            if (vt->split[lvl1_idx]
                    .split[lvl2_idx]
                    .part_variances.none.variance < (thresholds[2] >> 8))
              part_info->variance_low[(lvl1_idx << 2) + lvl2_idx + 9] = 1;
          }
        }
      }
    }
  }
}
743 | | |
744 | | static inline void set_low_temp_var_flag_128x128( |
745 | | CommonModeInfoParams *mi_params, PartitionSearchInfo *part_info, |
746 | | MACROBLOCKD *xd, VP128x128 *vt, const int64_t thresholds[], int mi_col, |
747 | 0 | int mi_row) { |
748 | 0 | if (xd->mi[0]->bsize == BLOCK_128X128) { |
749 | 0 | if (vt->part_variances.none.variance < (thresholds[0] >> 1)) |
750 | 0 | part_info->variance_low[0] = 1; |
751 | 0 | } else if (xd->mi[0]->bsize == BLOCK_128X64) { |
752 | 0 | for (int part_idx = 0; part_idx < 2; part_idx++) { |
753 | 0 | if (vt->part_variances.horz[part_idx].variance < (thresholds[0] >> 2)) |
754 | 0 | part_info->variance_low[part_idx + 1] = 1; |
755 | 0 | } |
756 | 0 | } else if (xd->mi[0]->bsize == BLOCK_64X128) { |
757 | 0 | for (int part_idx = 0; part_idx < 2; part_idx++) { |
758 | 0 | if (vt->part_variances.vert[part_idx].variance < (thresholds[0] >> 2)) |
759 | 0 | part_info->variance_low[part_idx + 3] = 1; |
760 | 0 | } |
761 | 0 | } else { |
762 | 0 | static const int idx64[4][2] = { |
763 | 0 | { 0, 0 }, { 0, 16 }, { 16, 0 }, { 16, 16 } |
764 | 0 | }; |
765 | 0 | static const int idx32[4][2] = { { 0, 0 }, { 0, 8 }, { 8, 0 }, { 8, 8 } }; |
766 | 0 | for (int lvl1_idx = 0; lvl1_idx < 4; lvl1_idx++) { |
767 | 0 | const int idx_str = mi_params->mi_stride * (mi_row + idx64[lvl1_idx][0]) + |
768 | 0 | mi_col + idx64[lvl1_idx][1]; |
769 | 0 | MB_MODE_INFO **mi_64 = mi_params->mi_grid_base + idx_str; |
770 | 0 | if (*mi_64 == NULL) continue; |
771 | 0 | if (mi_params->mi_cols <= mi_col + idx64[lvl1_idx][1] || |
772 | 0 | mi_params->mi_rows <= mi_row + idx64[lvl1_idx][0]) |
773 | 0 | continue; |
774 | 0 | const int64_t threshold_64x64 = (5 * thresholds[1]) >> 3; |
775 | 0 | if ((*mi_64)->bsize == BLOCK_64X64) { |
776 | 0 | if (vt->split[lvl1_idx].part_variances.none.variance < threshold_64x64) |
777 | 0 | part_info->variance_low[5 + lvl1_idx] = 1; |
778 | 0 | } else if ((*mi_64)->bsize == BLOCK_64X32) { |
779 | 0 | for (int part_idx = 0; part_idx < 2; part_idx++) |
780 | 0 | if (vt->split[lvl1_idx].part_variances.horz[part_idx].variance < |
781 | 0 | (threshold_64x64 >> 1)) |
782 | 0 | part_info->variance_low[9 + (lvl1_idx << 1) + part_idx] = 1; |
783 | 0 | } else if ((*mi_64)->bsize == BLOCK_32X64) { |
784 | 0 | for (int part_idx = 0; part_idx < 2; part_idx++) |
785 | 0 | if (vt->split[lvl1_idx].part_variances.vert[part_idx].variance < |
786 | 0 | (threshold_64x64 >> 1)) |
787 | 0 | part_info->variance_low[17 + (lvl1_idx << 1) + part_idx] = 1; |
788 | 0 | } else { |
789 | 0 | for (int lvl2_idx = 0; lvl2_idx < 4; lvl2_idx++) { |
790 | 0 | const int idx_str1 = |
791 | 0 | mi_params->mi_stride * idx32[lvl2_idx][0] + idx32[lvl2_idx][1]; |
792 | 0 | MB_MODE_INFO **mi_32 = mi_params->mi_grid_base + idx_str + idx_str1; |
793 | 0 | if (*mi_32 == NULL) continue; |
794 | | |
795 | 0 | if (mi_params->mi_cols <= |
796 | 0 | mi_col + idx64[lvl1_idx][1] + idx32[lvl2_idx][1] || |
797 | 0 | mi_params->mi_rows <= |
798 | 0 | mi_row + idx64[lvl1_idx][0] + idx32[lvl2_idx][0]) |
799 | 0 | continue; |
800 | 0 | const int64_t threshold_32x32 = (5 * thresholds[2]) >> 3; |
801 | 0 | if ((*mi_32)->bsize == BLOCK_32X32) { |
802 | 0 | if (vt->split[lvl1_idx] |
803 | 0 | .split[lvl2_idx] |
804 | 0 | .part_variances.none.variance < threshold_32x32) |
805 | 0 | part_info->variance_low[25 + (lvl1_idx << 2) + lvl2_idx] = 1; |
806 | 0 | } else { |
807 | | // For 32x16 and 16x32 blocks, the flag is set on each 16x16 block |
808 | | // inside. |
809 | 0 | if ((*mi_32)->bsize == BLOCK_16X16 || |
810 | 0 | (*mi_32)->bsize == BLOCK_32X16 || |
811 | 0 | (*mi_32)->bsize == BLOCK_16X32) { |
812 | 0 | for (int lvl3_idx = 0; lvl3_idx < 4; lvl3_idx++) { |
813 | 0 | VPartVar *none_var = &vt->split[lvl1_idx] |
814 | 0 | .split[lvl2_idx] |
815 | 0 | .split[lvl3_idx] |
816 | 0 | .part_variances.none; |
817 | 0 | if (none_var->variance < (thresholds[3] >> 8)) |
818 | 0 | part_info->variance_low[41 + (lvl1_idx << 4) + |
819 | 0 | (lvl2_idx << 2) + lvl3_idx] = 1; |
820 | 0 | } |
821 | 0 | } |
822 | 0 | } |
823 | 0 | } |
824 | 0 | } |
825 | 0 | } |
826 | 0 | } |
827 | 0 | } |
828 | | |
829 | | static inline void set_low_temp_var_flag( |
830 | | AV1_COMP *cpi, PartitionSearchInfo *part_info, MACROBLOCKD *xd, |
831 | | VP128x128 *vt, int64_t thresholds[], MV_REFERENCE_FRAME ref_frame_partition, |
832 | 0 | int mi_col, int mi_row, const bool is_small_sb) { |
833 | 0 | AV1_COMMON *const cm = &cpi->common; |
834 | | // Check temporal variance for bsize >= 16x16, if LAST_FRAME was selected. |
835 | | // If the temporal variance is small set the flag |
836 | | // variance_low for the block. The variance threshold can be adjusted, the |
837 | | // higher the more aggressive. |
838 | 0 | if (ref_frame_partition == LAST_FRAME) { |
839 | 0 | if (is_small_sb) |
840 | 0 | set_low_temp_var_flag_64x64(&cm->mi_params, part_info, xd, |
841 | 0 | &(vt->split[0]), thresholds, mi_col, mi_row); |
842 | 0 | else |
843 | 0 | set_low_temp_var_flag_128x128(&cm->mi_params, part_info, xd, vt, |
844 | 0 | thresholds, mi_col, mi_row); |
845 | 0 | } |
846 | 0 | } |
847 | | |
// Maps the (row, col) position of a 16x16 block within a 64x64 superblock
// (4x4 grid) to its flag index in part_info->variance_low[] (entries 9..24,
// as written by set_low_temp_var_flag_64x64). Used by
// av1_get_force_skip_low_temp_var_small_sb.
static const int pos_shift_16x16[4][4] = {
  { 9, 10, 13, 14 }, { 11, 12, 15, 16 }, { 17, 18, 21, 22 }, { 19, 20, 23, 24 }
};
851 | | |
852 | | int av1_get_force_skip_low_temp_var_small_sb(const uint8_t *variance_low, |
853 | | int mi_row, int mi_col, |
854 | 0 | BLOCK_SIZE bsize) { |
855 | | // Relative indices of MB inside the superblock. |
856 | 0 | const int mi_x = mi_row & 0xF; |
857 | 0 | const int mi_y = mi_col & 0xF; |
858 | | // Relative indices of 16x16 block inside the superblock. |
859 | 0 | const int i = mi_x >> 2; |
860 | 0 | const int j = mi_y >> 2; |
861 | 0 | int force_skip_low_temp_var = 0; |
862 | | // Set force_skip_low_temp_var based on the block size and block offset. |
863 | 0 | switch (bsize) { |
864 | 0 | case BLOCK_64X64: force_skip_low_temp_var = variance_low[0]; break; |
865 | 0 | case BLOCK_64X32: |
866 | 0 | if (!mi_y && !mi_x) { |
867 | 0 | force_skip_low_temp_var = variance_low[1]; |
868 | 0 | } else if (!mi_y && mi_x) { |
869 | 0 | force_skip_low_temp_var = variance_low[2]; |
870 | 0 | } |
871 | 0 | break; |
872 | 0 | case BLOCK_32X64: |
873 | 0 | if (!mi_y && !mi_x) { |
874 | 0 | force_skip_low_temp_var = variance_low[3]; |
875 | 0 | } else if (mi_y && !mi_x) { |
876 | 0 | force_skip_low_temp_var = variance_low[4]; |
877 | 0 | } |
878 | 0 | break; |
879 | 0 | case BLOCK_32X32: |
880 | 0 | if (!mi_y && !mi_x) { |
881 | 0 | force_skip_low_temp_var = variance_low[5]; |
882 | 0 | } else if (mi_y && !mi_x) { |
883 | 0 | force_skip_low_temp_var = variance_low[6]; |
884 | 0 | } else if (!mi_y && mi_x) { |
885 | 0 | force_skip_low_temp_var = variance_low[7]; |
886 | 0 | } else if (mi_y && mi_x) { |
887 | 0 | force_skip_low_temp_var = variance_low[8]; |
888 | 0 | } |
889 | 0 | break; |
890 | 0 | case BLOCK_32X16: |
891 | 0 | case BLOCK_16X32: |
892 | 0 | case BLOCK_16X16: |
893 | 0 | force_skip_low_temp_var = variance_low[pos_shift_16x16[i][j]]; |
894 | 0 | break; |
895 | 0 | default: break; |
896 | 0 | } |
897 | | |
898 | 0 | return force_skip_low_temp_var; |
899 | 0 | } |
900 | | |
901 | | int av1_get_force_skip_low_temp_var(const uint8_t *variance_low, int mi_row, |
902 | 0 | int mi_col, BLOCK_SIZE bsize) { |
903 | 0 | int force_skip_low_temp_var = 0; |
904 | 0 | int x, y; |
905 | 0 | x = (mi_col & 0x1F) >> 4; |
906 | | // y = (mi_row & 0x1F) >> 4; |
907 | | // const int idx64 = (y << 1) + x; |
908 | 0 | y = (mi_row & 0x17) >> 3; |
909 | 0 | const int idx64 = y + x; |
910 | |
|
911 | 0 | x = (mi_col & 0xF) >> 3; |
912 | | // y = (mi_row & 0xF) >> 3; |
913 | | // const int idx32 = (y << 1) + x; |
914 | 0 | y = (mi_row & 0xB) >> 2; |
915 | 0 | const int idx32 = y + x; |
916 | |
|
917 | 0 | x = (mi_col & 0x7) >> 2; |
918 | | // y = (mi_row & 0x7) >> 2; |
919 | | // const int idx16 = (y << 1) + x; |
920 | 0 | y = (mi_row & 0x5) >> 1; |
921 | 0 | const int idx16 = y + x; |
922 | | // Set force_skip_low_temp_var based on the block size and block offset. |
923 | 0 | switch (bsize) { |
924 | 0 | case BLOCK_128X128: force_skip_low_temp_var = variance_low[0]; break; |
925 | 0 | case BLOCK_128X64: |
926 | 0 | assert((mi_col & 0x1F) == 0); |
927 | 0 | force_skip_low_temp_var = variance_low[1 + ((mi_row & 0x1F) != 0)]; |
928 | 0 | break; |
929 | 0 | case BLOCK_64X128: |
930 | 0 | assert((mi_row & 0x1F) == 0); |
931 | 0 | force_skip_low_temp_var = variance_low[3 + ((mi_col & 0x1F) != 0)]; |
932 | 0 | break; |
933 | 0 | case BLOCK_64X64: |
934 | | // Location of this 64x64 block inside the 128x128 superblock |
935 | 0 | force_skip_low_temp_var = variance_low[5 + idx64]; |
936 | 0 | break; |
937 | 0 | case BLOCK_64X32: |
938 | 0 | x = (mi_col & 0x1F) >> 4; |
939 | 0 | y = (mi_row & 0x1F) >> 3; |
940 | | /* |
941 | | .---------------.---------------. |
942 | | | x=0,y=0,idx=0 | x=0,y=0,idx=2 | |
943 | | :---------------+---------------: |
944 | | | x=0,y=1,idx=1 | x=1,y=1,idx=3 | |
945 | | :---------------+---------------: |
946 | | | x=0,y=2,idx=4 | x=1,y=2,idx=6 | |
947 | | :---------------+---------------: |
948 | | | x=0,y=3,idx=5 | x=1,y=3,idx=7 | |
949 | | '---------------'---------------' |
950 | | */ |
951 | 0 | const int idx64x32 = (x << 1) + (y % 2) + ((y >> 1) << 2); |
952 | 0 | force_skip_low_temp_var = variance_low[9 + idx64x32]; |
953 | 0 | break; |
954 | 0 | case BLOCK_32X64: |
955 | 0 | x = (mi_col & 0x1F) >> 3; |
956 | 0 | y = (mi_row & 0x1F) >> 4; |
957 | 0 | const int idx32x64 = (y << 2) + x; |
958 | 0 | force_skip_low_temp_var = variance_low[17 + idx32x64]; |
959 | 0 | break; |
960 | 0 | case BLOCK_32X32: |
961 | 0 | force_skip_low_temp_var = variance_low[25 + (idx64 << 2) + idx32]; |
962 | 0 | break; |
963 | 0 | case BLOCK_32X16: |
964 | 0 | case BLOCK_16X32: |
965 | 0 | case BLOCK_16X16: |
966 | 0 | force_skip_low_temp_var = |
967 | 0 | variance_low[41 + (idx64 << 4) + (idx32 << 2) + idx16]; |
968 | 0 | break; |
969 | 0 | default: break; |
970 | 0 | } |
971 | 0 | return force_skip_low_temp_var; |
972 | 0 | } |
973 | | |
974 | | void av1_set_variance_partition_thresholds(AV1_COMP *cpi, int qindex, |
975 | 0 | int content_lowsumdiff) { |
976 | 0 | SPEED_FEATURES *const sf = &cpi->sf; |
977 | 0 | if (sf->part_sf.partition_search_type != VAR_BASED_PARTITION) { |
978 | 0 | return; |
979 | 0 | } else { |
980 | 0 | set_vbp_thresholds(cpi, cpi->vbp_info.thresholds, 0, qindex, |
981 | 0 | content_lowsumdiff, 0, 0, 0, 0); |
982 | | // The threshold below is not changed locally. |
983 | 0 | cpi->vbp_info.threshold_minmax = 15 + (qindex >> 3); |
984 | 0 | } |
985 | 0 | } |
986 | | |
// Compute per-plane chroma SADs against the luma SAD and set the superblock
// color-sensitivity flags (x->color_sensitivity_sb[_g/_alt]) used later by
// nonrd pickmode. uv_sad[] returns the U/V SADs against LAST. A no-op for
// key frames and monochrome.
static inline void chroma_check(AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
                                unsigned int y_sad, unsigned int y_sad_g,
                                unsigned int y_sad_alt, bool is_key_frame,
                                bool zero_motion, unsigned int *uv_sad) {
  MACROBLOCKD *xd = &x->e_mbd;
  const int source_sad_nonrd = x->content_state_sb.source_sad_nonrd;
  // uv_sad > (y_sad >> shift_upper_limit)  -> flag 1 (color sensitive),
  // uv_sad < (y_sad >> shift_lower_limit)  -> flag 0,
  // otherwise                              -> flag 2 (borderline).
  int shift_upper_limit = 1;
  int shift_lower_limit = 3;
  int fac_uv = 6;
  if (is_key_frame || cpi->oxcf.tool_cfg.enable_monochrome) return;

  // Use lower threshold (more conservative in setting color flag) for
  // higher resolutions non-screen, which tend to have more camera noise.
  // Since this may be used to skip compound mode in nonrd pickmode, which
  // is generally more effective for higher resolutions, better to be more
  // conservative.
  if (cpi->oxcf.tune_cfg.content != AOM_CONTENT_SCREEN) {
    if (cpi->common.width * cpi->common.height >= RESOLUTION_1080P)
      fac_uv = 3;
    else
      fac_uv = 5;
  }
  // Screen content: widen the "not sensitive" band under high motion.
  if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN &&
      cpi->rc.high_source_sad) {
    shift_lower_limit = 7;
  } else if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN &&
             cpi->rc.percent_blocks_with_motion > 90 &&
             cpi->rc.frame_source_sad > 10000 && source_sad_nonrd > kLowSad) {
    shift_lower_limit = 8;
    shift_upper_limit = 3;
  } else if (source_sad_nonrd >= kMedSad && x->source_variance > 500 &&
             cpi->common.width * cpi->common.height >= 640 * 360) {
    shift_upper_limit = 2;
    shift_lower_limit = source_sad_nonrd > kMedSad ? 5 : 4;
  }

  MB_MODE_INFO *mi = xd->mi[0];
  const AV1_COMMON *const cm = &cpi->common;
  const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, LAST_FRAME);
  const YV12_BUFFER_CONFIG *yv12_g = get_ref_frame_yv12_buf(cm, GOLDEN_FRAME);
  const YV12_BUFFER_CONFIG *yv12_alt = get_ref_frame_yv12_buf(cm, ALTREF_FRAME);
  const struct scale_factors *const sf =
      get_ref_scale_factors_const(cm, LAST_FRAME);
  const struct scale_factors *const sf_g =
      get_ref_scale_factors_const(cm, GOLDEN_FRAME);
  const struct scale_factors *const sf_alt =
      get_ref_scale_factors_const(cm, ALTREF_FRAME);
  struct buf_2d dst;
  // NOTE(review): these are initialized once, not per plane; if a plane's
  // bs is BLOCK_INVALID the previous plane's value is reused below.
  unsigned int uv_sad_g = 0;
  unsigned int uv_sad_alt = 0;

  for (int plane = AOM_PLANE_U; plane < MAX_MB_PLANE; ++plane) {
    struct macroblock_plane *p = &x->plane[plane];
    struct macroblockd_plane *pd = &xd->plane[plane];
    const BLOCK_SIZE bs =
        get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);

    if (bs != BLOCK_INVALID) {
      // For last:
      if (zero_motion) {
        if (mi->ref_frame[0] == LAST_FRAME) {
          // pre[0] already points at LAST; measure SAD directly.
          uv_sad[plane - 1] = cpi->ppi->fn_ptr[bs].sdf(
              p->src.buf, p->src.stride, pd->pre[0].buf, pd->pre[0].stride);
        } else {
          uint8_t *src = (plane == 1) ? yv12->u_buffer : yv12->v_buffer;
          setup_pred_plane(&dst, xd->mi[0]->bsize, src, yv12->uv_crop_width,
                           yv12->uv_crop_height, yv12->uv_stride, xd->mi_row,
                           xd->mi_col, sf, xd->plane[plane].subsampling_x,
                           xd->plane[plane].subsampling_y);

          uv_sad[plane - 1] = cpi->ppi->fn_ptr[bs].sdf(
              p->src.buf, p->src.stride, dst.buf, dst.stride);
        }
      } else {
        // Non-zero motion: compare against the reconstructed destination.
        uv_sad[plane - 1] = cpi->ppi->fn_ptr[bs].sdf(
            p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride);
      }

      // For golden: y_sad_g == UINT_MAX signals GOLDEN was not measured.
      if (y_sad_g != UINT_MAX) {
        uint8_t *src = (plane == 1) ? yv12_g->u_buffer : yv12_g->v_buffer;
        setup_pred_plane(&dst, xd->mi[0]->bsize, src, yv12_g->uv_crop_width,
                         yv12_g->uv_crop_height, yv12_g->uv_stride, xd->mi_row,
                         xd->mi_col, sf_g, xd->plane[plane].subsampling_x,
                         xd->plane[plane].subsampling_y);
        uv_sad_g = cpi->ppi->fn_ptr[bs].sdf(p->src.buf, p->src.stride, dst.buf,
                                            dst.stride);
      }

      // For altref: same UINT_MAX sentinel convention.
      if (y_sad_alt != UINT_MAX) {
        uint8_t *src = (plane == 1) ? yv12_alt->u_buffer : yv12_alt->v_buffer;
        setup_pred_plane(&dst, xd->mi[0]->bsize, src, yv12_alt->uv_crop_width,
                         yv12_alt->uv_crop_height, yv12_alt->uv_stride,
                         xd->mi_row, xd->mi_col, sf_alt,
                         xd->plane[plane].subsampling_x,
                         xd->plane[plane].subsampling_y);
        uv_sad_alt = cpi->ppi->fn_ptr[bs].sdf(p->src.buf, p->src.stride,
                                              dst.buf, dst.stride);
      }
    }

    if (uv_sad[plane - 1] > (y_sad >> shift_upper_limit))
      x->color_sensitivity_sb[COLOR_SENS_IDX(plane)] = 1;
    else if (uv_sad[plane - 1] < (y_sad >> shift_lower_limit))
      x->color_sensitivity_sb[COLOR_SENS_IDX(plane)] = 0;
    // Borderline case: to be refined at coding block level in nonrd_pickmode,
    // for coding block size < sb_size.
    else
      x->color_sensitivity_sb[COLOR_SENS_IDX(plane)] = 2;

    x->color_sensitivity_sb_g[COLOR_SENS_IDX(plane)] =
        uv_sad_g > y_sad_g / fac_uv;
    x->color_sensitivity_sb_alt[COLOR_SENS_IDX(plane)] =
        uv_sad_alt > y_sad_alt / fac_uv;
  }
}
1104 | | |
// Fill the leaf levels of the variance partition tree for one superblock.
// For key frames, variance is sampled down to 4x4 averages; otherwise 8x8
// source-vs-prediction averages are used and 16x16 variances are aggregated
// into avg/min/max per 32x32 (avg_16x16 / minvar_16x16 / maxvar_16x16).
// force_split[] entries are set to PART_EVAL_ONLY_SPLIT at every level above
// a 16x16 block whose variance exceeds thresholds[3].
static void fill_variance_tree_leaves(
    AV1_COMP *cpi, MACROBLOCK *x, VP128x128 *vt, PART_EVAL_STATUS *force_split,
    int avg_16x16[][4], int maxvar_16x16[][4], int minvar_16x16[][4],
    int64_t *thresholds, const uint8_t *src_buf, int src_stride,
    const uint8_t *dst_buf, int dst_stride, bool is_key_frame,
    const bool is_small_sb) {
  MACROBLOCKD *xd = &x->e_mbd;
  const int num_64x64_blocks = is_small_sb ? 1 : 4;
  // TODO(kyslov) Bring back compute_minmax_variance with content type detection
  const int compute_minmax_variance = 0;
  const int segment_id = xd->mi[0]->segment_id;
  int pixels_wide = 128, pixels_high = 128;
  int border_offset_4x4 = 0;
  int temporal_denoising = cpi->sf.rt_sf.use_rtc_tf;
  // dst_buf pointer is not used for is_key_frame, so it should be NULL.
  assert(IMPLIES(is_key_frame, dst_buf == NULL));
  if (is_small_sb) {
    pixels_wide = 64;
    pixels_high = 64;
  }
  // Clip the sampled area to the frame for superblocks on the border
  // (mb_to_*_edge is negative there, in 1/8 pixel units).
  if (xd->mb_to_right_edge < 0) pixels_wide += (xd->mb_to_right_edge >> 3);
  if (xd->mb_to_bottom_edge < 0) pixels_high += (xd->mb_to_bottom_edge >> 3);
#if CONFIG_AV1_TEMPORAL_DENOISING
  temporal_denoising |= cpi->oxcf.noise_sensitivity;
#endif
  // For temporal filtering or temporal denoiser enabled: since the source
  // is modified we need to avoid 4x4 avg along superblock boundary, since
  // simd code will load 8 pixels for 4x4 avg and so can access source
  // data outside superblock (while its being modified by temporal filter).
  // Temporal filtering is never done on key frames.
  if (!is_key_frame && temporal_denoising) border_offset_4x4 = 4;
  for (int blk64_idx = 0; blk64_idx < num_64x64_blocks; blk64_idx++) {
    // Pixel offsets of this 64x64 block within the superblock.
    const int x64_idx = GET_BLK_IDX_X(blk64_idx, 6);
    const int y64_idx = GET_BLK_IDX_Y(blk64_idx, 6);
    const int blk64_scale_idx = blk64_idx << 2;
    force_split[blk64_idx + 1] = PART_EVAL_ALL;

    for (int lvl1_idx = 0; lvl1_idx < 4; lvl1_idx++) {
      const int x32_idx = x64_idx + GET_BLK_IDX_X(lvl1_idx, 5);
      const int y32_idx = y64_idx + GET_BLK_IDX_Y(lvl1_idx, 5);
      const int lvl1_scale_idx = (blk64_scale_idx + lvl1_idx) << 2;
      force_split[5 + blk64_scale_idx + lvl1_idx] = PART_EVAL_ALL;
      avg_16x16[blk64_idx][lvl1_idx] = 0;
      maxvar_16x16[blk64_idx][lvl1_idx] = 0;
      minvar_16x16[blk64_idx][lvl1_idx] = INT_MAX;
      for (int lvl2_idx = 0; lvl2_idx < 4; lvl2_idx++) {
        const int x16_idx = x32_idx + GET_BLK_IDX_X(lvl2_idx, 4);
        const int y16_idx = y32_idx + GET_BLK_IDX_Y(lvl2_idx, 4);
        const int split_index = 21 + lvl1_scale_idx + lvl2_idx;
        VP16x16 *vst = &vt->split[blk64_idx].split[lvl1_idx].split[lvl2_idx];
        force_split[split_index] = PART_EVAL_ALL;
        if (is_key_frame) {
          // Go down to 4x4 down-sampling for variance.
          for (int lvl3_idx = 0; lvl3_idx < 4; lvl3_idx++) {
            const int x8_idx = x16_idx + GET_BLK_IDX_X(lvl3_idx, 3);
            const int y8_idx = y16_idx + GET_BLK_IDX_Y(lvl3_idx, 3);
            VP8x8 *vst2 = &vst->split[lvl3_idx];
            fill_variance_4x4avg(src_buf, src_stride, x8_idx, y8_idx, vst2,
#if CONFIG_AV1_HIGHBITDEPTH
                                 xd->cur_buf->flags,
#endif
                                 pixels_wide, pixels_high, border_offset_4x4);
          }
        } else {
          // Inter frame: 8x8 averages of source vs. prediction, then
          // aggregate to the 16x16 variance.
          fill_variance_8x8avg(src_buf, src_stride, dst_buf, dst_stride,
                               x16_idx, y16_idx, vst, is_cur_buf_hbd(xd),
                               pixels_wide, pixels_high);

          fill_variance_tree(vst, BLOCK_16X16);
          VPartVar *none_var = &vt->split[blk64_idx]
                                    .split[lvl1_idx]
                                    .split[lvl2_idx]
                                    .part_variances.none;
          get_variance(none_var);
          const int val_none_var = none_var->variance;
          avg_16x16[blk64_idx][lvl1_idx] += val_none_var;
          minvar_16x16[blk64_idx][lvl1_idx] =
              AOMMIN(minvar_16x16[blk64_idx][lvl1_idx], val_none_var);
          maxvar_16x16[blk64_idx][lvl1_idx] =
              AOMMAX(maxvar_16x16[blk64_idx][lvl1_idx], val_none_var);
          if (val_none_var > thresholds[3]) {
            // 16X16 variance is above threshold for split, so force split to
            // 8x8 for this 16x16 block (this also forces splits for upper
            // levels).
            force_split[split_index] = PART_EVAL_ONLY_SPLIT;
            force_split[5 + blk64_scale_idx + lvl1_idx] = PART_EVAL_ONLY_SPLIT;
            force_split[blk64_idx + 1] = PART_EVAL_ONLY_SPLIT;
            force_split[0] = PART_EVAL_ONLY_SPLIT;
          } else if (!cyclic_refresh_segment_id_boosted(segment_id) &&
                     compute_minmax_variance && val_none_var > thresholds[2]) {
            // We have some nominal amount of 16x16 variance (based on average),
            // compute the minmax over the 8x8 sub-blocks, and if above
            // threshold, force split to 8x8 block for this 16x16 block.
            // NOTE: currently dead — compute_minmax_variance is hard-coded 0
            // above (see TODO).
            int minmax = compute_minmax_8x8(src_buf, src_stride, dst_buf,
                                            dst_stride, x16_idx, y16_idx,
#if CONFIG_AV1_HIGHBITDEPTH
                                            xd->cur_buf->flags,
#endif
                                            pixels_wide, pixels_high);
            const int thresh_minmax = (int)cpi->vbp_info.threshold_minmax;
            if (minmax > thresh_minmax) {
              force_split[split_index] = PART_EVAL_ONLY_SPLIT;
              force_split[5 + blk64_scale_idx + lvl1_idx] =
                  PART_EVAL_ONLY_SPLIT;
              force_split[blk64_idx + 1] = PART_EVAL_ONLY_SPLIT;
              force_split[0] = PART_EVAL_ONLY_SPLIT;
            }
          }
        }
      }
    }
  }
}
1218 | | |
1219 | | static inline void set_ref_frame_for_partition( |
1220 | | AV1_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, |
1221 | | MV_REFERENCE_FRAME *ref_frame_partition, MB_MODE_INFO *mi, |
1222 | | unsigned int *y_sad, unsigned int *y_sad_g, unsigned int *y_sad_alt, |
1223 | | const YV12_BUFFER_CONFIG *yv12_g, const YV12_BUFFER_CONFIG *yv12_alt, |
1224 | 0 | int mi_row, int mi_col, int num_planes) { |
1225 | 0 | AV1_COMMON *const cm = &cpi->common; |
1226 | 0 | const double fac = |
1227 | 0 | (cpi->svc.spatial_layer_id > 0 && cpi->svc.has_lower_quality_layer) ? 1.0 |
1228 | 0 | : 0.9; |
1229 | 0 | const bool is_set_golden_ref_frame = |
1230 | 0 | *y_sad_g < fac * *y_sad && *y_sad_g < *y_sad_alt; |
1231 | 0 | const bool is_set_altref_ref_frame = |
1232 | 0 | *y_sad_alt < fac * *y_sad && *y_sad_alt < *y_sad_g; |
1233 | |
|
1234 | 0 | if (is_set_golden_ref_frame) { |
1235 | 0 | av1_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col, |
1236 | 0 | get_ref_scale_factors(cm, GOLDEN_FRAME), num_planes); |
1237 | 0 | mi->ref_frame[0] = GOLDEN_FRAME; |
1238 | 0 | mi->mv[0].as_int = 0; |
1239 | 0 | *y_sad = *y_sad_g; |
1240 | 0 | *ref_frame_partition = GOLDEN_FRAME; |
1241 | 0 | x->nonrd_prune_ref_frame_search = 0; |
1242 | 0 | x->sb_me_partition = 0; |
1243 | 0 | } else if (is_set_altref_ref_frame) { |
1244 | 0 | av1_setup_pre_planes(xd, 0, yv12_alt, mi_row, mi_col, |
1245 | 0 | get_ref_scale_factors(cm, ALTREF_FRAME), num_planes); |
1246 | 0 | mi->ref_frame[0] = ALTREF_FRAME; |
1247 | 0 | mi->mv[0].as_int = 0; |
1248 | 0 | *y_sad = *y_sad_alt; |
1249 | 0 | *ref_frame_partition = ALTREF_FRAME; |
1250 | 0 | x->nonrd_prune_ref_frame_search = 0; |
1251 | 0 | x->sb_me_partition = 0; |
1252 | 0 | } else { |
1253 | 0 | *ref_frame_partition = LAST_FRAME; |
1254 | 0 | x->nonrd_prune_ref_frame_search = |
1255 | 0 | cpi->sf.rt_sf.nonrd_prune_ref_frame_search; |
1256 | 0 | } |
1257 | 0 | } |
1258 | | |
1259 | | static AOM_FORCE_INLINE int mv_distance(const FULLPEL_MV *mv0, |
1260 | 0 | const FULLPEL_MV *mv1) { |
1261 | 0 | return abs(mv0->row - mv1->row) + abs(mv0->col - mv1->col); |
1262 | 0 | } |
1263 | | |
// Try the motion vectors of the above and left neighbours (when inter-coded
// against LAST_FRAME) as alternatives to the current superblock MV. If a
// neighbour MV yields a SAD below multi/8 of the current *y_sad (and beats
// the other neighbour), adopt it: update *y_sad and mi->mv[0].
static inline void evaluate_neighbour_mvs(AV1_COMP *cpi, MACROBLOCK *x,
                                          unsigned int *y_sad, bool is_small_sb,
                                          int est_motion) {
  const int source_sad_nonrd = x->content_state_sb.source_sad_nonrd;
  // TODO(yunqingwang@google.com): test if this condition works with other
  // speeds.
  if (est_motion > 2 && source_sad_nonrd > kMedSad) return;

  MACROBLOCKD *xd = &x->e_mbd;
  BLOCK_SIZE bsize = is_small_sb ? BLOCK_64X64 : BLOCK_128X128;
  MB_MODE_INFO *mi = xd->mi[0];

  // UINT_MAX marks a neighbour as "not evaluated".
  unsigned int above_y_sad = UINT_MAX;
  unsigned int left_y_sad = UINT_MAX;
  FULLPEL_MV above_mv = kZeroFullMv;
  FULLPEL_MV left_mv = kZeroFullMv;
  SubpelMvLimits subpel_mv_limits;
  const MV dummy_mv = { 0, 0 };
  av1_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, &dummy_mv);

  // Current best MV
  FULLPEL_MV best_mv = get_fullmv_from_mv(&mi->mv[0].as_mv);
  // Acceptance ratio: neighbour SAD must be below multi/8 of current SAD;
  // tightened (7/8) for higher-motion content at est_motion > 2.
  const int multi = (est_motion > 2 && source_sad_nonrd > kLowSad) ? 7 : 8;

  if (xd->up_available) {
    const MB_MODE_INFO *above_mbmi = xd->above_mbmi;
    if (above_mbmi->mode >= INTRA_MODE_END &&
        above_mbmi->ref_frame[0] == LAST_FRAME) {
      MV temp = above_mbmi->mv[0].as_mv;
      clamp_mv(&temp, &subpel_mv_limits);
      above_mv = get_fullmv_from_mv(&temp);

      // Only compute the SAD when the neighbour MV differs from the current.
      if (mv_distance(&best_mv, &above_mv) > 0) {
        uint8_t const *ref_buf =
            get_buf_from_fullmv(&xd->plane[0].pre[0], &above_mv);
        above_y_sad = cpi->ppi->fn_ptr[bsize].sdf(
            x->plane[0].src.buf, x->plane[0].src.stride, ref_buf,
            xd->plane[0].pre[0].stride);
      }
    }
  }
  if (xd->left_available) {
    const MB_MODE_INFO *left_mbmi = xd->left_mbmi;
    if (left_mbmi->mode >= INTRA_MODE_END &&
        left_mbmi->ref_frame[0] == LAST_FRAME) {
      MV temp = left_mbmi->mv[0].as_mv;
      clamp_mv(&temp, &subpel_mv_limits);
      left_mv = get_fullmv_from_mv(&temp);

      // Skip when identical to the current MV or to the above MV already
      // measured.
      if (mv_distance(&best_mv, &left_mv) > 0 &&
          mv_distance(&above_mv, &left_mv) > 0) {
        uint8_t const *ref_buf =
            get_buf_from_fullmv(&xd->plane[0].pre[0], &left_mv);
        left_y_sad = cpi->ppi->fn_ptr[bsize].sdf(
            x->plane[0].src.buf, x->plane[0].src.stride, ref_buf,
            xd->plane[0].pre[0].stride);
      }
    }
  }

  // Adopt whichever neighbour MV improved the SAD the most.
  if (above_y_sad < ((multi * *y_sad) >> 3) && above_y_sad < left_y_sad) {
    *y_sad = above_y_sad;
    mi->mv[0].as_mv = get_mv_from_fullmv(&above_mv);
    clamp_mv(&mi->mv[0].as_mv, &subpel_mv_limits);
  }
  if (left_y_sad < ((multi * *y_sad) >> 3) && left_y_sad < above_y_sad) {
    *y_sad = left_y_sad;
    mi->mv[0].as_mv = get_mv_from_fullmv(&left_mv);
    clamp_mv(&mi->mv[0].as_mv, &subpel_mv_limits);
  }
}
1335 | | |
// Run integer-projection motion estimation for the superblock and update
// *y_sad. For screen content, the estimated MV may additionally be adopted
// for partitioning (x->sb_me_partition / x->sb_me_mv) and scroll statistics
// are accumulated on temporal layer 0.
static void do_int_pro_motion_estimation(AV1_COMP *cpi, MACROBLOCK *x,
                                         unsigned int *y_sad, int mi_row,
                                         int mi_col, int source_sad_nonrd) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mi = xd->mi[0];
  const int is_screen = cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN;
  // Widen the column search for screen content that appears to be
  // horizontally scrolling.
  const int increase_col_sw = source_sad_nonrd > kMedSad &&
                              !cpi->rc.high_motion_content_screen_rtc &&
                              (cpi->svc.temporal_layer_id == 0 ||
                               cpi->rc.num_col_blscroll_last_tl0 > 2);
  int me_search_size_col = is_screen
                               ? increase_col_sw ? 512 : 96
                               : block_size_wide[cm->seq_params->sb_size] >> 1;
  // For screen use larger search size row motion to capture
  // vertical scroll, which can be larger motion.
  int me_search_size_row = is_screen
                               ? source_sad_nonrd > kMedSad ? 512 : 192
                               : block_size_high[cm->seq_params->sb_size] >> 1;
  // 4K and above, TL0 of a multi-layer stream: double the search range.
  if (cm->width * cm->height >= 3840 * 2160 &&
      cpi->svc.temporal_layer_id == 0 && cpi->svc.number_temporal_layers > 1) {
    me_search_size_row = me_search_size_row << 1;
    me_search_size_col = me_search_size_col << 1;
  }
  unsigned int y_sad_zero;
  *y_sad = av1_int_pro_motion_estimation(
      cpi, x, cm->seq_params->sb_size, mi_row, mi_col, &kZeroMv, &y_sad_zero,
      me_search_size_col, me_search_size_row);
  // The logic below selects whether the motion estimated in the
  // int_pro_motion() will be used in nonrd_pickmode. Only do this
  // for screen for now.
  if (is_screen) {
    unsigned int thresh_sad =
        (cm->seq_params->sb_size == BLOCK_128X128) ? 50000 : 20000;
    // Accept the MV only if it at least halves the zero-MV SAD and is below
    // an absolute cap.
    if (*y_sad < (y_sad_zero >> 1) && *y_sad < thresh_sad) {
      x->sb_me_partition = 1;
      x->sb_me_mv.as_int = mi->mv[0].as_int;
      if (cpi->svc.temporal_layer_id == 0) {
        // Count superblocks showing pure horizontal / vertical scroll.
        if (abs(mi->mv[0].as_mv.col) > 16 && abs(mi->mv[0].as_mv.row) == 0)
          x->sb_col_scroll++;
        else if (abs(mi->mv[0].as_mv.row) > 16 && abs(mi->mv[0].as_mv.col) == 0)
          x->sb_row_scroll++;
      }
    } else {
      x->sb_me_partition = 0;
      // Fall back to using zero motion.
      *y_sad = y_sad_zero;
      mi->mv[0].as_int = 0;
    }
  }
}
1387 | | |
// Sets up the luma prediction planes for the references considered in
// variance-based partitioning (LAST always; GOLDEN/ALTREF when allowed),
// computes the SB-level luma SAD against each candidate, optionally runs
// coarse motion estimation on LAST, and finally picks the reference used
// for partitioning via set_ref_frame_for_partition().
// Outputs:
//   *y_sad      - luma SAD for the selected motion (LAST-based search).
//   *y_sad_g    - luma SAD vs GOLDEN (UINT_MAX if GOLDEN not evaluated).
//   *y_sad_alt  - luma SAD vs ALTREF (UINT_MAX if ALTREF not evaluated).
//   *y_sad_last - final zero/estimated-mv SAD vs LAST.
//   *ref_frame_partition - reference chosen for partitioning.
// Side effects: xd->plane[].pre[0] is left pointing at the chosen reference,
// xd->mi[0] (mv, ref_frame, bsize, interp_filters) is updated, and for a
// non-zero mv the inter predictor for the SB is built into the dst plane.
static void setup_planes(AV1_COMP *cpi, MACROBLOCK *x, unsigned int *y_sad,
                         unsigned int *y_sad_g, unsigned int *y_sad_alt,
                         unsigned int *y_sad_last,
                         MV_REFERENCE_FRAME *ref_frame_partition,
                         struct scale_factors *sf_no_scale, int mi_row,
                         int mi_col, bool is_small_sb, bool scaled_ref_last) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  const int num_planes = av1_num_planes(cm);
  bool scaled_ref_golden = false;
  bool scaled_ref_alt = false;
  BLOCK_SIZE bsize = is_small_sb ? BLOCK_64X64 : BLOCK_128X128;
  MB_MODE_INFO *mi = xd->mi[0];
  // Caller passes scaled_ref_last = true when LAST has a different
  // resolution; in that case use the pre-scaled copy of the reference.
  const YV12_BUFFER_CONFIG *yv12 =
      scaled_ref_last ? av1_get_scaled_ref_frame(cpi, LAST_FRAME)
                      : get_ref_frame_yv12_buf(cm, LAST_FRAME);
  assert(yv12 != NULL);
  const YV12_BUFFER_CONFIG *yv12_g = NULL;
  const YV12_BUFFER_CONFIG *yv12_alt = NULL;
  // Check if LAST is a reference. For spatial layers always use it as
  // reference scaling.
  int use_last_ref = (cpi->ref_frame_flags & AOM_LAST_FLAG) ||
                     cpi->svc.number_spatial_layers > 1;
  int use_golden_ref = cpi->ref_frame_flags & AOM_GOLD_FLAG;
  int use_alt_ref = cpi->ppi->rtc_ref.set_ref_frame_config ||
                    cpi->sf.rt_sf.use_nonrd_altref_frame ||
                    (cpi->sf.rt_sf.use_comp_ref_nonrd &&
                     cpi->sf.rt_sf.ref_frame_comp_nonrd[2] == 1);

  // Check if GOLDEN should be used as reference for partitioning.
  // Allow for spatial layers if lower layer has same resolution.
  if ((cpi->svc.number_spatial_layers == 1 ||
       cpi->svc.has_lower_quality_layer) &&
      use_golden_ref &&
      (x->content_state_sb.source_sad_nonrd != kZeroSad || !use_last_ref)) {
    yv12_g = get_ref_frame_yv12_buf(cm, GOLDEN_FRAME);
    // GOLDEN at a different resolution: fall back to its scaled copy.
    if (yv12_g && (yv12_g->y_crop_height != cm->height ||
                   yv12_g->y_crop_width != cm->width)) {
      yv12_g = av1_get_scaled_ref_frame(cpi, GOLDEN_FRAME);
      scaled_ref_golden = true;
    }
    // Only compute the GOLDEN SAD when it is a distinct buffer from LAST
    // (or LAST is unused); the scaled copy needs no scale factors.
    if (yv12_g && (yv12_g != yv12 || !use_last_ref)) {
      av1_setup_pre_planes(
          xd, 0, yv12_g, mi_row, mi_col,
          scaled_ref_golden ? NULL : get_ref_scale_factors(cm, GOLDEN_FRAME),
          num_planes);
      *y_sad_g = cpi->ppi->fn_ptr[bsize].sdf(
          x->plane[AOM_PLANE_Y].src.buf, x->plane[AOM_PLANE_Y].src.stride,
          xd->plane[AOM_PLANE_Y].pre[0].buf,
          xd->plane[AOM_PLANE_Y].pre[0].stride);
    }
  }

  // Check if ALTREF should be used as reference for partitioning.
  // Allow for spatial layers if lower layer has same resolution.
  if ((cpi->svc.number_spatial_layers == 1 ||
       cpi->svc.has_lower_quality_layer) &&
      use_alt_ref && (cpi->ref_frame_flags & AOM_ALT_FLAG) &&
      (x->content_state_sb.source_sad_nonrd != kZeroSad || !use_last_ref)) {
    yv12_alt = get_ref_frame_yv12_buf(cm, ALTREF_FRAME);
    // ALTREF at a different resolution: fall back to its scaled copy.
    if (yv12_alt && (yv12_alt->y_crop_height != cm->height ||
                     yv12_alt->y_crop_width != cm->width)) {
      yv12_alt = av1_get_scaled_ref_frame(cpi, ALTREF_FRAME);
      scaled_ref_alt = true;
    }
    if (yv12_alt && (yv12_alt != yv12 || !use_last_ref)) {
      av1_setup_pre_planes(
          xd, 0, yv12_alt, mi_row, mi_col,
          scaled_ref_alt ? NULL : get_ref_scale_factors(cm, ALTREF_FRAME),
          num_planes);
      *y_sad_alt = cpi->ppi->fn_ptr[bsize].sdf(
          x->plane[AOM_PLANE_Y].src.buf, x->plane[AOM_PLANE_Y].src.stride,
          xd->plane[AOM_PLANE_Y].pre[0].buf,
          xd->plane[AOM_PLANE_Y].pre[0].stride);
    }
  }

  if (use_last_ref) {
    const int source_sad_nonrd = x->content_state_sb.source_sad_nonrd;
    // Set LAST last so the pre planes point at it for the motion search
    // below and for the subsequent reference selection.
    av1_setup_pre_planes(
        xd, 0, yv12, mi_row, mi_col,
        scaled_ref_last ? NULL : get_ref_scale_factors(cm, LAST_FRAME),
        num_planes);
    mi->ref_frame[0] = LAST_FRAME;
    mi->ref_frame[1] = NONE_FRAME;
    mi->bsize = cm->seq_params->sb_size;
    mi->mv[0].as_int = 0;
    mi->interp_filters = av1_broadcast_interp_filter(BILINEAR);

    int est_motion = cpi->sf.rt_sf.estimate_motion_for_var_based_partition;
    // TODO(b/290596301): Look into adjusting this condition.
    // There is regression on color content when
    // estimate_motion_for_var_based_partition = 3 and high motion,
    // so for now force it to 2 based on superblock sad.
    if (est_motion > 2 && source_sad_nonrd > kMedSad) est_motion = 2;

    // Coarse (integer-projection) motion search against LAST; only for
    // complete SBs with some texture and motion.
    if ((est_motion == 1 || est_motion == 2) && xd->mb_to_right_edge >= 0 &&
        xd->mb_to_bottom_edge >= 0 && x->source_variance > 100 &&
        source_sad_nonrd > kLowSad) {
      do_int_pro_motion_estimation(cpi, x, y_sad, mi_row, mi_col,
                                   source_sad_nonrd);
    }

    // No motion search was run (or it was skipped): compute zero-mv SAD.
    if (*y_sad == UINT_MAX) {
      *y_sad = cpi->ppi->fn_ptr[bsize].sdf(
          x->plane[AOM_PLANE_Y].src.buf, x->plane[AOM_PLANE_Y].src.stride,
          xd->plane[AOM_PLANE_Y].pre[0].buf,
          xd->plane[AOM_PLANE_Y].pre[0].stride);
    }

    // Evaluate if neighbours' MVs give better predictions. Zero MV is tested
    // already, so only non-zero MVs are tested here. Here the neighbour blocks
    // are the first block above or left to this superblock.
    if (est_motion >= 2 && (xd->up_available || xd->left_available))
      evaluate_neighbour_mvs(cpi, x, y_sad, is_small_sb, est_motion);

    *y_sad_last = *y_sad;
  }

  // Pick the ref frame for partitioning, use golden or altref frame only if
  // its lower sad, bias to LAST with factor 0.9.
  set_ref_frame_for_partition(cpi, x, xd, ref_frame_partition, mi, y_sad,
                              y_sad_g, y_sad_alt, yv12_g, yv12_alt, mi_row,
                              mi_col, num_planes);

  // Only calculate the predictor for non-zero MV.
  if (mi->mv[0].as_int != 0) {
    if (!scaled_ref_last) {
      set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
    } else {
      // Scaled reference: use identity scale factors for prediction.
      xd->block_ref_scale_factors[0] = sf_no_scale;
      xd->block_ref_scale_factors[1] = sf_no_scale;
    }
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL,
                                  cm->seq_params->sb_size, AOM_PLANE_Y,
                                  num_planes - 1);
  }
}
1526 | | |
1527 | | // Decides whether to split or merge a 16x16 partition block in variance based |
1528 | | // partitioning based on the 8x8 sub-block variances. |
1529 | | static inline PART_EVAL_STATUS get_part_eval_based_on_sub_blk_var( |
1530 | 0 | VP16x16 *var_16x16_info, int64_t threshold16) { |
1531 | 0 | int max_8x8_var = 0, min_8x8_var = INT_MAX; |
1532 | 0 | for (int split_idx = 0; split_idx < 4; split_idx++) { |
1533 | 0 | get_variance(&var_16x16_info->split[split_idx].part_variances.none); |
1534 | 0 | int this_8x8_var = |
1535 | 0 | var_16x16_info->split[split_idx].part_variances.none.variance; |
1536 | 0 | max_8x8_var = AOMMAX(this_8x8_var, max_8x8_var); |
1537 | 0 | min_8x8_var = AOMMIN(this_8x8_var, min_8x8_var); |
1538 | 0 | } |
1539 | | // If the difference between maximum and minimum sub-block variances is high, |
1540 | | // then only evaluate PARTITION_SPLIT for the 16x16 block. Otherwise, evaluate |
1541 | | // only PARTITION_NONE. The shift factor for threshold16 has been derived |
1542 | | // empirically. |
1543 | 0 | return ((max_8x8_var - min_8x8_var) > (threshold16 << 2)) |
1544 | 0 | ? PART_EVAL_ONLY_SPLIT |
1545 | 0 | : PART_EVAL_ONLY_NONE; |
1546 | 0 | } |
1547 | | |
1548 | | static inline bool is_set_force_zeromv_skip_based_on_src_sad( |
1549 | 0 | int set_zeromv_skip_based_on_source_sad, SOURCE_SAD source_sad_nonrd) { |
1550 | 0 | if (set_zeromv_skip_based_on_source_sad == 0) return false; |
1551 | | |
1552 | 0 | if (set_zeromv_skip_based_on_source_sad >= 3) |
1553 | 0 | return source_sad_nonrd <= kLowSad; |
1554 | 0 | else if (set_zeromv_skip_based_on_source_sad >= 2) |
1555 | 0 | return source_sad_nonrd <= kVeryLowSad; |
1556 | 0 | else if (set_zeromv_skip_based_on_source_sad >= 1) |
1557 | 0 | return source_sad_nonrd == kZeroSad; |
1558 | | |
1559 | 0 | return false; |
1560 | 0 | } |
1561 | | |
// Decides whether the whole superblock can take the sb_size partition and
// force zero-mv skip mode, based on the luma/chroma SADs vs LAST.
// Returns true when the SB partition has been set here and the caller must
// exit av1_choose_var_based_partitioning() immediately; in that case `vt` has
// already been freed by this function. Returns false otherwise (may still set
// x->force_zeromv_skip_for_sb = 2 for the fully-static case).
static inline bool set_force_zeromv_skip_for_sb(
    AV1_COMP *cpi, MACROBLOCK *x, const TileInfo *const tile, VP128x128 *vt,
    unsigned int *uv_sad, int mi_row, int mi_col, unsigned int y_sad,
    BLOCK_SIZE bsize) {
  AV1_COMMON *const cm = &cpi->common;
  // The speed feature must be enabled and the source SAD small enough.
  if (!is_set_force_zeromv_skip_based_on_src_sad(
          cpi->sf.rt_sf.set_zeromv_skip_based_on_source_sad,
          x->content_state_sb.source_sad_nonrd))
    return false;
  // Optionally double the exit thresholds.
  int shift = cpi->sf.rt_sf.increase_source_sad_thresh ? 1 : 0;
  const int block_width = mi_size_wide[cm->seq_params->sb_size];
  const int block_height = mi_size_high[cm->seq_params->sb_size];
  const unsigned int thresh_exit_part_y =
      cpi->zeromv_skip_thresh_exit_part[bsize] << shift;
  unsigned int thresh_exit_part_uv =
      CALC_CHROMA_THRESH_FOR_ZEROMV_SKIP(thresh_exit_part_y) << shift;
  // Be more aggressive in UV threshold if source_sad >= VeryLowSad
  // to suppress visual artifact caused by the speed feature:
  // set_zeromv_skip_based_on_source_sad = 2. For now only for
  // part_early_exit_zeromv = 1.
  if (x->content_state_sb.source_sad_nonrd >= kVeryLowSad &&
      cpi->sf.rt_sf.part_early_exit_zeromv == 1)
    thresh_exit_part_uv = thresh_exit_part_uv >> 3;
  // Only take the early exit for complete (non-boundary) superblocks whose
  // luma and both chroma SADs are below the thresholds.
  if (mi_col + block_width <= tile->mi_col_end &&
      mi_row + block_height <= tile->mi_row_end && y_sad < thresh_exit_part_y &&
      uv_sad[0] < thresh_exit_part_uv && uv_sad[1] < thresh_exit_part_uv) {
    set_block_size(cpi, mi_row, mi_col, bsize);
    x->force_zeromv_skip_for_sb = 1;
    // The variance tree is no longer needed on this early-exit path.
    aom_free(vt);
    // Partition shape is set here at SB level.
    // Exit needs to happen from av1_choose_var_based_partitioning().
    return true;
  } else if (x->content_state_sb.source_sad_nonrd == kZeroSad &&
             cpi->sf.rt_sf.part_early_exit_zeromv >= 2)
    x->force_zeromv_skip_for_sb = 2;
  return false;
}
1599 | | |
// Top-level entry for variance-based partitioning of one superblock:
// sets variance thresholds from QP/content state, computes reference SADs
// and (for inter frames) a coarse prediction, fills the variance tree, and
// walks it top-down setting the partition for the SB. Always returns 0.
int av1_choose_var_based_partitioning(AV1_COMP *cpi, const TileInfo *const tile,
                                      ThreadData *td, MACROBLOCK *x, int mi_row,
                                      int mi_col) {
#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, choose_var_based_partitioning_time);
#endif
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  const int64_t *const vbp_thresholds = cpi->vbp_info.thresholds;
  // Per-block split decisions for the whole tree; see the indexing used
  // below: [0] = SB level, [1..4] = 64x64, [5..20] = 32x32, [21..84] = 16x16.
  PART_EVAL_STATUS force_split[85];
  int avg_64x64;
  int max_var_32x32[4];
  int min_var_32x32[4];
  int var_32x32;
  int var_64x64;
  int min_var_64x64 = INT_MAX;
  int max_var_64x64 = 0;
  int avg_16x16[4][4];
  int maxvar_16x16[4][4];
  int minvar_16x16[4][4];
  const uint8_t *src_buf;
  const uint8_t *dst_buf;
  int dst_stride;
  unsigned int uv_sad[MAX_MB_PLANE - 1];
  NOISE_LEVEL noise_level = kLow;
  bool is_zero_motion = true;
  bool scaled_ref_last = false;
  struct scale_factors sf_no_scale;
  // Identity scale factors, used when the reference is pre-scaled.
  av1_setup_scale_factors_for_frame(&sf_no_scale, cm->width, cm->height,
                                    cm->width, cm->height);

  bool is_key_frame =
      (frame_is_intra_only(cm) ||
       (cpi->ppi->use_svc &&
        cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame));

  assert(cm->seq_params->sb_size == BLOCK_64X64 ||
         cm->seq_params->sb_size == BLOCK_128X128);
  const bool is_small_sb = (cm->seq_params->sb_size == BLOCK_64X64);
  const int num_64x64_blocks = is_small_sb ? 1 : 4;

  unsigned int y_sad = UINT_MAX;
  unsigned int y_sad_g = UINT_MAX;
  unsigned int y_sad_alt = UINT_MAX;
  unsigned int y_sad_last = UINT_MAX;
  BLOCK_SIZE bsize = is_small_sb ? BLOCK_64X64 : BLOCK_128X128;

  // Force skip encoding for all superblocks on slide change for
  // non_reference_frames.
  if (cpi->sf.rt_sf.skip_encoding_non_reference_slide_change &&
      cpi->rc.high_source_sad && cpi->ppi->rtc_ref.non_reference_frame) {
    MB_MODE_INFO **mi = cm->mi_params.mi_grid_base +
                        get_mi_grid_idx(&cm->mi_params, mi_row, mi_col);
    av1_set_fixed_partitioning(cpi, tile, mi, mi_row, mi_col, bsize);
    x->force_zeromv_skip_for_sb = 1;
    return 0;
  }

  // Ref frame used in partitioning.
  MV_REFERENCE_FRAME ref_frame_partition = LAST_FRAME;

  // Local copy of the thresholds; adjusted per-SB below.
  int64_t thresholds[5] = { vbp_thresholds[0], vbp_thresholds[1],
                            vbp_thresholds[2], vbp_thresholds[3],
                            vbp_thresholds[4] };

  const int segment_id = xd->mi[0]->segment_id;
  uint64_t blk_sad = 0;
  // Fetch the precomputed 64x64 source SAD for this SB, if available
  // (only tracked on the top spatial layer).
  if (cpi->src_sad_blk_64x64 != NULL &&
      cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1) {
    const int sb_size_by_mb = (cm->seq_params->sb_size == BLOCK_128X128)
                                  ? (cm->seq_params->mib_size >> 1)
                                  : cm->seq_params->mib_size;
    const int sb_cols =
        (cm->mi_params.mi_cols + sb_size_by_mb - 1) / sb_size_by_mb;
    const int sbi_col = mi_col / sb_size_by_mb;
    const int sbi_row = mi_row / sb_size_by_mb;
    blk_sad = cpi->src_sad_blk_64x64[sbi_col + sbi_row * sb_cols];
  }

  const bool is_segment_id_boosted =
      cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled &&
      cyclic_refresh_segment_id_boosted(segment_id);
  // Effective qindex for this SB: account for delta-q, segment boost and ROI.
  const int sb_qindex =
      clamp(cm->delta_q_info.delta_q_present_flag
                ? cm->quant_params.base_qindex + x->delta_qindex
                : cm->quant_params.base_qindex,
            0, QINDEX_RANGE - 1);
  const int qindex = is_segment_id_boosted || cpi->roi.delta_qp_enabled
                         ? av1_get_qindex(&cm->seg, segment_id, sb_qindex)
                         : sb_qindex;
  set_vbp_thresholds(
      cpi, thresholds, blk_sad, qindex, x->content_state_sb.low_sumdiff,
      x->content_state_sb.source_sad_nonrd, x->content_state_sb.source_sad_rd,
      is_segment_id_boosted, x->content_state_sb.lighting_change);

  src_buf = x->plane[AOM_PLANE_Y].src.buf;
  int src_stride = x->plane[AOM_PLANE_Y].src.stride;

  // Index for force_split: 0 for 64x64, 1-4 for 32x32 blocks,
  // 5-20 for the 16x16 blocks.
  // NOTE(review): that numbering describes the BLOCK_64X64-SB case; for
  // BLOCK_128X128 SBs the levels shift up one (0: 128x128, 1-4: 64x64,
  // 5-20: 32x32, 21-84: 16x16), matching the indices used below.
  force_split[0] = PART_EVAL_ALL;
  memset(x->part_search_info.variance_low, 0,
         sizeof(x->part_search_info.variance_low));

  // Check if LAST frame is NULL, and if so, treat this frame
  // as a key frame, for the purpose of the superblock partitioning.
  // LAST == NULL can happen in cases where enhancement spatial layers are
  // enabled dynamically and the only reference is the spatial(GOLDEN).
  // If LAST frame has a different resolution: set the scaled_ref_last flag
  // and check if ref_scaled is NULL.
  if (!frame_is_intra_only(cm)) {
    const YV12_BUFFER_CONFIG *ref = get_ref_frame_yv12_buf(cm, LAST_FRAME);
    if (ref == NULL) {
      is_key_frame = true;
    } else if (ref->y_crop_height != cm->height ||
               ref->y_crop_width != cm->width) {
      scaled_ref_last = true;
      const YV12_BUFFER_CONFIG *ref_scaled =
          av1_get_scaled_ref_frame(cpi, LAST_FRAME);
      if (ref_scaled == NULL) is_key_frame = true;
    }
  }

  x->source_variance = UINT_MAX;
  // For nonrd_pickmode: compute source_variance, only for superblocks with
  // some motion for now. This input can then be used to bias the partitioning
  // or the chroma_check.
  if (cpi->sf.rt_sf.use_nonrd_pick_mode &&
      x->content_state_sb.source_sad_nonrd > kLowSad)
    x->source_variance = av1_get_perpixel_variance_facade(
        cpi, xd, &x->plane[0].src, cm->seq_params->sb_size, AOM_PLANE_Y);

  if (!is_key_frame) {
    // Set up references, compute SADs and (possibly) a coarse prediction.
    setup_planes(cpi, x, &y_sad, &y_sad_g, &y_sad_alt, &y_sad_last,
                 &ref_frame_partition, &sf_no_scale, mi_row, mi_col,
                 is_small_sb, scaled_ref_last);

    MB_MODE_INFO *mi = xd->mi[0];
    // Use reference SB directly for zero mv.
    if (mi->mv[0].as_int != 0) {
      dst_buf = xd->plane[AOM_PLANE_Y].dst.buf;
      dst_stride = xd->plane[AOM_PLANE_Y].dst.stride;
      is_zero_motion = false;
    } else {
      dst_buf = xd->plane[AOM_PLANE_Y].pre[0].buf;
      dst_stride = xd->plane[AOM_PLANE_Y].pre[0].stride;
    }
  } else {
    // Key frames compute variances against the source only.
    dst_buf = NULL;
    dst_stride = 0;
  }

  // check and set the color sensitivity of sb.
  av1_zero(uv_sad);
  chroma_check(cpi, x, bsize, y_sad_last, y_sad_g, y_sad_alt, is_key_frame,
               is_zero_motion, uv_sad);

  x->force_zeromv_skip_for_sb = 0;

  VP128x128 *vt;
  AOM_CHECK_MEM_ERROR(xd->error_info, vt, aom_malloc(sizeof(*vt)));
  vt->split = td->vt64x64;

  // If the superblock is completely static (zero source sad) and
  // the y_sad (relative to LAST ref) is very small, take the sb_size partition
  // and exit, and force zeromv_last skip mode for nonrd_pickmode.
  // Only do this on the base segment (so the QP-boosted segment, if applied,
  // can still continue cleaning/ramping up the quality).
  // Condition on color uv_sad is also added.
  if (!is_key_frame && cpi->sf.rt_sf.part_early_exit_zeromv &&
      cpi->rc.frames_since_key > 30 && segment_id == CR_SEGMENT_ID_BASE &&
      ref_frame_partition == LAST_FRAME && xd->mi[0]->mv[0].as_int == 0) {
    // Exit here, if zero mv skip flag is set at SB level.
    // (set_force_zeromv_skip_for_sb frees vt on the true path.)
    if (set_force_zeromv_skip_for_sb(cpi, x, tile, vt, uv_sad, mi_row, mi_col,
                                     y_sad, bsize))
      return 0;
  }

  if (cpi->noise_estimate.enabled)
    noise_level = av1_noise_estimate_extract_level(&cpi->noise_estimate);

  // Fill in the entire tree of 8x8 (for inter frames) or 4x4 (for key frames)
  // variances for splits.
  fill_variance_tree_leaves(cpi, x, vt, force_split, avg_16x16, maxvar_16x16,
                            minvar_16x16, thresholds, src_buf, src_stride,
                            dst_buf, dst_stride, is_key_frame, is_small_sb);

  // Aggregate variances bottom-up and decide forced splits per level.
  avg_64x64 = 0;
  for (int blk64_idx = 0; blk64_idx < num_64x64_blocks; ++blk64_idx) {
    max_var_32x32[blk64_idx] = 0;
    min_var_32x32[blk64_idx] = INT_MAX;
    const int blk64_scale_idx = blk64_idx << 2;
    for (int lvl1_idx = 0; lvl1_idx < 4; lvl1_idx++) {
      const int lvl1_scale_idx = (blk64_scale_idx + lvl1_idx) << 2;
      for (int lvl2_idx = 0; lvl2_idx < 4; lvl2_idx++) {
        // For inter frames the 16x16 trees were already filled (and split
        // decisions taken) in fill_variance_tree_leaves().
        if (!is_key_frame) continue;
        VP16x16 *vtemp = &vt->split[blk64_idx].split[lvl1_idx].split[lvl2_idx];
        for (int lvl3_idx = 0; lvl3_idx < 4; lvl3_idx++)
          fill_variance_tree(&vtemp->split[lvl3_idx], BLOCK_8X8);
        fill_variance_tree(vtemp, BLOCK_16X16);
        // If variance of this 16x16 block is above the threshold, force block
        // to split. This also forces a split on the upper levels.
        get_variance(&vtemp->part_variances.none);
        if (vtemp->part_variances.none.variance > thresholds[3]) {
          const int split_index = 21 + lvl1_scale_idx + lvl2_idx;
          force_split[split_index] =
              cpi->sf.rt_sf.vbp_prune_16x16_split_using_min_max_sub_blk_var
                  ? get_part_eval_based_on_sub_blk_var(vtemp, thresholds[3])
                  : PART_EVAL_ONLY_SPLIT;
          force_split[5 + blk64_scale_idx + lvl1_idx] = PART_EVAL_ONLY_SPLIT;
          force_split[blk64_idx + 1] = PART_EVAL_ONLY_SPLIT;
          force_split[0] = PART_EVAL_ONLY_SPLIT;
        }
      }
      fill_variance_tree(&vt->split[blk64_idx].split[lvl1_idx], BLOCK_32X32);
      // If variance of this 32x32 block is above the threshold, or if it's
      // above (some threshold of) the average variance over the sub-16x16
      // blocks, then force this block to split. This also forces a split on
      // the upper (64x64) level.
      uint64_t frame_sad_thresh = 20000;
      const int is_360p_or_smaller = cm->width * cm->height <= RESOLUTION_360P;
      if (cpi->svc.number_temporal_layers > 2 &&
          cpi->svc.temporal_layer_id == 0)
        frame_sad_thresh = frame_sad_thresh << 1;
      if (force_split[5 + blk64_scale_idx + lvl1_idx] == PART_EVAL_ALL) {
        get_variance(&vt->split[blk64_idx].split[lvl1_idx].part_variances.none);
        var_32x32 =
            vt->split[blk64_idx].split[lvl1_idx].part_variances.none.variance;
        max_var_32x32[blk64_idx] = AOMMAX(var_32x32, max_var_32x32[blk64_idx]);
        min_var_32x32[blk64_idx] = AOMMIN(var_32x32, min_var_32x32[blk64_idx]);
        const int max_min_var_16X16_diff = (maxvar_16x16[blk64_idx][lvl1_idx] -
                                            minvar_16x16[blk64_idx][lvl1_idx]);

        if (var_32x32 > thresholds[2] ||
            (!is_key_frame && var_32x32 > (thresholds[2] >> 1) &&
             var_32x32 > (avg_16x16[blk64_idx][lvl1_idx] >> 1))) {
          force_split[5 + blk64_scale_idx + lvl1_idx] = PART_EVAL_ONLY_SPLIT;
          force_split[blk64_idx + 1] = PART_EVAL_ONLY_SPLIT;
          force_split[0] = PART_EVAL_ONLY_SPLIT;
        } else if (!is_key_frame && is_360p_or_smaller &&
                   ((max_min_var_16X16_diff > (thresholds[2] >> 1) &&
                     maxvar_16x16[blk64_idx][lvl1_idx] > thresholds[2]) ||
                    (cpi->sf.rt_sf.prefer_large_partition_blocks &&
                     x->content_state_sb.source_sad_nonrd > kLowSad &&
                     cpi->rc.frame_source_sad < frame_sad_thresh &&
                     maxvar_16x16[blk64_idx][lvl1_idx] > (thresholds[2] >> 4) &&
                     maxvar_16x16[blk64_idx][lvl1_idx] >
                         (minvar_16x16[blk64_idx][lvl1_idx] << 2)))) {
          force_split[5 + blk64_scale_idx + lvl1_idx] = PART_EVAL_ONLY_SPLIT;
          force_split[blk64_idx + 1] = PART_EVAL_ONLY_SPLIT;
          force_split[0] = PART_EVAL_ONLY_SPLIT;
        }
      }
    }
    if (force_split[1 + blk64_idx] == PART_EVAL_ALL) {
      fill_variance_tree(&vt->split[blk64_idx], BLOCK_64X64);
      get_variance(&vt->split[blk64_idx].part_variances.none);
      var_64x64 = vt->split[blk64_idx].part_variances.none.variance;
      max_var_64x64 = AOMMAX(var_64x64, max_var_64x64);
      min_var_64x64 = AOMMIN(var_64x64, min_var_64x64);
      // If the difference of the max-min variances of sub-blocks or max
      // variance of a sub-block is above some threshold, then force this
      // block to split. Only checking this for noise level >= medium, if
      // encoder is in SVC or if we already forced large blocks.
      const int max_min_var_32x32_diff =
          max_var_32x32[blk64_idx] - min_var_32x32[blk64_idx];
      const int check_max_var = max_var_32x32[blk64_idx] > thresholds[1] >> 1;
      const bool check_noise_lvl = noise_level >= kMedium ||
                                   cpi->ppi->use_svc ||
                                   cpi->sf.rt_sf.prefer_large_partition_blocks;
      const int64_t set_threshold = 3 * (thresholds[1] >> 3);

      if (!is_key_frame && max_min_var_32x32_diff > set_threshold &&
          check_max_var && check_noise_lvl) {
        force_split[1 + blk64_idx] = PART_EVAL_ONLY_SPLIT;
        force_split[0] = PART_EVAL_ONLY_SPLIT;
      }
      avg_64x64 += var_64x64;
    }
    // 64x64 SBs cannot be a 128x128 partition: always split the top level.
    if (is_small_sb) force_split[0] = PART_EVAL_ONLY_SPLIT;
  }

  if (force_split[0] == PART_EVAL_ALL) {
    fill_variance_tree(vt, BLOCK_128X128);
    get_variance(&vt->part_variances.none);
    const int set_avg_64x64 = (9 * avg_64x64) >> 5;
    if (!is_key_frame && vt->part_variances.none.variance > set_avg_64x64)
      force_split[0] = PART_EVAL_ONLY_SPLIT;

    if (!is_key_frame &&
        (max_var_64x64 - min_var_64x64) > 3 * (thresholds[0] >> 3) &&
        max_var_64x64 > thresholds[0] >> 1)
      force_split[0] = PART_EVAL_ONLY_SPLIT;
  }

  // Try the full-SB partition first; on failure (or at frame boundaries)
  // descend level by level.
  if (mi_col + 32 > tile->mi_col_end || mi_row + 32 > tile->mi_row_end ||
      !set_vt_partitioning(cpi, xd, tile, vt, BLOCK_128X128, mi_row, mi_col,
                           thresholds[0], BLOCK_16X16, force_split[0])) {
    for (int blk64_idx = 0; blk64_idx < num_64x64_blocks; ++blk64_idx) {
      const int x64_idx = GET_BLK_IDX_X(blk64_idx, 4);
      const int y64_idx = GET_BLK_IDX_Y(blk64_idx, 4);
      const int blk64_scale_idx = blk64_idx << 2;

      // Now go through the entire structure, splitting every block size until
      // we get to one that's got a variance lower than our threshold.
      if (set_vt_partitioning(cpi, xd, tile, &vt->split[blk64_idx], BLOCK_64X64,
                              mi_row + y64_idx, mi_col + x64_idx, thresholds[1],
                              BLOCK_16X16, force_split[1 + blk64_idx]))
        continue;
      for (int lvl1_idx = 0; lvl1_idx < 4; ++lvl1_idx) {
        const int x32_idx = GET_BLK_IDX_X(lvl1_idx, 3);
        const int y32_idx = GET_BLK_IDX_Y(lvl1_idx, 3);
        const int lvl1_scale_idx = (blk64_scale_idx + lvl1_idx) << 2;
        if (set_vt_partitioning(
                cpi, xd, tile, &vt->split[blk64_idx].split[lvl1_idx],
                BLOCK_32X32, (mi_row + y64_idx + y32_idx),
                (mi_col + x64_idx + x32_idx), thresholds[2], BLOCK_16X16,
                force_split[5 + blk64_scale_idx + lvl1_idx]))
          continue;
        for (int lvl2_idx = 0; lvl2_idx < 4; ++lvl2_idx) {
          const int x16_idx = GET_BLK_IDX_X(lvl2_idx, 2);
          const int y16_idx = GET_BLK_IDX_Y(lvl2_idx, 2);
          const int split_index = 21 + lvl1_scale_idx + lvl2_idx;
          VP16x16 *vtemp =
              &vt->split[blk64_idx].split[lvl1_idx].split[lvl2_idx];
          if (set_vt_partitioning(cpi, xd, tile, vtemp, BLOCK_16X16,
                                  mi_row + y64_idx + y32_idx + y16_idx,
                                  mi_col + x64_idx + x32_idx + x16_idx,
                                  thresholds[3], BLOCK_8X8,
                                  force_split[split_index]))
            continue;
          // 16x16 could not be kept whole: emit the four 8x8 leaves.
          for (int lvl3_idx = 0; lvl3_idx < 4; ++lvl3_idx) {
            const int x8_idx = GET_BLK_IDX_X(lvl3_idx, 1);
            const int y8_idx = GET_BLK_IDX_Y(lvl3_idx, 1);
            set_block_size(cpi, (mi_row + y64_idx + y32_idx + y16_idx + y8_idx),
                           (mi_col + x64_idx + x32_idx + x16_idx + x8_idx),
                           BLOCK_8X8);
          }
        }
      }
    }
  }

  if (cpi->sf.rt_sf.short_circuit_low_temp_var) {
    set_low_temp_var_flag(cpi, &x->part_search_info, xd, vt, thresholds,
                          ref_frame_partition, mi_col, mi_row, is_small_sb);
  }

  aom_free(vt);
#if CONFIG_COLLECT_COMPONENT_TIMING
  end_timing(cpi, choose_var_based_partitioning_time);
#endif
  return 0;
}