/src/aom/av1/encoder/partition_strategy.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2019, Alliance for Open Media. All rights reserved. |
3 | | * |
4 | | * This source code is subject to the terms of the BSD 2 Clause License and |
5 | | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
6 | | * was not distributed with this source code in the LICENSE file, you can |
7 | | * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
8 | | * Media Patent License 1.0 was not distributed with this source code in the |
9 | | * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
10 | | */ |
11 | | |
12 | | #include <float.h> |
13 | | |
14 | | #include "config/aom_config.h" |
15 | | |
16 | | #include "av1/encoder/encodeframe_utils.h" |
17 | | #if CONFIG_THREE_PASS |
18 | | #include "av1/encoder/thirdpass.h" |
19 | | #endif |
20 | | #include "config/aom_dsp_rtcd.h" |
21 | | |
22 | | #include "av1/common/enums.h" |
23 | | #include "av1/common/reconinter.h" |
24 | | |
25 | | #if !CONFIG_REALTIME_ONLY |
26 | | #include "av1/encoder/cnn.h" |
27 | | #include "av1/encoder/partition_model_weights.h" |
28 | | #include "av1/encoder/partition_cnn_weights.h" |
29 | | #endif |
30 | | #include "av1/encoder/encoder.h" |
31 | | |
32 | | #include "av1/encoder/motion_search_facade.h" |
33 | | #include "av1/encoder/partition_strategy.h" |
34 | | #include "av1/encoder/partition_search.h" |
35 | | #include "av1/encoder/rdopt.h" |
36 | | |
37 | | #if !CONFIG_REALTIME_ONLY |
38 | | static inline void simple_motion_search_prune_part_features( |
39 | | AV1_COMP *const cpi, MACROBLOCK *x, SIMPLE_MOTION_DATA_TREE *sms_tree, |
40 | | int mi_row, int mi_col, BLOCK_SIZE bsize, float *features, |
41 | | int features_to_get); |
42 | | |
43 | | static bool ext_ml_model_decision_before_none( |
44 | | AV1_COMP *cpi, const float features_from_motion[FEATURE_SIZE_SMS_SPLIT], |
45 | | int *partition_none_allowed, int *partition_horz_allowed, |
46 | | int *partition_vert_allowed, int *do_rectangular_split, |
47 | | int *do_square_split); |
48 | | |
49 | | static bool ext_ml_model_decision_before_none_part2( |
50 | | AV1_COMP *cpi, |
51 | | const float features_from_motion[FEATURE_SIZE_SMS_PRUNE_PART], |
52 | | int *prune_horz, int *prune_vert); |
53 | | |
54 | | static bool ext_ml_model_decision_after_none( |
55 | | ExtPartController *const ext_part_controller, const int is_intra_frame, |
56 | | const float *const features_after_none, int *do_square_split, |
57 | | int *do_rectangular_split); |
58 | | |
59 | | static bool ext_ml_model_decision_after_none_part2( |
60 | | AV1_COMP *const cpi, const float *const features_terminate, |
61 | | int *terminate_partition_search); |
62 | | |
63 | | static bool ext_ml_model_decision_after_split( |
64 | | AV1_COMP *const cpi, const float *const features_terminate, |
65 | | int *terminate_partition_search); |
66 | | |
67 | | static bool ext_ml_model_decision_after_split_part2( |
68 | | ExtPartController *const ext_part_controller, const int is_intra_frame, |
69 | | const float *const features_prune, int *prune_rect_part_horz, |
70 | | int *prune_rect_part_vert); |
71 | | |
72 | | static bool ext_ml_model_decision_after_rect( |
73 | | ExtPartController *const ext_part_controller, const int is_intra_frame, |
74 | | const float *const features_after_rect, int *horza_partition_allowed, |
75 | | int *horzb_partition_allowed, int *verta_partition_allowed, |
76 | | int *vertb_partition_allowed); |
77 | | |
78 | | static bool ext_ml_model_decision_after_part_ab( |
79 | | AV1_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize, int part_ctx, |
80 | | int64_t best_rd, int64_t rect_part_rd[NUM_RECT_PARTS][SUB_PARTITIONS_RECT], |
81 | | int64_t split_rd[SUB_PARTITIONS_SPLIT], int *const partition_horz4_allowed, |
82 | | int *const partition_vert4_allowed, unsigned int pb_source_variance, |
83 | | int mi_row, int mi_col); |
84 | | |
85 | 0 | static inline int convert_bsize_to_idx(BLOCK_SIZE bsize) { |
86 | 0 | switch (bsize) { |
87 | 0 | case BLOCK_128X128: return 0; |
88 | 0 | case BLOCK_64X64: return 1; |
89 | 0 | case BLOCK_32X32: return 2; |
90 | 0 | case BLOCK_16X16: return 3; |
91 | 0 | case BLOCK_8X8: return 4; |
92 | 0 | default: assert(0 && "Invalid bsize"); return -1; |
93 | 0 | } |
94 | 0 | } |
95 | | |
// Returns the base file name used when dumping the feature set identified by
// `id`. `id` must be a valid index into the table below (0..7); the returned
// string is a literal and must not be modified.
static char *get_feature_file_name(int id) {
  // The table entries are never reassigned, so keep the table itself
  // read-only.
  static char *const feature_file_names[] = {
    "feature_before_partition_none",
    "feature_before_partition_none_prune_rect",
    "feature_after_partition_none_prune",
    "feature_after_partition_none_terminate",
    "feature_after_partition_split_terminate",
    "feature_after_partition_split_prune_rect",
    "feature_after_partition_rect",
    "feature_after_partition_ab",
  };

  // An out-of-range id would read past the end of the table.
  assert(id >= 0 &&
         id < (int)(sizeof(feature_file_names) /
                    sizeof(feature_file_names[0])) &&
         "Invalid feature file id");

  return feature_file_names[id];
}
110 | | |
111 | | static void write_features_to_file(const char *const path, |
112 | | const bool is_test_mode, |
113 | | const float *features, |
114 | | const int feature_size, const int id, |
115 | | const BLOCK_SIZE bsize, const int mi_row, |
116 | 0 | const int mi_col) { |
117 | 0 | if (!WRITE_FEATURE_TO_FILE && !is_test_mode) return; |
118 | | |
119 | 0 | char filename[256]; |
120 | 0 | snprintf(filename, sizeof(filename), "%s/%s", path, |
121 | 0 | get_feature_file_name(id)); |
122 | 0 | FILE *pfile = fopen(filename, "a"); |
123 | 0 | if (pfile == NULL) return; |
124 | 0 | if (!is_test_mode) { |
125 | 0 | fprintf(pfile, "%d,%d,%d,%d,%d\n", id, (int)bsize, mi_row, mi_col, |
126 | 0 | feature_size); |
127 | 0 | } |
128 | 0 | for (int i = 0; i < feature_size; ++i) { |
129 | 0 | fprintf(pfile, "%.6f", features[i]); |
130 | 0 | if (i < feature_size - 1) fprintf(pfile, ","); |
131 | 0 | } |
132 | 0 | fprintf(pfile, "\n"); |
133 | 0 | fclose(pfile); |
134 | 0 | } |
135 | | |
136 | | // TODO(chiyotsai@google.com): This is very much a work in progress. We still |
137 | | // need to the following: |
138 | | // -- add support for hdres |
139 | | // -- add support for pruning rectangular partitions |
140 | | // -- use reconstructed pixels instead of source pixels for padding |
141 | | // -- use chroma pixels in addition to luma pixels |
142 | | static void intra_mode_cnn_partition(const AV1_COMMON *const cm, MACROBLOCK *x, |
143 | | int quad_tree_idx, |
144 | | int intra_cnn_based_part_prune_level, |
145 | 0 | PartitionSearchState *part_state) { |
146 | 0 | assert(cm->seq_params->sb_size >= BLOCK_64X64 && |
147 | 0 | "Invalid sb_size for intra_cnn!"); |
148 | 0 | const PartitionBlkParams *blk_params = &part_state->part_blk_params; |
149 | 0 | const BLOCK_SIZE bsize = blk_params->bsize; |
150 | |
|
151 | 0 | const int bsize_idx = convert_bsize_to_idx(bsize); |
152 | |
|
153 | 0 | if (bsize == BLOCK_128X128) { |
154 | 0 | return; |
155 | 0 | } |
156 | | |
157 | 0 | PartitionSearchInfo *part_info = &x->part_search_info; |
158 | | |
159 | | // Precompute the CNN part and cache the result in MACROBLOCK |
160 | 0 | if (bsize == BLOCK_64X64 && !part_info->cnn_output_valid) { |
161 | 0 | const CNN_CONFIG *cnn_config = &av1_intra_mode_cnn_partition_cnn_config; |
162 | | |
163 | | // Prepare the output |
164 | 0 | const CNN_THREAD_DATA thread_data = { .num_workers = 1, .workers = NULL }; |
165 | 0 | const int num_outputs = 4; |
166 | 0 | const int output_dims[4] = { 1, 2, 4, 8 }; |
167 | 0 | const int out_chs[4] = { CNN_BRANCH_0_OUT_CH, CNN_BRANCH_1_OUT_CH, |
168 | 0 | CNN_BRANCH_2_OUT_CH, CNN_BRANCH_3_OUT_CH }; |
169 | 0 | float *output_buffer[CNN_TOT_OUT_CH]; |
170 | |
|
171 | 0 | float **cur_output_buf = output_buffer; |
172 | 0 | float *curr_buf_ptr = part_info->cnn_buffer; |
173 | 0 | for (int output_idx = 0; output_idx < num_outputs; output_idx++) { |
174 | 0 | const int num_chs = out_chs[output_idx]; |
175 | 0 | const int ch_size = output_dims[output_idx] * output_dims[output_idx]; |
176 | 0 | for (int ch = 0; ch < num_chs; ch++) { |
177 | 0 | cur_output_buf[ch] = curr_buf_ptr; |
178 | 0 | curr_buf_ptr += ch_size; |
179 | 0 | } |
180 | 0 | cur_output_buf += num_chs; |
181 | 0 | } |
182 | |
|
183 | 0 | CNN_MULTI_OUT output = { |
184 | 0 | .num_outputs = 4, |
185 | 0 | .output_channels = out_chs, |
186 | 0 | .output_strides = output_dims, |
187 | 0 | .output_buffer = output_buffer, |
188 | 0 | }; |
189 | | |
190 | | // Prepare the input |
191 | 0 | const MACROBLOCKD *xd = &x->e_mbd; |
192 | 0 | const int bit_depth = xd->bd; |
193 | 0 | const int dc_q = |
194 | 0 | av1_dc_quant_QTX(x->qindex, 0, bit_depth) >> (bit_depth - 8); |
195 | 0 | part_info->log_q = log1pf((float)(dc_q * dc_q) / 256.0f); |
196 | 0 | part_info->log_q = |
197 | 0 | (part_info->log_q - av1_intra_mode_cnn_partition_mean[0]) / |
198 | 0 | av1_intra_mode_cnn_partition_std[0]; |
199 | |
|
200 | 0 | const int width = 65, height = 65, |
201 | 0 | stride = x->plane[AOM_PLANE_Y].src.stride; |
202 | |
|
203 | 0 | if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { |
204 | 0 | uint16_t *image[1] = { |
205 | 0 | CONVERT_TO_SHORTPTR(x->plane[AOM_PLANE_Y].src.buf) - stride - 1 |
206 | 0 | }; |
207 | |
|
208 | 0 | if (!av1_cnn_predict_img_multi_out_highbd(image, width, height, stride, |
209 | 0 | cnn_config, &thread_data, |
210 | 0 | bit_depth, &output)) { |
211 | 0 | aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR, |
212 | 0 | "Error allocating CNN data"); |
213 | 0 | return; |
214 | 0 | } |
215 | 0 | } else { |
216 | 0 | uint8_t *image[1] = { x->plane[AOM_PLANE_Y].src.buf - stride - 1 }; |
217 | |
|
218 | 0 | if (!av1_cnn_predict_img_multi_out(image, width, height, stride, |
219 | 0 | cnn_config, &thread_data, &output)) { |
220 | 0 | aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR, |
221 | 0 | "Error allocating CNN data"); |
222 | 0 | return; |
223 | 0 | } |
224 | 0 | } |
225 | | |
226 | 0 | part_info->cnn_output_valid = 1; |
227 | 0 | } |
228 | | |
229 | 0 | if (!part_info->cnn_output_valid) { |
230 | 0 | return; |
231 | 0 | } |
232 | | |
233 | 0 | const NN_CONFIG *dnn_configs[5] = { |
234 | 0 | NULL, |
235 | 0 | &av1_intra_mode_cnn_partition_branch_0_dnn_config, |
236 | 0 | &av1_intra_mode_cnn_partition_branch_1_dnn_config, |
237 | 0 | &av1_intra_mode_cnn_partition_branch_2_dnn_config, |
238 | 0 | &av1_intra_mode_cnn_partition_branch_3_dnn_config, |
239 | 0 | }; |
240 | |
|
241 | 0 | const NN_CONFIG *dnn_config = dnn_configs[bsize_idx]; |
242 | |
|
243 | 0 | float dnn_features[100]; |
244 | 0 | float logits[4] = { 0.0f }; |
245 | |
|
246 | 0 | const float *branch_0 = part_info->cnn_buffer; |
247 | 0 | const float *branch_1 = branch_0 + CNN_BRANCH_0_OUT_SIZE; |
248 | 0 | const float *branch_2 = branch_1 + CNN_BRANCH_1_OUT_SIZE; |
249 | 0 | const float *branch_3 = branch_2 + CNN_BRANCH_2_OUT_SIZE; |
250 | |
|
251 | 0 | if (bsize == BLOCK_64X64) { |
252 | 0 | int f_idx = 0; |
253 | 0 | for (int ch_idx = 0; ch_idx < CNN_BRANCH_0_OUT_CH; ch_idx++) { |
254 | 0 | dnn_features[f_idx++] = branch_0[ch_idx]; |
255 | 0 | } |
256 | |
|
257 | 0 | const int spa_stride = 2 * 2; |
258 | 0 | for (int lin_idx = 0; lin_idx < spa_stride; lin_idx++) { |
259 | 0 | for (int ch_idx = 0; ch_idx < CNN_BRANCH_1_OUT_CH; ch_idx++) { |
260 | 0 | dnn_features[f_idx++] = branch_1[lin_idx + ch_idx * spa_stride]; |
261 | 0 | } |
262 | 0 | } |
263 | 0 | dnn_features[f_idx++] = part_info->log_q; |
264 | 0 | } else if (bsize == BLOCK_32X32) { |
265 | 0 | int f_idx = 0; |
266 | 0 | for (int idx = 0; idx < CNN_BRANCH_0_OUT_CH; idx++) { |
267 | 0 | dnn_features[f_idx++] = branch_0[idx]; |
268 | 0 | } |
269 | |
|
270 | 0 | const int curr_lin_idx = quad_to_linear_1[quad_tree_idx - 1]; |
271 | 0 | const int spa_stride = 2 * 2; |
272 | 0 | for (int ch_idx = 0; ch_idx < CNN_BRANCH_1_OUT_CH; ch_idx++) { |
273 | 0 | dnn_features[f_idx++] = branch_1[curr_lin_idx + ch_idx * spa_stride]; |
274 | 0 | } |
275 | 0 | dnn_features[f_idx++] = part_info->log_q; |
276 | 0 | } else if (bsize == BLOCK_16X16) { |
277 | 0 | int f_idx = 0; |
278 | 0 | const int prev_quad_idx = (quad_tree_idx - 1) / 4; |
279 | 0 | const int prev_lin_idx = quad_to_linear_1[prev_quad_idx - 1]; |
280 | 0 | const int prev_spa_stride = 2 * 2; |
281 | 0 | for (int ch_idx = 0; ch_idx < CNN_BRANCH_1_OUT_CH; ch_idx++) { |
282 | 0 | dnn_features[f_idx++] = branch_1[prev_lin_idx + ch_idx * prev_spa_stride]; |
283 | 0 | } |
284 | |
|
285 | 0 | const int curr_lin_idx = quad_to_linear_2[quad_tree_idx - 5]; |
286 | 0 | const int spa_stride = 4 * 4; |
287 | 0 | for (int ch_idx = 0; ch_idx < CNN_BRANCH_2_OUT_CH; ch_idx++) { |
288 | 0 | dnn_features[f_idx++] = branch_2[curr_lin_idx + ch_idx * spa_stride]; |
289 | 0 | } |
290 | 0 | dnn_features[f_idx++] = part_info->log_q; |
291 | 0 | } else if (bsize == BLOCK_8X8) { |
292 | 0 | int f_idx = 0; |
293 | 0 | const int prev_quad_idx = (quad_tree_idx - 1) / 4; |
294 | 0 | const int prev_lin_idx = quad_to_linear_2[prev_quad_idx - 5]; |
295 | 0 | const int prev_spa_stride = 4 * 4; |
296 | 0 | for (int ch_idx = 0; ch_idx < CNN_BRANCH_2_OUT_CH; ch_idx++) { |
297 | 0 | dnn_features[f_idx++] = branch_2[prev_lin_idx + ch_idx * prev_spa_stride]; |
298 | 0 | } |
299 | |
|
300 | 0 | const int curr_lin_idx = quad_to_linear_3[quad_tree_idx - 21]; |
301 | 0 | const int spa_stride = 8 * 8; |
302 | 0 | for (int ch_idx = 0; ch_idx < CNN_BRANCH_3_OUT_CH; ch_idx++) { |
303 | 0 | dnn_features[f_idx++] = branch_3[curr_lin_idx + ch_idx * spa_stride]; |
304 | 0 | } |
305 | 0 | dnn_features[f_idx++] = part_info->log_q; |
306 | 0 | } else { |
307 | 0 | assert(0 && "Invalid bsize in intra_cnn partition"); |
308 | 0 | } |
309 | | |
310 | | // Make decision |
311 | 0 | av1_nn_predict(dnn_features, dnn_config, 1, logits); |
312 | |
|
313 | 0 | const int is_720p_or_larger = AOMMIN(cm->width, cm->height) >= 720; |
314 | 0 | const int is_480p_or_larger = AOMMIN(cm->width, cm->height) >= 480; |
315 | 0 | float split_only_thresh = 100.0f, no_split_thresh = -100.0f; |
316 | 0 | if (is_720p_or_larger) { |
317 | 0 | split_only_thresh = |
318 | 0 | av1_intra_mode_cnn_partition_split_thresh_hdres[bsize_idx]; |
319 | 0 | no_split_thresh = |
320 | 0 | av1_intra_mode_cnn_partition_no_split_thresh_hdres[bsize_idx]; |
321 | 0 | } else if (is_480p_or_larger) { |
322 | 0 | split_only_thresh = |
323 | 0 | av1_intra_mode_cnn_partition_split_thresh_midres[bsize_idx]; |
324 | 0 | no_split_thresh = |
325 | 0 | av1_intra_mode_cnn_partition_no_split_thresh_midres[bsize_idx]; |
326 | 0 | } else { |
327 | 0 | split_only_thresh = |
328 | 0 | av1_intra_mode_cnn_partition_split_thresh_lowres[bsize_idx]; |
329 | 0 | no_split_thresh = |
330 | 0 | av1_intra_mode_cnn_partition_no_split_thresh_lowres[bsize_idx]; |
331 | 0 | } |
332 | |
|
333 | 0 | if (logits[0] > split_only_thresh) { |
334 | | // As screen contents tend to choose larger partitions, do not prune |
335 | | // PARTITION_NONE when intra_cnn_based_part_prune_level=1. |
336 | 0 | if (intra_cnn_based_part_prune_level != 1) { |
337 | 0 | part_state->partition_none_allowed = 0; |
338 | 0 | } |
339 | 0 | part_state->do_square_split = 1; |
340 | 0 | av1_disable_rect_partitions(part_state); |
341 | 0 | } |
342 | |
|
343 | 0 | if (logits[0] < no_split_thresh) { |
344 | 0 | av1_disable_square_split_partition(part_state); |
345 | 0 | } |
346 | 0 | } |
347 | | |
348 | | static inline int get_simple_motion_search_prune_agg(int qindex, |
349 | | int prune_level, |
350 | 0 | int is_rect_part) { |
351 | 0 | assert(prune_level < TOTAL_AGG_LVLS); |
352 | 0 | if (prune_level == NO_PRUNING) { |
353 | 0 | return -1; |
354 | 0 | } |
355 | | |
356 | | // Aggressiveness value for SIMPLE_MOTION_SEARCH_PRUNE_LEVEL except |
357 | | // QIDX_BASED_AGG_LVL |
358 | 0 | const int sms_prune_agg_levels[TOTAL_SIMPLE_AGG_LVLS] = { 0, 1, 2, 3 }; |
359 | 0 | if (prune_level < TOTAL_SIMPLE_AGG_LVLS) { |
360 | 0 | return sms_prune_agg_levels[prune_level]; |
361 | 0 | } |
362 | | |
363 | | // Map the QIDX_BASED_AGG_LVL to corresponding aggressiveness value. |
364 | | // Aggressive pruning for lower quantizers in non-boosted frames to prune |
365 | | // rectangular partitions. |
366 | 0 | const int qband = is_rect_part ? (qindex <= 90 ? 1 : 0) : 0; |
367 | 0 | const int sms_prune_agg_qindex_based[2] = { 1, 2 }; |
368 | 0 | return sms_prune_agg_qindex_based[qband]; |
369 | 0 | } |
370 | | |
371 | | // Performs a simple_motion_search with a single reference frame and extract |
372 | | // the variance of residues. Then use the features to determine whether we want |
373 | | // to go straight to splitting without trying PARTITION_NONE |
374 | | static void simple_motion_search_based_split(AV1_COMP *const cpi, MACROBLOCK *x, |
375 | | SIMPLE_MOTION_DATA_TREE *sms_tree, |
376 | 0 | PartitionSearchState *part_state) { |
377 | 0 | const AV1_COMMON *const cm = &cpi->common; |
378 | 0 | const PartitionBlkParams *blk_params = &part_state->part_blk_params; |
379 | 0 | const int mi_row = blk_params->mi_row, mi_col = blk_params->mi_col; |
380 | 0 | const BLOCK_SIZE bsize = blk_params->bsize; |
381 | |
|
382 | 0 | const int bsize_idx = convert_bsize_to_idx(bsize); |
383 | 0 | const int is_720p_or_larger = AOMMIN(cm->width, cm->height) >= 720; |
384 | 0 | const int is_480p_or_larger = AOMMIN(cm->width, cm->height) >= 480; |
385 | | // res_idx is 0 for res < 480p, 1 for 480p, 2 for 720p+ |
386 | 0 | const int res_idx = is_480p_or_larger + is_720p_or_larger; |
387 | |
|
388 | 0 | assert(bsize_idx >= 0 && bsize_idx <= 4 && |
389 | 0 | "Invalid bsize in simple_motion_search_based_split"); |
390 | |
|
391 | 0 | const float *ml_mean = av1_simple_motion_search_split_mean[bsize_idx]; |
392 | 0 | const float *ml_std = av1_simple_motion_search_split_std[bsize_idx]; |
393 | 0 | const NN_CONFIG *nn_config = |
394 | 0 | av1_simple_motion_search_split_nn_config[bsize_idx]; |
395 | |
|
396 | 0 | const int agg = get_simple_motion_search_prune_agg( |
397 | 0 | x->qindex, cpi->sf.part_sf.simple_motion_search_prune_agg, 0); |
398 | 0 | if (agg < 0) { |
399 | 0 | return; |
400 | 0 | } |
401 | | |
402 | 0 | const float split_only_thresh = |
403 | 0 | av1_simple_motion_search_split_thresh[agg][res_idx][bsize_idx]; |
404 | 0 | const float no_split_thresh = |
405 | 0 | av1_simple_motion_search_no_split_thresh[agg][res_idx][bsize_idx]; |
406 | |
|
407 | 0 | float features[FEATURE_SIZE_SMS_SPLIT] = { 0.0f }; |
408 | 0 | simple_motion_search_prune_part_features(cpi, x, sms_tree, mi_row, mi_col, |
409 | 0 | bsize, features, |
410 | 0 | FEATURE_SMS_SPLIT_MODEL_FLAG); |
411 | | |
412 | | // Write features to file |
413 | 0 | write_features_to_file(cpi->oxcf.partition_info_path, |
414 | 0 | cpi->ext_part_controller.test_mode, features, |
415 | 0 | FEATURE_SIZE_SMS_SPLIT, 0, bsize, mi_row, mi_col); |
416 | | |
417 | | // Note: it is intended to not normalize the features here, to keep it |
418 | | // consistent for all features collected and passed to the external model. |
419 | 0 | if (ext_ml_model_decision_before_none( |
420 | 0 | cpi, features, &part_state->partition_none_allowed, |
421 | 0 | &part_state->partition_rect_allowed[HORZ], |
422 | 0 | &part_state->partition_rect_allowed[VERT], |
423 | 0 | &part_state->do_rectangular_split, &part_state->do_square_split)) { |
424 | 0 | return; |
425 | 0 | } |
426 | | |
427 | 0 | for (int idx = 0; idx < FEATURE_SIZE_SMS_SPLIT; idx++) { |
428 | 0 | features[idx] = (features[idx] - ml_mean[idx]) / ml_std[idx]; |
429 | 0 | } |
430 | |
|
431 | 0 | float score = 0.0f; |
432 | |
|
433 | 0 | av1_nn_predict(features, nn_config, 1, &score); |
434 | |
|
435 | 0 | if (score > split_only_thresh) { |
436 | 0 | av1_set_square_split_only(part_state); |
437 | 0 | } |
438 | |
|
439 | 0 | if (cpi->sf.part_sf.simple_motion_search_split >= 2 && |
440 | 0 | score < no_split_thresh) { |
441 | 0 | av1_disable_square_split_partition(part_state); |
442 | 0 | } |
443 | | |
444 | | // If the score is very low, prune rectangular split since it is unlikely to |
445 | | // occur. |
446 | 0 | if (cpi->sf.part_sf.simple_motion_search_rect_split) { |
447 | 0 | const float scale = res_idx >= 2 ? 3.0f : 2.0f; |
448 | 0 | const float rect_split_thresh = |
449 | 0 | scale * av1_simple_motion_search_no_split_thresh |
450 | 0 | [cpi->sf.part_sf.simple_motion_search_rect_split][res_idx] |
451 | 0 | [bsize_idx]; |
452 | 0 | if (score < rect_split_thresh) { |
453 | 0 | part_state->do_rectangular_split = 0; |
454 | 0 | } |
455 | 0 | } |
456 | 0 | } |
457 | | |
458 | | // Given a list of ref frames in refs, performs simple_motion_search on each of |
459 | | // the refs and returns the ref with the smallest sse. Returns -1 if none of the |
460 | | // ref in the list is available. Also stores the best sse and var in best_sse, |
461 | | // best_var, respectively. If save_mv is 0, don't update mv_ref_fulls in |
462 | | // sms_tree. If save_mv is 1, update mv_ref_fulls under sms_tree and the |
463 | | // subtrees. |
464 | | static int simple_motion_search_get_best_ref( |
465 | | AV1_COMP *const cpi, MACROBLOCK *x, SIMPLE_MOTION_DATA_TREE *sms_tree, |
466 | | int mi_row, int mi_col, BLOCK_SIZE bsize, const int *const refs, |
467 | | int num_refs, int use_subpixel, int save_mv, unsigned int *best_sse, |
468 | 0 | unsigned int *best_var) { |
469 | 0 | const AV1_COMMON *const cm = &cpi->common; |
470 | 0 | int best_ref = -1; |
471 | |
|
472 | 0 | if (mi_col >= cm->mi_params.mi_cols || mi_row >= cm->mi_params.mi_rows) { |
473 | | // If the whole block is outside of the image, set the var and sse to 0. |
474 | 0 | *best_var = 0; |
475 | 0 | *best_sse = 0; |
476 | |
|
477 | 0 | return best_ref; |
478 | 0 | } |
479 | | |
480 | | // Otherwise do loop through the reference frames and find the one with the |
481 | | // minimum SSE |
482 | 0 | const int num_planes = 1; |
483 | |
|
484 | 0 | *best_sse = INT_MAX; |
485 | |
|
486 | 0 | for (int ref_idx = 0; ref_idx < num_refs; ref_idx++) { |
487 | 0 | const int ref = refs[ref_idx]; |
488 | |
|
489 | 0 | if (cpi->ref_frame_flags & av1_ref_frame_flag_list[ref]) { |
490 | 0 | const FULLPEL_MV *start_mvs = sms_tree->start_mvs; |
491 | 0 | unsigned int curr_sse = 0, curr_var = 0; |
492 | 0 | const int_mv best_mv = av1_simple_motion_search_sse_var( |
493 | 0 | cpi, x, mi_row, mi_col, bsize, ref, start_mvs[ref], num_planes, |
494 | 0 | use_subpixel, &curr_sse, &curr_var); |
495 | 0 | if (curr_sse < *best_sse) { |
496 | 0 | *best_sse = curr_sse; |
497 | 0 | *best_var = curr_var; |
498 | 0 | best_ref = ref; |
499 | 0 | } |
500 | |
|
501 | 0 | if (save_mv) { |
502 | 0 | sms_tree->start_mvs[ref].row = best_mv.as_mv.row / 8; |
503 | 0 | sms_tree->start_mvs[ref].col = best_mv.as_mv.col / 8; |
504 | |
|
505 | 0 | if (bsize >= BLOCK_8X8) { |
506 | 0 | for (int r_idx = 0; r_idx < SUB_PARTITIONS_SPLIT; r_idx++) { |
507 | | // Propagate the new motion vectors to a lower level |
508 | 0 | SIMPLE_MOTION_DATA_TREE *sub_tree = sms_tree->split[r_idx]; |
509 | 0 | sub_tree->start_mvs[ref] = sms_tree->start_mvs[ref]; |
510 | 0 | } |
511 | 0 | } |
512 | 0 | } |
513 | 0 | } |
514 | 0 | } |
515 | |
|
516 | 0 | return best_ref; |
517 | 0 | } |
518 | | |
519 | | // Collects features using simple_motion_search and store them in features. The |
520 | | // features are also cached in SIMPLE_MOTION_DATA_TREE. By default, the features |
521 | | // collected are the sse and var from the subblocks flagged by features_to_get. |
522 | | // Furthermore, if features is not NULL, then 7 more features are appended to |
523 | | // the end of features: |
524 | | // - log(1.0 + dc_q ** 2) |
525 | | // - whether an above macroblock exists |
526 | | // - width of above macroblock |
527 | | // - height of above macroblock |
528 | | // - whether a left marcoblock exists |
529 | | // - width of left macroblock |
530 | | // - height of left macroblock |
531 | | static inline void simple_motion_search_prune_part_features( |
532 | | AV1_COMP *const cpi, MACROBLOCK *x, SIMPLE_MOTION_DATA_TREE *sms_tree, |
533 | | int mi_row, int mi_col, BLOCK_SIZE bsize, float *features, |
534 | 0 | int features_to_get) { |
535 | 0 | const int w_mi = mi_size_wide[bsize]; |
536 | 0 | const int h_mi = mi_size_high[bsize]; |
537 | 0 | assert(mi_size_wide[bsize] == mi_size_high[bsize]); |
538 | 0 | assert(bsize >= BLOCK_8X8); |
539 | 0 | assert(cpi->ref_frame_flags & av1_ref_frame_flag_list[LAST_FRAME] || |
540 | 0 | cpi->ref_frame_flags & av1_ref_frame_flag_list[ALTREF_FRAME]); |
541 | | |
542 | | // Setting up motion search |
543 | 0 | const int ref_list[] = { cpi->rc.is_src_frame_alt_ref ? ALTREF_FRAME |
544 | 0 | : LAST_FRAME }; |
545 | 0 | const int num_refs = 1; |
546 | 0 | const int use_subpixel = 1; |
547 | | |
548 | | // Doing whole block first to update the mv |
549 | 0 | if (!sms_tree->sms_none_valid && features_to_get & FEATURE_SMS_NONE_FLAG) { |
550 | 0 | simple_motion_search_get_best_ref(cpi, x, sms_tree, mi_row, mi_col, bsize, |
551 | 0 | ref_list, num_refs, use_subpixel, 1, |
552 | 0 | &sms_tree->sms_none_feat[0], |
553 | 0 | &sms_tree->sms_none_feat[1]); |
554 | 0 | sms_tree->sms_none_valid = 1; |
555 | 0 | } |
556 | | |
557 | | // Split subblocks |
558 | 0 | if (features_to_get & FEATURE_SMS_SPLIT_FLAG) { |
559 | 0 | const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT); |
560 | 0 | for (int r_idx = 0; r_idx < SUB_PARTITIONS_SPLIT; r_idx++) { |
561 | 0 | const int sub_mi_col = mi_col + (r_idx & 1) * w_mi / 2; |
562 | 0 | const int sub_mi_row = mi_row + (r_idx >> 1) * h_mi / 2; |
563 | 0 | SIMPLE_MOTION_DATA_TREE *sub_tree = sms_tree->split[r_idx]; |
564 | |
|
565 | 0 | if (!sub_tree->sms_none_valid) { |
566 | 0 | simple_motion_search_get_best_ref( |
567 | 0 | cpi, x, sub_tree, sub_mi_row, sub_mi_col, subsize, ref_list, |
568 | 0 | num_refs, use_subpixel, 1, &sub_tree->sms_none_feat[0], |
569 | 0 | &sub_tree->sms_none_feat[1]); |
570 | 0 | sub_tree->sms_none_valid = 1; |
571 | 0 | } |
572 | 0 | } |
573 | 0 | } |
574 | | |
575 | | // Rectangular subblocks |
576 | 0 | if (!sms_tree->sms_rect_valid && features_to_get & FEATURE_SMS_RECT_FLAG) { |
577 | | // Horz subblock |
578 | 0 | BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_HORZ); |
579 | 0 | for (int r_idx = 0; r_idx < SUB_PARTITIONS_RECT; r_idx++) { |
580 | 0 | const int sub_mi_col = mi_col + 0; |
581 | 0 | const int sub_mi_row = mi_row + r_idx * h_mi / 2; |
582 | |
|
583 | 0 | simple_motion_search_get_best_ref( |
584 | 0 | cpi, x, sms_tree, sub_mi_row, sub_mi_col, subsize, ref_list, num_refs, |
585 | 0 | use_subpixel, 0, &sms_tree->sms_rect_feat[2 * r_idx], |
586 | 0 | &sms_tree->sms_rect_feat[2 * r_idx + 1]); |
587 | 0 | } |
588 | | |
589 | | // Vert subblock |
590 | 0 | subsize = get_partition_subsize(bsize, PARTITION_VERT); |
591 | 0 | for (int r_idx = 0; r_idx < SUB_PARTITIONS_RECT; r_idx++) { |
592 | 0 | const int sub_mi_col = mi_col + r_idx * w_mi / 2; |
593 | 0 | const int sub_mi_row = mi_row + 0; |
594 | |
|
595 | 0 | simple_motion_search_get_best_ref( |
596 | 0 | cpi, x, sms_tree, sub_mi_row, sub_mi_col, subsize, ref_list, num_refs, |
597 | 0 | use_subpixel, 0, &sms_tree->sms_rect_feat[4 + 2 * r_idx], |
598 | 0 | &sms_tree->sms_rect_feat[4 + 2 * r_idx + 1]); |
599 | 0 | } |
600 | 0 | sms_tree->sms_rect_valid = 1; |
601 | 0 | } |
602 | |
|
603 | 0 | if (!features) return; |
604 | | |
605 | 0 | int f_idx = 0; |
606 | 0 | if (features_to_get & FEATURE_SMS_NONE_FLAG) { |
607 | 0 | for (int sub_idx = 0; sub_idx < 2; sub_idx++) { |
608 | 0 | features[f_idx++] = log1pf((float)sms_tree->sms_none_feat[sub_idx]); |
609 | 0 | } |
610 | 0 | } |
611 | |
|
612 | 0 | if (features_to_get & FEATURE_SMS_SPLIT_FLAG) { |
613 | 0 | for (int sub_idx = 0; sub_idx < SUB_PARTITIONS_SPLIT; sub_idx++) { |
614 | 0 | SIMPLE_MOTION_DATA_TREE *sub_tree = sms_tree->split[sub_idx]; |
615 | 0 | features[f_idx++] = log1pf((float)sub_tree->sms_none_feat[0]); |
616 | 0 | features[f_idx++] = log1pf((float)sub_tree->sms_none_feat[1]); |
617 | 0 | } |
618 | 0 | } |
619 | |
|
620 | 0 | if (features_to_get & FEATURE_SMS_RECT_FLAG) { |
621 | 0 | for (int sub_idx = 0; sub_idx < 8; sub_idx++) { |
622 | 0 | features[f_idx++] = log1pf((float)sms_tree->sms_rect_feat[sub_idx]); |
623 | 0 | } |
624 | 0 | } |
625 | |
|
626 | 0 | const MACROBLOCKD *xd = &x->e_mbd; |
627 | 0 | set_offsets_for_motion_search(cpi, x, mi_row, mi_col, bsize); |
628 | | |
629 | | // Q_INDEX |
630 | 0 | const int dc_q = av1_dc_quant_QTX(x->qindex, 0, xd->bd) >> (xd->bd - 8); |
631 | 0 | features[f_idx++] = log1pf((float)(dc_q * dc_q) / 256.0f); |
632 | | |
633 | | // Neighbor stuff |
634 | 0 | const int has_above = !!xd->above_mbmi; |
635 | 0 | const int has_left = !!xd->left_mbmi; |
636 | 0 | const BLOCK_SIZE above_bsize = has_above ? xd->above_mbmi->bsize : bsize; |
637 | 0 | const BLOCK_SIZE left_bsize = has_left ? xd->left_mbmi->bsize : bsize; |
638 | 0 | features[f_idx++] = (float)has_above; |
639 | 0 | features[f_idx++] = (float)mi_size_wide_log2[above_bsize]; |
640 | 0 | features[f_idx++] = (float)mi_size_high_log2[above_bsize]; |
641 | 0 | features[f_idx++] = (float)has_left; |
642 | 0 | features[f_idx++] = (float)mi_size_wide_log2[left_bsize]; |
643 | 0 | features[f_idx++] = (float)mi_size_high_log2[left_bsize]; |
644 | 0 | } |
645 | | |
646 | | // Performs a simple_motion_search with two reference frames and extract |
647 | | // the variance of residues. Then use the features to determine whether we want |
648 | | // to prune some partitions. |
649 | | static void simple_motion_search_prune_rect(AV1_COMP *const cpi, MACROBLOCK *x, |
650 | | SIMPLE_MOTION_DATA_TREE *sms_tree, |
651 | 0 | PartitionSearchState *part_state) { |
652 | 0 | const AV1_COMMON *const cm = &cpi->common; |
653 | 0 | const PartitionBlkParams *blk_params = &part_state->part_blk_params; |
654 | 0 | const int mi_row = blk_params->mi_row, mi_col = blk_params->mi_col; |
655 | 0 | const BLOCK_SIZE bsize = blk_params->bsize; |
656 | |
|
657 | 0 | const int bsize_idx = convert_bsize_to_idx(bsize); |
658 | 0 | const int is_720p_or_larger = AOMMIN(cm->width, cm->height) >= 720; |
659 | 0 | const int is_480p_or_larger = AOMMIN(cm->width, cm->height) >= 480; |
660 | | // res_idx is 0 for lowres, 1 for 48p, 2 for 720p+ |
661 | 0 | const int res_idx = is_480p_or_larger + is_720p_or_larger; |
662 | | |
663 | | // Get model parameters |
664 | 0 | const NN_CONFIG *nn_config = |
665 | 0 | av1_simple_motion_search_prune_rect_nn_config[bsize_idx]; |
666 | 0 | const float *ml_mean = av1_simple_motion_search_prune_rect_mean[bsize_idx], |
667 | 0 | *ml_std = av1_simple_motion_search_prune_rect_std[bsize_idx]; |
668 | |
|
669 | 0 | const int agg = get_simple_motion_search_prune_agg( |
670 | 0 | x->qindex, cpi->sf.part_sf.simple_motion_search_prune_agg, 1); |
671 | 0 | if (agg < 0) { |
672 | 0 | return; |
673 | 0 | } |
674 | | |
675 | 0 | const float prune_thresh = |
676 | 0 | av1_simple_motion_search_prune_rect_thresh[agg][res_idx][bsize_idx]; |
677 | | |
678 | | // If there is no valid threshold, return immediately. |
679 | 0 | if (!nn_config || prune_thresh == 0.0f) { |
680 | 0 | return; |
681 | 0 | } |
682 | | |
683 | | // Get features |
684 | 0 | float features[FEATURE_SIZE_SMS_PRUNE_PART] = { 0.0f }; |
685 | 0 | simple_motion_search_prune_part_features(cpi, x, sms_tree, mi_row, mi_col, |
686 | 0 | bsize, features, |
687 | 0 | FEATURE_SMS_PRUNE_PART_FLAG); |
688 | | |
689 | | // Note: it is intended to not normalize the features here, to keep it |
690 | | // consistent for all features collected and passed to the external model. |
691 | 0 | if (cpi->sf.part_sf.simple_motion_search_prune_rect && |
692 | 0 | !frame_is_intra_only(cm) && |
693 | 0 | (part_state->partition_rect_allowed[HORZ] || |
694 | 0 | part_state->partition_rect_allowed[VERT]) && |
695 | 0 | bsize >= BLOCK_8X8 && !av1_superres_scaled(cm)) { |
696 | | // Write features to file |
697 | 0 | write_features_to_file( |
698 | 0 | cpi->oxcf.partition_info_path, cpi->ext_part_controller.test_mode, |
699 | 0 | features, FEATURE_SIZE_SMS_PRUNE_PART, 1, bsize, mi_row, mi_col); |
700 | |
|
701 | 0 | if (ext_ml_model_decision_before_none_part2( |
702 | 0 | cpi, features, &part_state->prune_rect_part[HORZ], |
703 | 0 | &part_state->prune_rect_part[VERT])) { |
704 | 0 | return; |
705 | 0 | } |
706 | 0 | } |
707 | | |
708 | 0 | for (int f_idx = 0; f_idx < FEATURE_SIZE_SMS_PRUNE_PART; f_idx++) { |
709 | 0 | features[f_idx] = (features[f_idx] - ml_mean[f_idx]) / ml_std[f_idx]; |
710 | 0 | } |
711 | | |
712 | | // Get probabilities |
713 | 0 | float scores[EXT_PARTITION_TYPES] = { 0.0f }, |
714 | 0 | probs[EXT_PARTITION_TYPES] = { 0.0f }; |
715 | 0 | const int num_classes = (bsize == BLOCK_128X128 || bsize == BLOCK_8X8) |
716 | 0 | ? PARTITION_TYPES |
717 | 0 | : EXT_PARTITION_TYPES; |
718 | |
|
719 | 0 | av1_nn_predict(features, nn_config, 1, scores); |
720 | |
|
721 | 0 | av1_nn_softmax(scores, probs, num_classes); |
722 | | |
723 | | // Determine if we should prune rectangular partitions. |
724 | 0 | if (probs[PARTITION_HORZ] <= prune_thresh) { |
725 | 0 | part_state->prune_rect_part[HORZ] = 1; |
726 | 0 | } |
727 | 0 | if (probs[PARTITION_VERT] <= prune_thresh) { |
728 | 0 | part_state->prune_rect_part[VERT] = 1; |
729 | 0 | } |
730 | 0 | } |
731 | | |
732 | | // Early terminates PARTITION_NONE using simple_motion_search features and the |
733 | | // rate, distortion, and rdcost of PARTITION_NONE. This is only called when: |
734 | | // - The frame is a show frame |
735 | | // - The frame is not intra only |
736 | | // - The current bsize is > BLOCK_8X8 |
737 | | // - blk_row + blk_height/2 < total_rows and blk_col + blk_width/2 < total_cols |
738 | | void av1_simple_motion_search_early_term_none( |
739 | | AV1_COMP *const cpi, MACROBLOCK *x, SIMPLE_MOTION_DATA_TREE *sms_tree, |
740 | 0 | const RD_STATS *none_rdc, PartitionSearchState *part_state) { |
741 | 0 | const PartitionBlkParams *blk_params = &part_state->part_blk_params; |
742 | 0 | const int mi_row = blk_params->mi_row, mi_col = blk_params->mi_col; |
743 | 0 | const BLOCK_SIZE bsize = blk_params->bsize; |
744 | |
|
745 | 0 | float features[FEATURE_SIZE_SMS_TERM_NONE] = { 0.0f }; |
746 | 0 | simple_motion_search_prune_part_features(cpi, x, sms_tree, mi_row, mi_col, |
747 | 0 | bsize, features, |
748 | 0 | FEATURE_SMS_PRUNE_PART_FLAG); |
749 | 0 | int f_idx = FEATURE_SIZE_SMS_PRUNE_PART; |
750 | |
|
751 | 0 | features[f_idx++] = log1pf((float)none_rdc->rate); |
752 | 0 | features[f_idx++] = log1pf((float)none_rdc->dist); |
753 | 0 | features[f_idx++] = log1pf((float)none_rdc->rdcost); |
754 | |
|
755 | 0 | assert(f_idx == FEATURE_SIZE_SMS_TERM_NONE); |
756 | |
|
757 | 0 | const float *ml_mean = NULL; |
758 | 0 | const float *ml_std = NULL; |
759 | 0 | const float *ml_model = NULL; |
760 | |
|
761 | 0 | if (bsize == BLOCK_128X128) { |
762 | 0 | ml_mean = av1_simple_motion_search_term_none_mean_128; |
763 | 0 | ml_std = av1_simple_motion_search_term_none_std_128; |
764 | 0 | ml_model = av1_simple_motion_search_term_none_model_128; |
765 | 0 | } else if (bsize == BLOCK_64X64) { |
766 | 0 | ml_mean = av1_simple_motion_search_term_none_mean_64; |
767 | 0 | ml_std = av1_simple_motion_search_term_none_std_64; |
768 | 0 | ml_model = av1_simple_motion_search_term_none_model_64; |
769 | 0 | } else if (bsize == BLOCK_32X32) { |
770 | 0 | ml_mean = av1_simple_motion_search_term_none_mean_32; |
771 | 0 | ml_std = av1_simple_motion_search_term_none_std_32; |
772 | 0 | ml_model = av1_simple_motion_search_term_none_model_32; |
773 | 0 | } else if (bsize == BLOCK_16X16) { |
774 | 0 | ml_mean = av1_simple_motion_search_term_none_mean_16; |
775 | 0 | ml_std = av1_simple_motion_search_term_none_std_16; |
776 | 0 | ml_model = av1_simple_motion_search_term_none_model_16; |
777 | 0 | } else { |
778 | 0 | assert(0 && "Unexpected block size in simple_motion_term_none"); |
779 | 0 | } |
780 | | |
781 | | // Write features to file |
782 | 0 | write_features_to_file(cpi->oxcf.partition_info_path, |
783 | 0 | cpi->ext_part_controller.test_mode, features, |
784 | 0 | FEATURE_SIZE_SMS_TERM_NONE, 3, bsize, mi_row, mi_col); |
785 | |
|
786 | 0 | if (ext_ml_model_decision_after_none_part2( |
787 | 0 | cpi, features, &part_state->terminate_partition_search)) { |
788 | 0 | return; |
789 | 0 | } |
790 | | |
791 | 0 | if (ml_model) { |
792 | 0 | float score = 0.0f; |
793 | 0 | for (f_idx = 0; f_idx < FEATURE_SIZE_SMS_TERM_NONE; f_idx++) { |
794 | 0 | score += |
795 | 0 | ml_model[f_idx] * (features[f_idx] - ml_mean[f_idx]) / ml_std[f_idx]; |
796 | 0 | } |
797 | 0 | score += ml_model[FEATURE_SIZE_SMS_TERM_NONE]; |
798 | |
|
799 | 0 | if (score >= 0.0f) { |
800 | 0 | part_state->terminate_partition_search = 1; |
801 | 0 | } |
802 | 0 | } |
803 | 0 | } |
804 | | |
805 | | void av1_get_max_min_partition_features(AV1_COMP *const cpi, MACROBLOCK *x, |
806 | | int mi_row, int mi_col, |
807 | 0 | float *features) { |
808 | 0 | AV1_COMMON *const cm = &cpi->common; |
809 | 0 | MACROBLOCKD *xd = &x->e_mbd; |
810 | 0 | const BLOCK_SIZE sb_size = cm->seq_params->sb_size; |
811 | | |
812 | | // Currently this only allows 128X128 SB size. May extend it to 64X64 SB size. |
813 | 0 | assert(sb_size == BLOCK_128X128); |
814 | |
|
815 | 0 | int f_idx = 0; |
816 | |
|
817 | 0 | const int dc_q = av1_dc_quant_QTX(x->qindex, 0, xd->bd) >> (xd->bd - 8); |
818 | 0 | const float log_q_sq = log1pf((float)(dc_q * dc_q) / 256.0f); |
819 | | |
820 | | // Perform full-pixel single motion search in Y plane of 16x16 mbs in the sb |
821 | 0 | float sum_mv_row_sq = 0; |
822 | 0 | float sum_mv_row = 0; |
823 | 0 | float min_abs_mv_row = FLT_MAX; |
824 | 0 | float max_abs_mv_row = 0; |
825 | |
|
826 | 0 | float sum_mv_col_sq = 0; |
827 | 0 | float sum_mv_col = 0; |
828 | 0 | float min_abs_mv_col = FLT_MAX; |
829 | 0 | float max_abs_mv_col = 0; |
830 | |
|
831 | 0 | float sum_log_sse_sq = 0; |
832 | 0 | float sum_log_sse = 0; |
833 | 0 | float min_log_sse = FLT_MAX; |
834 | 0 | float max_log_sse = 0; |
835 | |
|
836 | 0 | const BLOCK_SIZE mb_size = BLOCK_16X16; |
837 | 0 | const int mb_rows = block_size_high[sb_size] / block_size_high[mb_size]; |
838 | 0 | const int mb_cols = block_size_wide[sb_size] / block_size_wide[mb_size]; |
839 | 0 | const int mb_in_mi_size_high_log2 = mi_size_high_log2[mb_size]; |
840 | 0 | const int mb_in_mi_size_wide_log2 = mi_size_wide_log2[mb_size]; |
841 | |
|
842 | 0 | for (int mb_row = 0; mb_row < mb_rows; mb_row++) |
843 | 0 | for (int mb_col = 0; mb_col < mb_cols; mb_col++) { |
844 | 0 | const int this_mi_row = mi_row + (mb_row << mb_in_mi_size_high_log2); |
845 | 0 | const int this_mi_col = mi_col + (mb_col << mb_in_mi_size_wide_log2); |
846 | 0 | unsigned int sse = 0; |
847 | 0 | unsigned int var = 0; |
848 | 0 | const FULLPEL_MV start_mv = kZeroFullMv; |
849 | 0 | const MV_REFERENCE_FRAME ref = |
850 | 0 | cpi->rc.is_src_frame_alt_ref ? ALTREF_FRAME : LAST_FRAME; |
851 | 0 | const int_mv best_mv = av1_simple_motion_search_sse_var( |
852 | 0 | cpi, x, this_mi_row, this_mi_col, mb_size, ref, start_mv, 1, 0, &sse, |
853 | 0 | &var); |
854 | |
|
855 | 0 | const float mv_row = (float)(best_mv.as_mv.row / 8); |
856 | 0 | const float mv_col = (float)(best_mv.as_mv.col / 8); |
857 | 0 | const float log_sse = log1pf((float)sse); |
858 | 0 | const float abs_mv_row = fabsf(mv_row); |
859 | 0 | const float abs_mv_col = fabsf(mv_col); |
860 | |
|
861 | 0 | sum_mv_row_sq += mv_row * mv_row; |
862 | 0 | sum_mv_row += mv_row; |
863 | 0 | sum_mv_col_sq += mv_col * mv_col; |
864 | 0 | sum_mv_col += mv_col; |
865 | |
|
866 | 0 | if (abs_mv_row < min_abs_mv_row) min_abs_mv_row = abs_mv_row; |
867 | 0 | if (abs_mv_row > max_abs_mv_row) max_abs_mv_row = abs_mv_row; |
868 | 0 | if (abs_mv_col < min_abs_mv_col) min_abs_mv_col = abs_mv_col; |
869 | 0 | if (abs_mv_col > max_abs_mv_col) max_abs_mv_col = abs_mv_col; |
870 | |
|
871 | 0 | sum_log_sse_sq += log_sse * log_sse; |
872 | 0 | sum_log_sse += log_sse; |
873 | 0 | if (log_sse < min_log_sse) min_log_sse = log_sse; |
874 | 0 | if (log_sse > max_log_sse) max_log_sse = log_sse; |
875 | 0 | } |
876 | 0 | const int blks = mb_rows * mb_cols; |
877 | 0 | const float avg_mv_row = sum_mv_row / (float)blks; |
878 | 0 | const float var_mv_row = |
879 | 0 | sum_mv_row_sq / (float)blks - avg_mv_row * avg_mv_row; |
880 | |
|
881 | 0 | const float avg_mv_col = sum_mv_col / (float)blks; |
882 | 0 | const float var_mv_col = |
883 | 0 | sum_mv_col_sq / (float)blks - avg_mv_col * avg_mv_col; |
884 | |
|
885 | 0 | const float avg_log_sse = sum_log_sse / (float)blks; |
886 | 0 | const float var_log_sse = |
887 | 0 | sum_log_sse_sq / (float)blks - avg_log_sse * avg_log_sse; |
888 | |
|
889 | 0 | features[f_idx++] = avg_log_sse; |
890 | 0 | features[f_idx++] = avg_mv_col; |
891 | 0 | features[f_idx++] = avg_mv_row; |
892 | 0 | features[f_idx++] = log_q_sq; |
893 | 0 | features[f_idx++] = max_abs_mv_col; |
894 | 0 | features[f_idx++] = max_abs_mv_row; |
895 | 0 | features[f_idx++] = max_log_sse; |
896 | 0 | features[f_idx++] = min_abs_mv_col; |
897 | 0 | features[f_idx++] = min_abs_mv_row; |
898 | 0 | features[f_idx++] = min_log_sse; |
899 | 0 | features[f_idx++] = var_log_sse; |
900 | 0 | features[f_idx++] = var_mv_col; |
901 | 0 | features[f_idx++] = var_mv_row; |
902 | |
|
903 | 0 | assert(f_idx == FEATURE_SIZE_MAX_MIN_PART_PRED); |
904 | 0 | } |
905 | | |
906 | | // Convert result index to block size. |
907 | | // result idx block size |
908 | | // 0 BLOCK_16X16 |
909 | | // 1 BLOCK_32X32 |
910 | | // 2 BLOCK_64X64 |
911 | | // 3 BLOCK_128X128 |
912 | 0 | static BLOCK_SIZE get_block_size(int idx) { |
913 | 0 | return (BLOCK_SIZE)((idx + 2) * 3); |
914 | 0 | } |
915 | | |
916 | | BLOCK_SIZE av1_predict_max_partition(const AV1_COMP *const cpi, |
917 | | const MACROBLOCK *const x, |
918 | 0 | const float *features) { |
919 | 0 | float scores[MAX_NUM_CLASSES_MAX_MIN_PART_PRED] = { 0.0f }; |
920 | 0 | const NN_CONFIG *nn_config = &av1_max_part_pred_nn_config; |
921 | |
|
922 | 0 | assert(cpi->sf.part_sf.auto_max_partition_based_on_simple_motion != |
923 | 0 | NOT_IN_USE); |
924 | |
|
925 | 0 | av1_nn_predict(features, nn_config, 1, scores); |
926 | |
|
927 | 0 | int result = MAX_NUM_CLASSES_MAX_MIN_PART_PRED - 1; |
928 | 0 | if (cpi->sf.part_sf.auto_max_partition_based_on_simple_motion == |
929 | 0 | DIRECT_PRED) { |
930 | 0 | result = 0; |
931 | 0 | float max_score = scores[0]; |
932 | 0 | for (int i = 1; i < MAX_NUM_CLASSES_MAX_MIN_PART_PRED; ++i) { |
933 | 0 | if (scores[i] > max_score) { |
934 | 0 | max_score = scores[i]; |
935 | 0 | result = i; |
936 | 0 | } |
937 | 0 | } |
938 | 0 | return get_block_size(result); |
939 | 0 | } |
940 | | |
941 | 0 | float probs[MAX_NUM_CLASSES_MAX_MIN_PART_PRED] = { 0.0f }; |
942 | 0 | av1_nn_softmax(scores, probs, MAX_NUM_CLASSES_MAX_MIN_PART_PRED); |
943 | |
|
944 | 0 | if (cpi->sf.part_sf.auto_max_partition_based_on_simple_motion == |
945 | 0 | RELAXED_PRED) { |
946 | 0 | for (result = MAX_NUM_CLASSES_MAX_MIN_PART_PRED - 1; result >= 0; |
947 | 0 | --result) { |
948 | 0 | if (result < MAX_NUM_CLASSES_MAX_MIN_PART_PRED - 1) { |
949 | 0 | probs[result] += probs[result + 1]; |
950 | 0 | } |
951 | 0 | if (probs[result] > 0.2) break; |
952 | 0 | } |
953 | 0 | } else if (cpi->sf.part_sf.auto_max_partition_based_on_simple_motion == |
954 | 0 | ADAPT_PRED) { |
955 | 0 | const BLOCK_SIZE sb_size = cpi->common.seq_params->sb_size; |
956 | | // TODO(debargha): x->source_variance is unavailable at this point, |
957 | | // so compute. The redundant recomputation later can be removed. |
958 | 0 | const unsigned int source_variance = av1_get_perpixel_variance_facade( |
959 | 0 | cpi, &x->e_mbd, &x->plane[0].src, sb_size, AOM_PLANE_Y); |
960 | 0 | if (source_variance > 16) { |
961 | 0 | const double thresh = source_variance < 128 ? 0.05 : 0.1; |
962 | 0 | for (result = MAX_NUM_CLASSES_MAX_MIN_PART_PRED - 1; result >= 0; |
963 | 0 | --result) { |
964 | 0 | if (result < MAX_NUM_CLASSES_MAX_MIN_PART_PRED - 1) { |
965 | 0 | probs[result] += probs[result + 1]; |
966 | 0 | } |
967 | 0 | if (probs[result] > thresh) break; |
968 | 0 | } |
969 | 0 | } |
970 | 0 | } |
971 | |
|
972 | 0 | return get_block_size(result); |
973 | 0 | } |
974 | | |
975 | | // Get the minimum partition block width and height(in log scale) under a |
976 | | // SIMPLE_MOTION_DATA_TREE. |
977 | | static inline void get_min_bsize(const SIMPLE_MOTION_DATA_TREE *sms_tree, |
978 | 0 | int *min_bw, int *min_bh) { |
979 | 0 | if (!sms_tree) return; |
980 | | |
981 | 0 | const BLOCK_SIZE bsize = sms_tree->block_size; |
982 | 0 | if (bsize == BLOCK_4X4) { |
983 | 0 | *min_bw = 0; |
984 | 0 | *min_bh = 0; |
985 | 0 | return; |
986 | 0 | } |
987 | | |
988 | 0 | PARTITION_TYPE part_type = sms_tree->partitioning; |
989 | 0 | if (part_type == PARTITION_INVALID) return; |
990 | | |
991 | 0 | if (part_type == PARTITION_SPLIT) { |
992 | 0 | for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) { |
993 | 0 | get_min_bsize(sms_tree->split[i], min_bw, min_bh); |
994 | 0 | } |
995 | 0 | } else { |
996 | 0 | if (part_type == PARTITION_HORZ_A || part_type == PARTITION_HORZ_B || |
997 | 0 | part_type == PARTITION_VERT_A || part_type == PARTITION_VERT_B) |
998 | 0 | part_type = PARTITION_SPLIT; |
999 | 0 | const BLOCK_SIZE subsize = get_partition_subsize(bsize, part_type); |
1000 | 0 | if (subsize != BLOCK_INVALID) { |
1001 | 0 | *min_bw = AOMMIN(*min_bw, mi_size_wide_log2[subsize]); |
1002 | 0 | *min_bh = AOMMIN(*min_bh, mi_size_high_log2[subsize]); |
1003 | 0 | } |
1004 | 0 | } |
1005 | 0 | } |
1006 | | |
// Appends two features describing an RD cost to `features`, advancing
// *feature_idx by two:
//   1. a validity flag: 1.0f when 0 < rd < INT64_MAX, otherwise 0.0f;
//   2. rd / best_rd when the cost is valid, otherwise the neutral value 1.0f.
// A non-positive best_rd cannot form a meaningful ratio (zero would divide by
// zero), so the neutral ratio 1.0f is emitted in that case as well. The
// validity flag always describes `rd` alone.
static inline void add_rd_feature(int64_t rd, int64_t best_rd, float *features,
                                  int *feature_idx) {
  const int rd_valid = rd > 0 && rd < INT64_MAX;
  const int ratio_valid = rd_valid && best_rd > 0;
  const float rd_ratio = ratio_valid ? (float)rd / (float)best_rd : 1.0f;
  features[(*feature_idx)++] = (float)rd_valid;
  features[(*feature_idx)++] = rd_ratio;
}
1014 | | |
1015 | 0 | #define FEATURES 31 |
1016 | | void av1_ml_early_term_after_split(AV1_COMP *const cpi, MACROBLOCK *const x, |
1017 | | SIMPLE_MOTION_DATA_TREE *const sms_tree, |
1018 | | int64_t best_rd, int64_t part_none_rd, |
1019 | | int64_t part_split_rd, |
1020 | | int64_t *split_block_rd, |
1021 | 0 | PartitionSearchState *part_state) { |
1022 | 0 | const PartitionBlkParams *blk_params = &part_state->part_blk_params; |
1023 | 0 | const int mi_row = blk_params->mi_row, mi_col = blk_params->mi_col; |
1024 | 0 | const BLOCK_SIZE bsize = blk_params->bsize; |
1025 | |
|
1026 | 0 | if (best_rd <= 0 || best_rd == INT64_MAX || |
1027 | 0 | part_state->terminate_partition_search) |
1028 | 0 | return; |
1029 | | |
1030 | 0 | const AV1_COMMON *const cm = &cpi->common; |
1031 | 0 | const int is_480p_or_larger = AOMMIN(cm->width, cm->height) >= 480; |
1032 | 0 | const NN_CONFIG *nn_config = NULL; |
1033 | 0 | float thresh = -1e6; |
1034 | 0 | switch (bsize) { |
1035 | 0 | case BLOCK_128X128: break; |
1036 | 0 | case BLOCK_64X64: |
1037 | 0 | nn_config = &av1_early_term_after_split_nnconfig_64; |
1038 | 0 | thresh = is_480p_or_larger ? -2.0f : -1.2f; |
1039 | 0 | break; |
1040 | 0 | case BLOCK_32X32: |
1041 | 0 | nn_config = &av1_early_term_after_split_nnconfig_32; |
1042 | 0 | thresh = is_480p_or_larger ? -2.6f : -2.3f; |
1043 | 0 | break; |
1044 | 0 | case BLOCK_16X16: |
1045 | 0 | nn_config = &av1_early_term_after_split_nnconfig_16; |
1046 | 0 | thresh = is_480p_or_larger ? -2.0f : -2.4f; |
1047 | 0 | break; |
1048 | 0 | case BLOCK_8X8: |
1049 | 0 | nn_config = &av1_early_term_after_split_nnconfig_8; |
1050 | 0 | thresh = is_480p_or_larger ? -1.0f : -1.4f; |
1051 | 0 | break; |
1052 | 0 | case BLOCK_4X4: break; |
1053 | 0 | default: |
1054 | 0 | assert(0 && "Invalid block size in av1_ml_early_term_after_split()."); |
1055 | 0 | break; |
1056 | 0 | } |
1057 | 0 | if (!nn_config) return; |
1058 | | |
1059 | | // Use more conservative threshold for level 1. |
1060 | 0 | if (cpi->sf.part_sf.ml_early_term_after_part_split_level < 2) thresh -= 0.3f; |
1061 | |
|
1062 | 0 | const MACROBLOCKD *const xd = &x->e_mbd; |
1063 | 0 | const int dc_q = av1_dc_quant_QTX(x->qindex, 0, xd->bd) >> (xd->bd - 8); |
1064 | 0 | const int bs = block_size_wide[bsize]; |
1065 | 0 | int f_idx = 0; |
1066 | 0 | float features[FEATURES] = { 0.0f }; |
1067 | |
|
1068 | 0 | features[f_idx++] = log1pf((float)dc_q / 4.0f); |
1069 | 0 | features[f_idx++] = log1pf((float)best_rd / bs / bs / 1024.0f); |
1070 | |
|
1071 | 0 | add_rd_feature(part_none_rd, best_rd, features, &f_idx); |
1072 | 0 | add_rd_feature(part_split_rd, best_rd, features, &f_idx); |
1073 | |
|
1074 | 0 | for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) { |
1075 | 0 | add_rd_feature(split_block_rd[i], best_rd, features, &f_idx); |
1076 | 0 | int min_bw = MAX_SB_SIZE_LOG2; |
1077 | 0 | int min_bh = MAX_SB_SIZE_LOG2; |
1078 | 0 | get_min_bsize(sms_tree->split[i], &min_bw, &min_bh); |
1079 | 0 | features[f_idx++] = (float)min_bw; |
1080 | 0 | features[f_idx++] = (float)min_bh; |
1081 | 0 | } |
1082 | |
|
1083 | 0 | simple_motion_search_prune_part_features(cpi, x, sms_tree, mi_row, mi_col, |
1084 | 0 | bsize, NULL, |
1085 | 0 | FEATURE_SMS_PRUNE_PART_FLAG); |
1086 | |
|
1087 | 0 | features[f_idx++] = log1pf((float)sms_tree->sms_none_feat[1]); |
1088 | |
|
1089 | 0 | features[f_idx++] = log1pf((float)sms_tree->split[0]->sms_none_feat[1]); |
1090 | 0 | features[f_idx++] = log1pf((float)sms_tree->split[1]->sms_none_feat[1]); |
1091 | 0 | features[f_idx++] = log1pf((float)sms_tree->split[2]->sms_none_feat[1]); |
1092 | 0 | features[f_idx++] = log1pf((float)sms_tree->split[3]->sms_none_feat[1]); |
1093 | |
|
1094 | 0 | features[f_idx++] = log1pf((float)sms_tree->sms_rect_feat[1]); |
1095 | 0 | features[f_idx++] = log1pf((float)sms_tree->sms_rect_feat[3]); |
1096 | 0 | features[f_idx++] = log1pf((float)sms_tree->sms_rect_feat[5]); |
1097 | 0 | features[f_idx++] = log1pf((float)sms_tree->sms_rect_feat[7]); |
1098 | |
|
1099 | 0 | assert(f_idx == FEATURES); |
1100 | | |
1101 | | // Write features to file |
1102 | 0 | write_features_to_file(cpi->oxcf.partition_info_path, |
1103 | 0 | cpi->ext_part_controller.test_mode, features, FEATURES, |
1104 | 0 | 4, bsize, mi_row, mi_col); |
1105 | |
|
1106 | 0 | if (ext_ml_model_decision_after_split( |
1107 | 0 | cpi, features, &part_state->terminate_partition_search)) { |
1108 | 0 | return; |
1109 | 0 | } |
1110 | | |
1111 | 0 | float score = 0.0f; |
1112 | 0 | av1_nn_predict(features, nn_config, 1, &score); |
1113 | | // Score is indicator of confidence that we should NOT terminate. |
1114 | 0 | if (score < thresh) { |
1115 | 0 | part_state->terminate_partition_search = 1; |
1116 | 0 | } |
1117 | 0 | } |
1118 | | #undef FEATURES |
1119 | | |
1120 | | void av1_ml_prune_rect_partition(AV1_COMP *const cpi, const MACROBLOCK *const x, |
1121 | | int64_t best_rd, int64_t none_rd, |
1122 | | const int64_t *split_rd, |
1123 | 0 | PartitionSearchState *part_state) { |
1124 | 0 | const PartitionBlkParams *blk_params = &part_state->part_blk_params; |
1125 | 0 | const int mi_row = blk_params->mi_row, mi_col = blk_params->mi_col; |
1126 | 0 | const BLOCK_SIZE bsize = blk_params->bsize; |
1127 | |
|
1128 | 0 | if (bsize < BLOCK_8X8 || best_rd >= 1000000000) return; |
1129 | 0 | best_rd = AOMMAX(best_rd, 1); |
1130 | 0 | const NN_CONFIG *nn_config = NULL; |
1131 | 0 | const float prob_thresholds[5] = { 0.01f, 0.01f, 0.004f, 0.002f, 0.002f }; |
1132 | 0 | float cur_thresh = 0.0f; |
1133 | 0 | switch (bsize) { |
1134 | 0 | case BLOCK_8X8: |
1135 | 0 | nn_config = &av1_rect_partition_nnconfig_8; |
1136 | 0 | cur_thresh = prob_thresholds[0]; |
1137 | 0 | break; |
1138 | 0 | case BLOCK_16X16: |
1139 | 0 | nn_config = &av1_rect_partition_nnconfig_16; |
1140 | 0 | cur_thresh = prob_thresholds[1]; |
1141 | 0 | break; |
1142 | 0 | case BLOCK_32X32: |
1143 | 0 | nn_config = &av1_rect_partition_nnconfig_32; |
1144 | 0 | cur_thresh = prob_thresholds[2]; |
1145 | 0 | break; |
1146 | 0 | case BLOCK_64X64: |
1147 | 0 | nn_config = &av1_rect_partition_nnconfig_64; |
1148 | 0 | cur_thresh = prob_thresholds[3]; |
1149 | 0 | break; |
1150 | 0 | case BLOCK_128X128: |
1151 | 0 | nn_config = &av1_rect_partition_nnconfig_128; |
1152 | 0 | cur_thresh = prob_thresholds[4]; |
1153 | 0 | break; |
1154 | 0 | default: assert(0 && "Unexpected bsize."); |
1155 | 0 | } |
1156 | 0 | if (!nn_config) return; |
1157 | | |
1158 | | // 1. Compute input features |
1159 | 0 | float features[9]; |
1160 | | |
1161 | | // RD cost ratios |
1162 | 0 | for (int i = 0; i < 5; i++) features[i] = 1.0f; |
1163 | 0 | if (none_rd > 0 && none_rd < 1000000000) |
1164 | 0 | features[0] = (float)none_rd / (float)best_rd; |
1165 | 0 | for (int i = 0; i < SUB_PARTITIONS_SPLIT; i++) { |
1166 | 0 | if (split_rd[i] > 0 && split_rd[i] < 1000000000) |
1167 | 0 | features[1 + i] = (float)split_rd[i] / (float)best_rd; |
1168 | 0 | } |
1169 | | |
1170 | | // Variance ratios |
1171 | 0 | const MACROBLOCKD *const xd = &x->e_mbd; |
1172 | 0 | int whole_block_variance; |
1173 | 0 | whole_block_variance = av1_get_perpixel_variance_facade( |
1174 | 0 | cpi, xd, &x->plane[0].src, bsize, AOM_PLANE_Y); |
1175 | 0 | whole_block_variance = AOMMAX(whole_block_variance, 1); |
1176 | |
|
1177 | 0 | int split_variance[SUB_PARTITIONS_SPLIT]; |
1178 | 0 | const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT); |
1179 | 0 | struct buf_2d buf; |
1180 | 0 | buf.stride = x->plane[0].src.stride; |
1181 | 0 | const int bw = block_size_wide[bsize]; |
1182 | 0 | for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) { |
1183 | 0 | const int x_idx = (i & 1) * bw / 2; |
1184 | 0 | const int y_idx = (i >> 1) * bw / 2; |
1185 | 0 | buf.buf = x->plane[0].src.buf + x_idx + y_idx * buf.stride; |
1186 | 0 | split_variance[i] = |
1187 | 0 | av1_get_perpixel_variance_facade(cpi, xd, &buf, subsize, AOM_PLANE_Y); |
1188 | 0 | } |
1189 | |
|
1190 | 0 | for (int i = 0; i < SUB_PARTITIONS_SPLIT; i++) |
1191 | 0 | features[5 + i] = (float)split_variance[i] / (float)whole_block_variance; |
1192 | | |
1193 | | // Write features to file |
1194 | 0 | write_features_to_file(cpi->oxcf.partition_info_path, |
1195 | 0 | cpi->ext_part_controller.test_mode, features, |
1196 | 0 | /*feature_size=*/9, 5, bsize, mi_row, mi_col); |
1197 | |
|
1198 | 0 | if (ext_ml_model_decision_after_split_part2( |
1199 | 0 | &cpi->ext_part_controller, frame_is_intra_only(&cpi->common), |
1200 | 0 | features, &part_state->prune_rect_part[HORZ], |
1201 | 0 | &part_state->prune_rect_part[VERT])) { |
1202 | 0 | return; |
1203 | 0 | } |
1204 | | |
1205 | | // 2. Do the prediction and prune 0-2 partitions based on their probabilities |
1206 | 0 | float raw_scores[3] = { 0.0f }; |
1207 | 0 | av1_nn_predict(features, nn_config, 1, raw_scores); |
1208 | 0 | float probs[3] = { 0.0f }; |
1209 | 0 | av1_nn_softmax(raw_scores, probs, 3); |
1210 | | |
1211 | | // probs[0] is the probability of the fact that both rectangular partitions |
1212 | | // are worse than current best_rd |
1213 | 0 | if (probs[1] <= cur_thresh) part_state->prune_rect_part[HORZ] = 1; |
1214 | 0 | if (probs[2] <= cur_thresh) part_state->prune_rect_part[VERT] = 1; |
1215 | 0 | } |
1216 | | |
1217 | | // Use a ML model to predict if horz_a, horz_b, vert_a, and vert_b should be |
1218 | | // considered. |
1219 | | static void ml_prune_ab_partition(AV1_COMP *const cpi, int part_ctx, |
1220 | | int var_ctx, int64_t best_rd, |
1221 | | PartitionSearchState *part_state, |
1222 | 0 | int *ab_partitions_allowed) { |
1223 | 0 | const PartitionBlkParams blk_params = part_state->part_blk_params; |
1224 | 0 | const int mi_row = blk_params.mi_row; |
1225 | 0 | const int mi_col = blk_params.mi_col; |
1226 | 0 | const BLOCK_SIZE bsize = blk_params.bsize; |
1227 | |
|
1228 | 0 | if (bsize < BLOCK_8X8 || best_rd >= 1000000000) return; |
1229 | 0 | const NN_CONFIG *nn_config = NULL; |
1230 | 0 | switch (bsize) { |
1231 | 0 | case BLOCK_8X8: nn_config = NULL; break; |
1232 | 0 | case BLOCK_16X16: nn_config = &av1_ab_partition_nnconfig_16; break; |
1233 | 0 | case BLOCK_32X32: nn_config = &av1_ab_partition_nnconfig_32; break; |
1234 | 0 | case BLOCK_64X64: nn_config = &av1_ab_partition_nnconfig_64; break; |
1235 | 0 | case BLOCK_128X128: nn_config = &av1_ab_partition_nnconfig_128; break; |
1236 | 0 | default: assert(0 && "Unexpected bsize."); |
1237 | 0 | } |
1238 | 0 | if (!nn_config) return; |
1239 | | |
1240 | | // Generate features. |
1241 | 0 | float features[10]; |
1242 | 0 | int feature_index = 0; |
1243 | 0 | features[feature_index++] = (float)part_ctx; |
1244 | 0 | features[feature_index++] = (float)var_ctx; |
1245 | 0 | const int rdcost = (int)AOMMIN(INT_MAX, best_rd); |
1246 | 0 | int sub_block_rdcost[8] = { 0 }; |
1247 | 0 | int rd_index = 0; |
1248 | 0 | for (int i = 0; i < SUB_PARTITIONS_RECT; ++i) { |
1249 | 0 | const int64_t *horz_rd = part_state->rect_part_rd[HORZ]; |
1250 | 0 | if (horz_rd[i] > 0 && horz_rd[i] < 1000000000) |
1251 | 0 | sub_block_rdcost[rd_index] = (int)horz_rd[i]; |
1252 | 0 | ++rd_index; |
1253 | 0 | } |
1254 | 0 | for (int i = 0; i < SUB_PARTITIONS_RECT; ++i) { |
1255 | 0 | const int64_t *vert_rd = part_state->rect_part_rd[VERT]; |
1256 | 0 | if (vert_rd[i] > 0 && vert_rd[i] < 1000000000) |
1257 | 0 | sub_block_rdcost[rd_index] = (int)vert_rd[i]; |
1258 | 0 | ++rd_index; |
1259 | 0 | } |
1260 | 0 | for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) { |
1261 | 0 | const int64_t *split_rd = part_state->split_rd; |
1262 | 0 | if (split_rd[i] > 0 && split_rd[i] < 1000000000) |
1263 | 0 | sub_block_rdcost[rd_index] = (int)split_rd[i]; |
1264 | 0 | ++rd_index; |
1265 | 0 | } |
1266 | 0 | for (int i = 0; i < 8; ++i) { |
1267 | | // Ratio between the sub-block RD and the whole-block RD. |
1268 | 0 | float rd_ratio = 1.0f; |
1269 | 0 | if (sub_block_rdcost[i] > 0 && sub_block_rdcost[i] < rdcost) |
1270 | 0 | rd_ratio = (float)sub_block_rdcost[i] / (float)rdcost; |
1271 | 0 | features[feature_index++] = rd_ratio; |
1272 | 0 | } |
1273 | 0 | assert(feature_index == 10); |
1274 | | |
1275 | | // Write features to file |
1276 | 0 | if (!frame_is_intra_only(&cpi->common)) { |
1277 | 0 | write_features_to_file(cpi->oxcf.partition_info_path, |
1278 | 0 | cpi->ext_part_controller.test_mode, features, |
1279 | 0 | /*feature_size=*/10, 6, bsize, mi_row, mi_col); |
1280 | 0 | } |
1281 | |
|
1282 | 0 | if (ext_ml_model_decision_after_rect( |
1283 | 0 | &cpi->ext_part_controller, frame_is_intra_only(&cpi->common), |
1284 | 0 | features, &ab_partitions_allowed[HORZ_A], |
1285 | 0 | &ab_partitions_allowed[HORZ_B], &ab_partitions_allowed[VERT_A], |
1286 | 0 | &ab_partitions_allowed[VERT_B])) { |
1287 | 0 | return; |
1288 | 0 | } |
1289 | | |
1290 | | // Calculate scores using the NN model. |
1291 | 0 | float score[16] = { 0.0f }; |
1292 | 0 | av1_nn_predict(features, nn_config, 1, score); |
1293 | 0 | int int_score[16]; |
1294 | 0 | int max_score = -1000; |
1295 | 0 | for (int i = 0; i < 16; ++i) { |
1296 | 0 | int_score[i] = (int)(100 * score[i]); |
1297 | 0 | max_score = AOMMAX(int_score[i], max_score); |
1298 | 0 | } |
1299 | | |
1300 | | // Make decisions based on the model scores. |
1301 | 0 | int thresh = max_score; |
1302 | 0 | switch (bsize) { |
1303 | 0 | case BLOCK_16X16: thresh -= 150; break; |
1304 | 0 | case BLOCK_32X32: thresh -= 100; break; |
1305 | 0 | default: break; |
1306 | 0 | } |
1307 | 0 | av1_zero_array(ab_partitions_allowed, NUM_AB_PARTS); |
1308 | 0 | for (int i = 0; i < 16; ++i) { |
1309 | 0 | if (int_score[i] >= thresh) { |
1310 | 0 | if ((i >> 0) & 1) ab_partitions_allowed[HORZ_A] = 1; |
1311 | 0 | if ((i >> 1) & 1) ab_partitions_allowed[HORZ_B] = 1; |
1312 | 0 | if ((i >> 2) & 1) ab_partitions_allowed[VERT_A] = 1; |
1313 | 0 | if ((i >> 3) & 1) ab_partitions_allowed[VERT_B] = 1; |
1314 | 0 | } |
1315 | 0 | } |
1316 | 0 | } |
1317 | | |
1318 | 0 | #define FEATURES 18 |
1319 | 0 | #define LABELS 4 |
1320 | | // Use a ML model to predict if horz4 and vert4 should be considered. |
1321 | | void av1_ml_prune_4_partition(AV1_COMP *const cpi, MACROBLOCK *const x, |
1322 | | int part_ctx, int64_t best_rd, |
1323 | | PartitionSearchState *part_state, |
1324 | | int *part4_allowed, |
1325 | 0 | unsigned int pb_source_variance) { |
1326 | 0 | const PartitionBlkParams blk_params = part_state->part_blk_params; |
1327 | 0 | const int mi_row = blk_params.mi_row; |
1328 | 0 | const int mi_col = blk_params.mi_col; |
1329 | 0 | const BLOCK_SIZE bsize = blk_params.bsize; |
1330 | |
|
1331 | 0 | int64_t(*rect_part_rd)[SUB_PARTITIONS_RECT] = part_state->rect_part_rd; |
1332 | 0 | int64_t *split_rd = part_state->split_rd; |
1333 | 0 | if (ext_ml_model_decision_after_part_ab( |
1334 | 0 | cpi, x, bsize, part_ctx, best_rd, rect_part_rd, split_rd, |
1335 | 0 | &part4_allowed[HORZ4], &part4_allowed[VERT4], pb_source_variance, |
1336 | 0 | mi_row, mi_col)) |
1337 | 0 | return; |
1338 | | |
1339 | 0 | if (best_rd >= 1000000000) return; |
1340 | 0 | int64_t *horz_rd = rect_part_rd[HORZ4]; |
1341 | 0 | int64_t *vert_rd = rect_part_rd[VERT4]; |
1342 | 0 | const NN_CONFIG *nn_config = NULL; |
1343 | | // 4-way partitions are only allowed for these three square block sizes. |
1344 | 0 | switch (bsize) { |
1345 | 0 | case BLOCK_16X16: nn_config = &av1_4_partition_nnconfig_16; break; |
1346 | 0 | case BLOCK_32X32: nn_config = &av1_4_partition_nnconfig_32; break; |
1347 | 0 | case BLOCK_64X64: nn_config = &av1_4_partition_nnconfig_64; break; |
1348 | 0 | default: assert(0 && "Unexpected bsize."); |
1349 | 0 | } |
1350 | 0 | if (!nn_config) return; |
1351 | | |
1352 | | // Generate features. |
1353 | 0 | float features[FEATURES]; |
1354 | 0 | int feature_index = 0; |
1355 | 0 | features[feature_index++] = (float)part_ctx; |
1356 | 0 | features[feature_index++] = (float)get_unsigned_bits(pb_source_variance); |
1357 | |
|
1358 | 0 | const int rdcost = (int)AOMMIN(INT_MAX, best_rd); |
1359 | 0 | int sub_block_rdcost[8] = { 0 }; |
1360 | 0 | int rd_index = 0; |
1361 | 0 | for (int i = 0; i < SUB_PARTITIONS_RECT; ++i) { |
1362 | 0 | if (horz_rd[i] > 0 && horz_rd[i] < 1000000000) |
1363 | 0 | sub_block_rdcost[rd_index] = (int)horz_rd[i]; |
1364 | 0 | ++rd_index; |
1365 | 0 | } |
1366 | 0 | for (int i = 0; i < SUB_PARTITIONS_RECT; ++i) { |
1367 | 0 | if (vert_rd[i] > 0 && vert_rd[i] < 1000000000) |
1368 | 0 | sub_block_rdcost[rd_index] = (int)vert_rd[i]; |
1369 | 0 | ++rd_index; |
1370 | 0 | } |
1371 | 0 | for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) { |
1372 | 0 | if (split_rd[i] > 0 && split_rd[i] < 1000000000) |
1373 | 0 | sub_block_rdcost[rd_index] = (int)split_rd[i]; |
1374 | 0 | ++rd_index; |
1375 | 0 | } |
1376 | 0 | for (int i = 0; i < 8; ++i) { |
1377 | | // Ratio between the sub-block RD and the whole-block RD. |
1378 | 0 | float rd_ratio = 1.0f; |
1379 | 0 | if (sub_block_rdcost[i] > 0 && sub_block_rdcost[i] < rdcost) |
1380 | 0 | rd_ratio = (float)sub_block_rdcost[i] / (float)rdcost; |
1381 | 0 | features[feature_index++] = rd_ratio; |
1382 | 0 | } |
1383 | | |
1384 | | // Get variance of the 1:4 and 4:1 sub-blocks. |
1385 | 0 | unsigned int horz_4_source_var[SUB_PARTITIONS_PART4] = { 0 }; |
1386 | 0 | unsigned int vert_4_source_var[SUB_PARTITIONS_PART4] = { 0 }; |
1387 | 0 | { |
1388 | 0 | BLOCK_SIZE horz_4_bs = get_partition_subsize(bsize, PARTITION_HORZ_4); |
1389 | 0 | BLOCK_SIZE vert_4_bs = get_partition_subsize(bsize, PARTITION_VERT_4); |
1390 | |
|
1391 | 0 | assert(horz_4_bs != BLOCK_INVALID); |
1392 | 0 | assert(vert_4_bs != BLOCK_INVALID); |
1393 | |
|
1394 | 0 | av1_setup_src_planes(x, cpi->source, mi_row, mi_col, |
1395 | 0 | av1_num_planes(&cpi->common), bsize); |
1396 | 0 | const int src_stride = x->plane[0].src.stride; |
1397 | 0 | uint8_t *src = x->plane[0].src.buf; |
1398 | 0 | const MACROBLOCKD *const xd = &x->e_mbd; |
1399 | |
|
1400 | 0 | struct buf_2d horz_4_src, vert_4_src; |
1401 | 0 | horz_4_src.stride = src_stride; |
1402 | 0 | vert_4_src.stride = src_stride; |
1403 | |
|
1404 | 0 | for (int i = 0; i < SUB_PARTITIONS_PART4; ++i) { |
1405 | 0 | horz_4_src.buf = src + i * block_size_high[horz_4_bs] * src_stride; |
1406 | 0 | vert_4_src.buf = src + i * block_size_wide[vert_4_bs]; |
1407 | |
|
1408 | 0 | horz_4_source_var[i] = av1_get_perpixel_variance_facade( |
1409 | 0 | cpi, xd, &horz_4_src, horz_4_bs, AOM_PLANE_Y); |
1410 | 0 | vert_4_source_var[i] = av1_get_perpixel_variance_facade( |
1411 | 0 | cpi, xd, &vert_4_src, vert_4_bs, AOM_PLANE_Y); |
1412 | 0 | } |
1413 | 0 | } |
1414 | |
|
1415 | 0 | const float denom = (float)(pb_source_variance + 1); |
1416 | 0 | const float low_b = 0.1f; |
1417 | 0 | const float high_b = 10.0f; |
1418 | 0 | for (int i = 0; i < SUB_PARTITIONS_PART4; ++i) { |
1419 | | // Ratio between the 4:1 sub-block variance and the whole-block variance. |
1420 | 0 | float var_ratio = (float)(horz_4_source_var[i] + 1) / denom; |
1421 | 0 | if (var_ratio < low_b) var_ratio = low_b; |
1422 | 0 | if (var_ratio > high_b) var_ratio = high_b; |
1423 | 0 | features[feature_index++] = var_ratio; |
1424 | 0 | } |
1425 | 0 | for (int i = 0; i < SUB_PARTITIONS_PART4; ++i) { |
1426 | | // Ratio between the 1:4 sub-block variance and the whole-block variance.
1427 | 0 | float var_ratio = (float)(vert_4_source_var[i] + 1) / denom; |
1428 | 0 | if (var_ratio < low_b) var_ratio = low_b; |
1429 | 0 | if (var_ratio > high_b) var_ratio = high_b; |
1430 | 0 | features[feature_index++] = var_ratio; |
1431 | 0 | } |
1432 | 0 | assert(feature_index == FEATURES); |
1433 | | |
1434 | | // Write features to file |
1435 | 0 | if (!frame_is_intra_only(&cpi->common)) { |
1436 | 0 | write_features_to_file(cpi->oxcf.partition_info_path, |
1437 | 0 | cpi->ext_part_controller.test_mode, features, |
1438 | 0 | FEATURES, 7, bsize, mi_row, mi_col); |
1439 | 0 | } |
1440 | | |
1441 | | // Calculate scores using the NN model. |
1442 | 0 | float score[LABELS] = { 0.0f }; |
1443 | 0 | av1_nn_predict(features, nn_config, 1, score); |
1444 | 0 | int int_score[LABELS]; |
1445 | 0 | int max_score = -1000; |
1446 | 0 | for (int i = 0; i < LABELS; ++i) { |
1447 | 0 | int_score[i] = (int)(100 * score[i]); |
1448 | 0 | max_score = AOMMAX(int_score[i], max_score); |
1449 | 0 | } |
1450 | | |
1451 | | // Make decisions based on the model scores. |
1452 | 0 | int thresh = max_score; |
1453 | 0 | switch (bsize) { |
1454 | 0 | case BLOCK_16X16: thresh -= 500; break; |
1455 | 0 | case BLOCK_32X32: thresh -= 500; break; |
1456 | 0 | case BLOCK_64X64: thresh -= 200; break; |
1457 | 0 | default: break; |
1458 | 0 | } |
1459 | 0 | av1_zero_array(part4_allowed, NUM_PART4_TYPES); |
1460 | 0 | for (int i = 0; i < LABELS; ++i) { |
1461 | 0 | if (int_score[i] >= thresh) { |
1462 | 0 | if ((i >> 0) & 1) part4_allowed[HORZ4] = 1; |
1463 | 0 | if ((i >> 1) & 1) part4_allowed[VERT4] = 1; |
1464 | 0 | } |
1465 | 0 | } |
1466 | 0 | } |
1467 | | #undef FEATURES |
1468 | | #undef LABELS |
1469 | | |
1470 | 0 | #define FEATURES 4
// ML-based partition search breakout. Builds FEATURES (4) inputs from
// rd_stats, pb_source_variance and the plane-0 quantizer, runs the NN model
// selected by the block size, and clears part_state->do_square_split and
// part_state->do_rectangular_split when the score (scaled by 100) reaches the
// speed-feature threshold. part_state is left untouched when there is no
// model for the block size or the configured threshold is negative. When
// ext_ml_model_decision_after_none() returns true the NN is not run
// (presumably the external model has already written the two split flags
// through the pointers it is given -- helper not visible here).
1471 | | void av1_ml_predict_breakout(AV1_COMP *const cpi, const MACROBLOCK *const x,
1472 | | const RD_STATS *const rd_stats,
1473 | | unsigned int pb_source_variance, int bit_depth,
1474 | 0 | PartitionSearchState *part_state) {
1475 | 0 | const PartitionBlkParams *blk_params = &part_state->part_blk_params;
1476 | 0 | const int mi_row = blk_params->mi_row, mi_col = blk_params->mi_col;
1477 | 0 | const BLOCK_SIZE bsize = blk_params->bsize;
1478 | 

// Pick the model and the speed-feature threshold for this square block size.
1479 | 0 | const NN_CONFIG *nn_config = NULL;
1480 | 0 | int thresh = 0;
1481 | 0 | switch (bsize) {
1482 | 0 | case BLOCK_8X8:
1483 | 0 | nn_config = &av1_partition_breakout_nnconfig_8;
1484 | 0 | thresh = cpi->sf.part_sf.ml_partition_search_breakout_thresh[0];
1485 | 0 | break;
1486 | 0 | case BLOCK_16X16:
1487 | 0 | nn_config = &av1_partition_breakout_nnconfig_16;
1488 | 0 | thresh = cpi->sf.part_sf.ml_partition_search_breakout_thresh[1];
1489 | 0 | break;
1490 | 0 | case BLOCK_32X32:
1491 | 0 | nn_config = &av1_partition_breakout_nnconfig_32;
1492 | 0 | thresh = cpi->sf.part_sf.ml_partition_search_breakout_thresh[2];
1493 | 0 | break;
1494 | 0 | case BLOCK_64X64:
1495 | 0 | nn_config = &av1_partition_breakout_nnconfig_64;
1496 | 0 | thresh = cpi->sf.part_sf.ml_partition_search_breakout_thresh[3];
1497 | 0 | break;
1498 | 0 | case BLOCK_128X128:
1499 | 0 | nn_config = &av1_partition_breakout_nnconfig_128;
1500 | 0 | thresh = cpi->sf.part_sf.ml_partition_search_breakout_thresh[4];
1501 | 0 | break;
1502 | 0 | default: assert(0 && "Unexpected bsize.");
1503 | 0 | }
// nn_config stays NULL for an unexpected bsize when asserts are compiled out.
1504 | 0 | if (!nn_config || thresh < 0) return;
1505 | |
// NOTE(review): the 3-entry table is indexed with
// ml_predict_breakout_level - 1, so the level is expected to be 1..3 here;
// nothing in this function checks that -- confirm the callers guarantee it.
1506 | 0 | const float ml_predict_breakout_thresh_scale[3] = { 1.15f, 1.05f, 1.0f };
1507 | 0 | thresh = (int)((float)thresh *
1508 | 0 | ml_predict_breakout_thresh_scale
1509 | 0 | [cpi->sf.part_sf.ml_predict_breakout_level - 1]);
1510 | |
1511 | | // Generate feature values.
1512 | 0 | float features[FEATURES];
1513 | 0 | int feature_index = 0;
1514 | 

// Feature 0: rate multiplied by x->rdmult and divided by 128 * 512 and by
// (1 << num_pels_log2).
1515 | 0 | const int num_pels_log2 = num_pels_log2_lookup[bsize];
1516 | 0 | float rate_f = (float)AOMMIN(rd_stats->rate, INT_MAX);
1517 | 0 | rate_f = ((float)x->rdmult / 128.0f / 512.0f / (float)(1 << num_pels_log2)) *
1518 | 0 | rate_f;
1519 | 0 | features[feature_index++] = rate_f;
1520 | 

// Feature 1: distortion clamped to INT_MAX, right-shifted by num_pels_log2.
1521 | 0 | const float dist_f =
1522 | 0 | (float)(AOMMIN(rd_stats->dist, INT_MAX) >> num_pels_log2);
1523 | 0 | features[feature_index++] = dist_f;
1524 | 

// Feature 2: the caller-supplied source variance, unmodified.
1525 | 0 | features[feature_index++] = (float)pb_source_variance;
1526 | 

// Feature 3: first plane-0 dequant entry shifted down by (bit_depth - 8),
// squared and divided by 256.
1527 | 0 | const int dc_q = (int)x->plane[0].dequant_QTX[0] >> (bit_depth - 8);
1528 | 0 | features[feature_index++] = (float)(dc_q * dc_q) / 256.0f;
1529 | 0 | assert(feature_index == FEATURES);
1530 | |
1531 | | // Write features to file
1532 | 0 | write_features_to_file(cpi->oxcf.partition_info_path,
1533 | 0 | cpi->ext_part_controller.test_mode, features, FEATURES,
1534 | 0 | 2, bsize, mi_row, mi_col);
1535 | 

// If the external partition model handles this block, skip the built-in NN.
1536 | 0 | if (ext_ml_model_decision_after_none(&cpi->ext_part_controller,
1537 | 0 | frame_is_intra_only(&cpi->common),
1538 | 0 | features, &part_state->do_square_split,
1539 | 0 | &part_state->do_rectangular_split)) {
1540 | 0 | return;
1541 | 0 | }
1542 | |
1543 | | // Calculate score using the NN model.
1544 | 0 | float score = 0.0f;
1545 | 0 | av1_nn_predict(features, nn_config, 1, &score);
1546 | |
1547 | | // Make decision: a score (x100) at or above thresh disables all splits.
1548 | 0 | if ((int)(score * 100) >= thresh) {
1549 | 0 | part_state->do_square_split = 0;
1550 | 0 | part_state->do_rectangular_split = 0;
1551 | 0 | }
1552 | 0 | }
1553 | | #undef FEATURES
1554 | | |
// Applies the pruning stages that run before the partition search of the
// current block, updating part_state in place. In order:
//   1. (CONFIG_THREE_PASS only) constraints derived from cpi->third_pass_ctx;
//      several branches of this stage return early and skip everything below.
//   2. Rectangular pruning for block sizes above rect_partition_eval_thresh.
//   3. Rectangular pruning based on qindex and block size.
//   4. Disabling all splits of BLOCK_8X8 (prune_sub_8x8_partition_level).
//   5. CNN-based pruning on intra-only frames.
//   6. Simple-motion-search based split decision.
//   7. Simple-motion-search based rectangular pruning.
1555 | | void av1_prune_partitions_before_search(AV1_COMP *const cpi,
1556 | | MACROBLOCK *const x,
1557 | | SIMPLE_MOTION_DATA_TREE *const sms_tree,
1558 | 0 | PartitionSearchState *part_state) {
1559 | 0 | const AV1_COMMON *const cm = &cpi->common;
1560 | 0 | const CommonModeInfoParams *const mi_params = &cm->mi_params;
1561 | 

1562 | 0 | const PartitionBlkParams *blk_params = &part_state->part_blk_params;
1563 | 0 | const BLOCK_SIZE bsize = blk_params->bsize;
1564 | 

1565 | | #if CONFIG_THREE_PASS
// NOTE(review): the comments in this section say "second pass" while the data
// is read through cpi->third_pass_ctx; presumably this is second-pass
// information made available to the third pass -- confirm.
1566 | | if (cpi->third_pass_ctx) {
1567 | | int mi_row = blk_params->mi_row;
1568 | | int mi_col = blk_params->mi_col;
1569 | | double ratio_h, ratio_w;
1570 | | av1_get_third_pass_ratio(cpi->third_pass_ctx, 0, cm->height, cm->width,
1571 | | &ratio_h, &ratio_w);
1572 | | THIRD_PASS_MI_INFO *this_mi = av1_get_third_pass_mi(
1573 | | cpi->third_pass_ctx, 0, mi_row, mi_col, ratio_h, ratio_w);
1574 | | BLOCK_SIZE third_pass_bsize =
1575 | | av1_get_third_pass_adjusted_blk_size(this_mi, ratio_h, ratio_w);
1576 | | // check the actual partition of this block in the second pass
1577 | | PARTITION_TYPE third_pass_part =
1578 | | av1_third_pass_get_sb_part_type(cpi->third_pass_ctx, this_mi);
1579 | |
1580 | | int is_edge = (mi_row + mi_size_high[bsize] >= cm->mi_params.mi_rows) ||
1581 | | (mi_col + mi_size_wide[bsize] >= cm->mi_params.mi_cols);
1582 | |
1583 | | if (!is_edge && block_size_wide[bsize] >= 16) {
1584 | | // If in second pass we used rectangular partition, then do not search for
1585 | | // rectangular partition in the different direction.
1586 | | if (third_pass_part != PARTITION_NONE) {
1587 | | if (third_pass_part == PARTITION_HORZ ||
1588 | | third_pass_part == PARTITION_HORZ_4 ||
1589 | | third_pass_part == PARTITION_HORZ_A ||
1590 | | third_pass_part == PARTITION_HORZ_B) {
1591 | | part_state->partition_rect_allowed[VERT] = 0;
1592 | | } else if (third_pass_part == PARTITION_VERT ||
1593 | | third_pass_part == PARTITION_VERT_4 ||
1594 | | third_pass_part == PARTITION_VERT_A ||
1595 | | third_pass_part == PARTITION_VERT_B) {
1596 | | part_state->partition_rect_allowed[HORZ] = 0;
1597 | | }
1598 | | }
1599 | |
// minSize / maxSize: shorter / longer side of the recorded block size.
1600 | | int minSize = AOMMIN(block_size_wide[third_pass_bsize],
1601 | | block_size_high[third_pass_bsize]);
1602 | | int maxSize = AOMMAX(block_size_wide[third_pass_bsize],
1603 | | block_size_high[third_pass_bsize]);
1604 | | if (block_size_wide[bsize] < minSize / 4) {
1605 | | // Current partition is too small, just terminate
1606 | | part_state->terminate_partition_search = 1;
1607 | | return;
1608 | | } else if (block_size_wide[bsize] < minSize / 2) {
1609 | | if (third_pass_part != PARTITION_NONE) {
1610 | | // Current partition is very small, and in second pass we used
1611 | | // rectangular partition. Terminate the search here then.
1612 | | part_state->terminate_partition_search = 1;
1613 | | return;
1614 | | } else {
1615 | | // Partition is small, but we still check this partition, only disable
1616 | | // further splits.
1617 | | // TODO(any): check why this is not covered by the termination for <
1618 | | // minSize/4.
1619 | | av1_disable_square_split_partition(part_state);
1620 | | av1_disable_rect_partitions(part_state);
1621 | | return;
1622 | | }
1623 | | } else if (block_size_wide[bsize] > maxSize) {
1624 | | // Partition is larger than in the second pass. Only allow split.
1625 | | av1_set_square_split_only(part_state);
1626 | | return;
1627 | | } else if (block_size_wide[bsize] >= minSize &&
1628 | | block_size_wide[bsize] <= maxSize) {
1629 | | // Partition is within a range where it is very likely to find a good
1630 | | // choice, so do not prune anything.
1631 | | return;
1632 | | }
// Remaining case (minSize / 2 <= width < minSize): fall through to the
// regular pruning stages below.
1633 | | }
1634 | | }
1635 | | #endif // CONFIG_THREE_PASS
1636 | |
1637 | | // Prune rectangular partitions for larger blocks.
1638 | 0 | if (bsize > cpi->sf.part_sf.rect_partition_eval_thresh) {
1639 | 0 | part_state->do_rectangular_split = 0;
1640 | 0 | part_state->partition_rect_allowed[HORZ] = 0;
1641 | 0 | part_state->partition_rect_allowed[VERT] = 0;
1642 | 0 | }
1643 | |
1644 | | // Prune rectangular, AB and 4-way partition based on q index and block size
1645 | 0 | if (cpi->sf.part_sf.prune_rectangular_split_based_on_qidx == 1) {
1646 | 0 | if (bsize == BLOCK_8X8 && x->qindex < 35)
1647 | 0 | av1_disable_rect_partitions(part_state);
1648 | 

1649 | 0 | } else if (cpi->sf.part_sf.prune_rectangular_split_based_on_qidx == 2) {
1650 | | // Enumeration difference between two square partitions
1651 | 0 | const int sqr_bsize_step = BLOCK_32X32 - BLOCK_16X16;
// Step down one square size from BLOCK_32X32 for each third of the qindex
// range, then clamp the result to [BLOCK_4X4, BLOCK_32X32].
1652 | 0 | int max_bsize =
1653 | 0 | BLOCK_32X32 - (x->qindex * 3 / QINDEX_RANGE) * sqr_bsize_step;
1654 | 0 | max_bsize = AOMMAX(max_bsize, BLOCK_4X4);
1655 | 0 | const BLOCK_SIZE max_prune_bsize =
1656 | 0 | (BLOCK_SIZE)AOMMIN(max_bsize, BLOCK_32X32);
1657 | |
1658 | | // Prune partition
1659 | | // qidx 0 to 85: prune bsize below BLOCK_32X32
1660 | | // qidx 86 to 170: prune bsize below BLOCK_16X16
1661 | | // qidx 171 to 255: prune bsize below BLOCK_8X8
1662 | 0 | if (bsize < max_prune_bsize) {
1663 | 0 | av1_disable_rect_partitions(part_state);
1664 | 0 | }
1665 | 0 | }
1666 | 

// For BLOCK_8X8: level 2 always disables all splits; level 1 does so only
// when the neighbor condition below holds.
1667 | 0 | if (cpi->sf.part_sf.prune_sub_8x8_partition_level && (bsize == BLOCK_8X8)) {
1668 | 0 | const MACROBLOCKD *const xd = &x->e_mbd;
1669 | 0 | int prune_sub_8x8;
1670 | 0 | if (cpi->sf.part_sf.prune_sub_8x8_partition_level == 2) {
1671 | 0 | prune_sub_8x8 = 1;
1672 | 0 | } else {
1673 | 0 | assert(cpi->sf.part_sf.prune_sub_8x8_partition_level == 1);
1674 | | // Prune if both neighbors are available and either is > BLOCK_8X8
1675 | 0 | prune_sub_8x8 = xd->left_available && xd->up_available &&
1676 | 0 | (xd->left_mbmi->bsize > BLOCK_8X8 ||
1677 | 0 | xd->above_mbmi->bsize > BLOCK_8X8);
1678 | 0 | }
1679 | 0 | if (prune_sub_8x8) {
1680 | 0 | av1_disable_all_splits(part_state);
1681 | 0 | }
1682 | 0 | }
1683 | |
1684 | | // A CNN-based speed feature pruning out either split or all non-split
1685 | | // partition in INTRA frame coding.
1686 | 0 | const int try_intra_cnn_based_part_prune =
1687 | 0 | frame_is_intra_only(cm) &&
1688 | 0 | cpi->sf.part_sf.intra_cnn_based_part_prune_level &&
1689 | 0 | cm->seq_params->sb_size >= BLOCK_64X64 && bsize <= BLOCK_64X64 &&
1690 | 0 | blk_params->bsize_at_least_8x8 &&
1691 | 0 | av1_is_whole_blk_in_frame(blk_params, mi_params);
1692 | 

1693 | 0 | if (try_intra_cnn_based_part_prune) {
1694 | 0 | intra_mode_cnn_partition(&cpi->common, x, x->part_search_info.quad_tree_idx,
1695 | 0 | cpi->sf.part_sf.intra_cnn_based_part_prune_level,
1696 | 0 | part_state);
1697 | 0 | }
1698 | |
1699 | | // Use simple motion search to prune out split or non-split partitions. This
1700 | | // must be done prior to PARTITION_SPLIT to propagate the initial mvs to a
1701 | | // smaller blocksize.
1702 | 0 | const int try_split_only =
1703 | 0 | cpi->sf.part_sf.simple_motion_search_split &&
1704 | 0 | part_state->do_square_split && blk_params->bsize_at_least_8x8 &&
1705 | 0 | av1_is_whole_blk_in_frame(blk_params, mi_params) &&
1706 | 0 | !frame_is_intra_only(cm) && !av1_superres_scaled(cm);
1707 | 

1708 | 0 | if (try_split_only) {
1709 | 0 | simple_motion_search_based_split(cpi, x, sms_tree, part_state);
1710 | 0 | }
1711 | |
1712 | | // Use simple motion search to prune out rectangular partition in some
1713 | | // direction. The results are stored in prune_horz and prune_vert in order to
1714 | | // bypass future related pruning checks if a pruning decision has been made.
1715 | |
1716 | | // We want to search at least one partition mode, so don't prune if NONE and
1717 | | // SPLIT are disabled.
1718 | 0 | const int non_rect_part_allowed =
1719 | 0 | part_state->do_square_split || part_state->partition_none_allowed;
1720 | | // Only run the model if the partitions are not already pruned.
1721 | 0 | const int rect_part_allowed = part_state->do_rectangular_split &&
1722 | 0 | ((part_state->partition_rect_allowed[HORZ] &&
1723 | 0 | !part_state->prune_rect_part[HORZ]) ||
1724 | 0 | (part_state->partition_rect_allowed[VERT] &&
1725 | 0 | !part_state->prune_rect_part[VERT]));
1726 | 

1727 | 0 | const int try_prune_rect = cpi->sf.part_sf.simple_motion_search_prune_rect &&
1728 | 0 | !frame_is_intra_only(cm) &&
1729 | 0 | non_rect_part_allowed && rect_part_allowed &&
1730 | 0 | !av1_superres_scaled(cm);
1731 | 

1732 | 0 | if (try_prune_rect) {
1733 | 0 | simple_motion_search_prune_rect(cpi, x, sms_tree, part_state);
1734 | 0 | }
1735 | 0 | }
1736 | | |
1737 | | #ifndef NDEBUG
// Debug-only helper (compiled only when NDEBUG is not defined): returns
// nonzero when bsize has equal entries in block_size_wide and
// block_size_high.
1738 | | static inline int is_bsize_square(BLOCK_SIZE bsize) {
1739 | | return block_size_wide[bsize] == block_size_high[bsize];
1740 | | }
1741 | | #endif // NDEBUG
1742 | | |
// Restricts the partition types in part_state according to
// sb_enc->max_partition_size and sb_enc->min_partition_size (both asserted to
// be square, with min <= max). The current block size is asserted to be
// square as well.
//   - Wider than max: only square split is allowed.
//   - Not wider than min: rectangular partitions are disabled, and
//     partition_none_allowed becomes the negation of do_square_split.
//   - Otherwise part_state is left unchanged.
1743 | | void av1_prune_partitions_by_max_min_bsize(SuperBlockEnc *sb_enc,
1744 | 0 | PartitionSearchState *part_state) {
1745 | 0 | assert(is_bsize_square(sb_enc->max_partition_size));
1746 | 0 | assert(is_bsize_square(sb_enc->min_partition_size));
1747 | 0 | assert(sb_enc->min_partition_size <= sb_enc->max_partition_size);
1748 | 0 | const PartitionBlkParams *blk_params = &part_state->part_blk_params;
1749 | 0 | const BLOCK_SIZE bsize = blk_params->bsize;
1750 | 0 | assert(is_bsize_square(bsize));
// Compare by pixel width rather than by BLOCK_SIZE enum value.
1751 | 0 | const int max_partition_size_1d = block_size_wide[sb_enc->max_partition_size];
1752 | 0 | const int min_partition_size_1d = block_size_wide[sb_enc->min_partition_size];
1753 | 0 | const int bsize_1d = block_size_wide[bsize];
1754 | 0 | assert(min_partition_size_1d <= max_partition_size_1d);
1755 | 0 | const int is_le_min_sq_part = bsize_1d <= min_partition_size_1d;
1756 | 0 | const int is_gt_max_sq_part = bsize_1d > max_partition_size_1d;
1757 | 0 | if (is_gt_max_sq_part) {
1758 | | // If current block size is larger than max, only allow split.
1759 | 0 | av1_set_square_split_only(part_state);
1760 | 0 | } else if (is_le_min_sq_part) {
1761 | | // If current block size is less or equal to min, only allow none if valid
1762 | | // block large enough; only allow split otherwise.
1763 | 0 | av1_disable_rect_partitions(part_state);
1764 | |
1765 | | // only disable square split when current block is not at the picture
1766 | | // boundary. otherwise, inherit the square split flag from previous logic
1767 | 0 | if (av1_blk_has_rows_and_cols(blk_params)) {
1768 | 0 | part_state->do_square_split = 0;
1769 | 0 | }
// Exactly one of NONE and SPLIT stays enabled after this point.
1770 | 0 | part_state->partition_none_allowed = !(part_state->do_square_split);
1771 | 0 | }
1772 | 0 | }
1773 | | |
1774 | | // Decide whether to evaluate an AB partition in the direction given by
1775 | | // rect_part (PARTITION_HORZ or PARTITION_VERT) based on split and HORZ/VERT info.
// Up to three "wins" are counted:
//   - one if the rectangular partition won: taken from
//     rect_part_win_info->rect_part_win[] when rect_part_win_info is non-NULL,
//     otherwise pc_tree->partitioning == rect_part;
//   - one for each of the split sub-blocks split_idx1 and split_idx2 whose
//     pc_tree->split[] entry is NULL or has partitioning PARTITION_NONE.
// Returns 1 (evaluate) when the count reaches the qindex-dependent threshold,
// 0 (prune) otherwise.
1776 | | static int evaluate_ab_partition_based_on_split(
1777 | | const PC_TREE *pc_tree, PARTITION_TYPE rect_part,
1778 | | const RD_RECT_PART_WIN_INFO *rect_part_win_info, int qindex, int split_idx1,
1779 | 0 | int split_idx2) {
1780 | 0 | int num_win = 0;
1781 | | // Threshold for number of winners
1782 | | // Conservative pruning for high quantizers
// With integer division (assuming 0 <= qindex <= MAXQ) this is 3 while
// 2 * (MAXQ - qindex) >= MAXQ, and 0 otherwise, in which case the function
// always returns 1.
1783 | 0 | const int num_win_thresh = AOMMIN(3 * (2 * (MAXQ - qindex) / MAXQ), 3);
1784 | 0 | int sub_part_win =
1785 | 0 | (rect_part_win_info == NULL) ? (pc_tree->partitioning == rect_part)
1786 | 0 | : (rect_part == PARTITION_HORZ) ? rect_part_win_info->rect_part_win[HORZ]
1787 | 0 | : rect_part_win_info->rect_part_win[VERT];
1788 | 0 | num_win += (sub_part_win) ? 1 : 0;
1789 | 0 | if (pc_tree->split[split_idx1]) {
1790 | 0 | num_win +=
1791 | 0 | (pc_tree->split[split_idx1]->partitioning == PARTITION_NONE) ? 1 : 0;
1792 | 0 | } else {
// A missing split sub-tree counts as a win.
1793 | 0 | num_win += 1;
1794 | 0 | }
1795 | 0 | if (pc_tree->split[split_idx2]) {
1796 | 0 | num_win +=
1797 | 0 | (pc_tree->split[split_idx2]->partitioning == PARTITION_NONE) ? 1 : 0;
1798 | 0 | } else {
1799 | 0 | num_win += 1;
1800 | 0 | }
1801 | 0 | if (num_win < num_win_thresh) {
1802 | 0 | return 0;
1803 | 0 | }
1804 | 0 | return 1;
1805 | 0 | }
1806 | | |
// Decides which AB partitions (HORZ_A, HORZ_B, VERT_A, VERT_B) may be
// searched for the current block and writes the result to
// ab_partitions_allowed[]. Stages, in order:
//   1. Initial allowance from ext_partition_allowed, enable_ab_partitions and
//      the per-direction partition_rect_allowed flags.
//   2. Pruning by the current best partition type (and, at level 1, by
//      pb_source_variance).
//   3. Pruning by RD cost estimated from the rectangular and split sub-block
//      costs, compared against best_rdcost.
//   4. NN-based pruning (ml_prune_ab_partition).
//   5. Pruning by win counts (evaluate_ab_partition_based_on_split).
// Side effect: when prune_ext_partition_types_search_level is nonzero,
// INT64_MAX entries of part_state->rect_part_rd[HORZ], rect_part_rd[VERT]
// and split_rd are overwritten with 0.
1807 | | void av1_prune_ab_partitions(AV1_COMP *cpi, const MACROBLOCK *x,
1808 | | const PC_TREE *pc_tree, int pb_source_variance,
1809 | | int64_t best_rdcost,
1810 | | const RD_RECT_PART_WIN_INFO *rect_part_win_info,
1811 | | bool ext_partition_allowed,
1812 | | PartitionSearchState *part_state,
1813 | 0 | int *ab_partitions_allowed) {
1814 | 0 | int64_t *horz_rd = part_state->rect_part_rd[HORZ];
1815 | 0 | int64_t *vert_rd = part_state->rect_part_rd[VERT];
1816 | 0 | int64_t *split_rd = part_state->split_rd;
1817 | 0 | const PartitionCfg *const part_cfg = &cpi->oxcf.part_cfg;
1818 | | // The standard AB partitions are allowed initially if ext-partition-types are
1819 | | // allowed.
1820 | 0 | int horzab_partition_allowed = ext_partition_allowed &&
1821 | 0 | part_cfg->enable_ab_partitions &&
1822 | 0 | part_state->partition_rect_allowed[HORZ];
1823 | 0 | int vertab_partition_allowed = ext_partition_allowed &&
1824 | 0 | part_cfg->enable_ab_partitions &&
1825 | 0 | part_state->partition_rect_allowed[VERT];
1826 | |
1827 | | // Pruning: pruning out AB partitions on one main direction based on the
1828 | | // current best partition and source variance.
1829 | 0 | if (cpi->sf.part_sf.prune_ext_partition_types_search_level) {
1830 | 0 | if (cpi->sf.part_sf.prune_ext_partition_types_search_level == 1) {
1831 | | // TODO(debargha,huisu@google.com): may need to tune the threshold for
1832 | | // pb_source_variance.
1833 | 0 | horzab_partition_allowed &= (pc_tree->partitioning == PARTITION_HORZ ||
1834 | 0 | (pc_tree->partitioning == PARTITION_NONE &&
1835 | 0 | pb_source_variance < 32) ||
1836 | 0 | pc_tree->partitioning == PARTITION_SPLIT);
1837 | 0 | vertab_partition_allowed &= (pc_tree->partitioning == PARTITION_VERT ||
1838 | 0 | (pc_tree->partitioning == PARTITION_NONE &&
1839 | 0 | pb_source_variance < 32) ||
1840 | 0 | pc_tree->partitioning == PARTITION_SPLIT);
1841 | 0 | } else {
1842 | 0 | horzab_partition_allowed &= (pc_tree->partitioning == PARTITION_HORZ ||
1843 | 0 | pc_tree->partitioning == PARTITION_SPLIT);
1844 | 0 | vertab_partition_allowed &= (pc_tree->partitioning == PARTITION_VERT ||
1845 | 0 | pc_tree->partitioning == PARTITION_SPLIT);
1846 | 0 | }
// Replace INT64_MAX costs with 0 so the sums computed below cannot overflow.
// This writes through to part_state.
1847 | 0 | horz_rd[0] = (horz_rd[0] < INT64_MAX ? horz_rd[0] : 0);
1848 | 0 | horz_rd[1] = (horz_rd[1] < INT64_MAX ? horz_rd[1] : 0);
1849 | 0 | vert_rd[0] = (vert_rd[0] < INT64_MAX ? vert_rd[0] : 0);
1850 | 0 | vert_rd[1] = (vert_rd[1] < INT64_MAX ? vert_rd[1] : 0);
1851 | 0 | split_rd[0] = (split_rd[0] < INT64_MAX ? split_rd[0] : 0);
1852 | 0 | split_rd[1] = (split_rd[1] < INT64_MAX ? split_rd[1] : 0);
1853 | 0 | split_rd[2] = (split_rd[2] < INT64_MAX ? split_rd[2] : 0);
1854 | 0 | split_rd[3] = (split_rd[3] < INT64_MAX ? split_rd[3] : 0);
1855 | 0 | }
1856 | |
1857 | | // Pruning: pruning out horz_a or horz_b if the combined rdcost of its
1858 | | // subblocks estimated from previous partitions is much higher than the best
1859 | | // rd so far.
1860 | 0 | ab_partitions_allowed[HORZ_A] = horzab_partition_allowed;
1861 | 0 | ab_partitions_allowed[HORZ_B] = horzab_partition_allowed;
1862 | 0 | if (cpi->sf.part_sf.prune_ext_partition_types_search_level) {
// Each estimate adds one rectangular half and the two split quadrants that
// make up the other half.
1863 | 0 | const int64_t horz_a_rd = horz_rd[1] + split_rd[0] + split_rd[1];
1864 | 0 | const int64_t horz_b_rd = horz_rd[0] + split_rd[2] + split_rd[3];
1865 | 0 | switch (cpi->sf.part_sf.prune_ext_partition_types_search_level) {
1866 | 0 | case 1:
1867 | 0 | ab_partitions_allowed[HORZ_A] &= (horz_a_rd / 16 * 14 < best_rdcost);
1868 | 0 | ab_partitions_allowed[HORZ_B] &= (horz_b_rd / 16 * 14 < best_rdcost);
1869 | 0 | break;
1870 | 0 | case 2:
1871 | 0 | default:
1872 | 0 | ab_partitions_allowed[HORZ_A] &= (horz_a_rd / 16 * 15 < best_rdcost);
1873 | 0 | ab_partitions_allowed[HORZ_B] &= (horz_b_rd / 16 * 15 < best_rdcost);
1874 | 0 | break;
1875 | 0 | }
1876 | 0 | }
1877 | |
1878 | | // Pruning: pruning out vert_a or vert_b if the combined rdcost of its
1879 | | // subblocks estimated from previous partitions is much higher than the best
1880 | | // rd so far.
1881 | 0 | ab_partitions_allowed[VERT_A] = vertab_partition_allowed;
1882 | 0 | ab_partitions_allowed[VERT_B] = vertab_partition_allowed;
1883 | 0 | if (cpi->sf.part_sf.prune_ext_partition_types_search_level) {
1884 | 0 | const int64_t vert_a_rd = vert_rd[1] + split_rd[0] + split_rd[2];
1885 | 0 | const int64_t vert_b_rd = vert_rd[0] + split_rd[1] + split_rd[3];
1886 | 0 | switch (cpi->sf.part_sf.prune_ext_partition_types_search_level) {
1887 | 0 | case 1:
1888 | 0 | ab_partitions_allowed[VERT_A] &= (vert_a_rd / 16 * 14 < best_rdcost);
1889 | 0 | ab_partitions_allowed[VERT_B] &= (vert_b_rd / 16 * 14 < best_rdcost);
1890 | 0 | break;
1891 | 0 | case 2:
1892 | 0 | default:
1893 | 0 | ab_partitions_allowed[VERT_A] &= (vert_a_rd / 16 * 15 < best_rdcost);
1894 | 0 | ab_partitions_allowed[VERT_B] &= (vert_b_rd / 16 * 15 < best_rdcost);
1895 | 0 | break;
1896 | 0 | }
1897 | 0 | }
1898 | |
1899 | | // Pruning: pruning out some ab partitions using a DNN taking rd costs of
1900 | | // sub-blocks from previous basic partition types.
1901 | 0 | if (cpi->sf.part_sf.ml_prune_partition && ext_partition_allowed &&
1902 | 0 | part_state->partition_rect_allowed[HORZ] &&
1903 | 0 | part_state->partition_rect_allowed[VERT]) {
1904 | | // TODO(huisu@google.com): x->source_variance may not be the current
1905 | | // block's variance. The correct one to use is pb_source_variance. Need to
1906 | | // re-train the model to fix it.
1907 | 0 | ml_prune_ab_partition(cpi, pc_tree->partitioning,
1908 | 0 | get_unsigned_bits(x->source_variance), best_rdcost,
1909 | 0 | part_state, ab_partitions_allowed);
1910 | 0 | }
1911 | |
1912 | | // Pruning: pruning AB partitions based on the number of horz/vert wins
1913 | | // in the current block and sub-blocks in PARTITION_SPLIT.
// The last two arguments are the indices of the two pc_tree->split[]
// sub-blocks inspected for each AB type.
1914 | 0 | if (cpi->sf.part_sf.prune_ext_part_using_split_info >= 2 &&
1915 | 0 | ab_partitions_allowed[HORZ_A]) {
1916 | 0 | ab_partitions_allowed[HORZ_A] &= evaluate_ab_partition_based_on_split(
1917 | 0 | pc_tree, PARTITION_HORZ, rect_part_win_info, x->qindex, 0, 1);
1918 | 0 | }
1919 | 0 | if (cpi->sf.part_sf.prune_ext_part_using_split_info >= 2 &&
1920 | 0 | ab_partitions_allowed[HORZ_B]) {
1921 | 0 | ab_partitions_allowed[HORZ_B] &= evaluate_ab_partition_based_on_split(
1922 | 0 | pc_tree, PARTITION_HORZ, rect_part_win_info, x->qindex, 2, 3);
1923 | 0 | }
1924 | 0 | if (cpi->sf.part_sf.prune_ext_part_using_split_info >= 2 &&
1925 | 0 | ab_partitions_allowed[VERT_A]) {
1926 | 0 | ab_partitions_allowed[VERT_A] &= evaluate_ab_partition_based_on_split(
1927 | 0 | pc_tree, PARTITION_VERT, rect_part_win_info, x->qindex, 0, 2);
1928 | 0 | }
1929 | 0 | if (cpi->sf.part_sf.prune_ext_part_using_split_info >= 2 &&
1930 | 0 | ab_partitions_allowed[VERT_B]) {
1931 | 0 | ab_partitions_allowed[VERT_B] &= evaluate_ab_partition_based_on_split(
1932 | 0 | pc_tree, PARTITION_VERT, rect_part_win_info, x->qindex, 1, 3);
1933 | 0 | }
1934 | 0 | }
1935 | | |
1936 | | // Prepare features for the external model. Specifically, features after
1937 | | // ab partition is searched.
// Fills features->after_part_ab.f[] with 18 values (asserted at the end):
//   - part_ctx;
//   - get_unsigned_bits(pb_source_variance);
//   - 8 ratios of sub-block RD cost to best_rd, in the order horz, vert,
//     split (assumes SUB_PARTITIONS_RECT == 2 and SUB_PARTITIONS_SPLIT == 4);
//   - SUB_PARTITIONS_PART4 variance ratios for the HORZ_4 sub-blocks, then
//     the same number for the VERT_4 sub-blocks (assumes
//     SUB_PARTITIONS_PART4 == 4).
// Calls av1_setup_src_planes() on x and then reads x->plane[0].src, which is
// why x is not const.
1938 | | static void prepare_features_after_part_ab(
1939 | | const AV1_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize,
1940 | | int part_ctx, int64_t best_rd,
1941 | | int64_t rect_part_rd[NUM_RECT_PARTS][SUB_PARTITIONS_RECT],
1942 | | int64_t split_rd[SUB_PARTITIONS_SPLIT], unsigned int pb_source_variance,
1943 | 0 | int mi_row, int mi_col, aom_partition_features_t *const features) {
1944 | 0 | int64_t *horz_rd = rect_part_rd[HORZ];
1945 | 0 | int64_t *vert_rd = rect_part_rd[VERT];
1946 | |
1947 | | // Generate features.
1948 | 0 | int feature_index = 0;
1949 | 0 | features->after_part_ab.f[feature_index++] = (float)part_ctx;
1950 | 0 | features->after_part_ab.f[feature_index++] =
1951 | 0 | (float)get_unsigned_bits(pb_source_variance);
1952 | 

// Collect sub-block RD costs. Entries that are not in (0, 1000000000) stay 0
// and yield the default ratio of 1.0 below.
1953 | 0 | const int rdcost = (int)AOMMIN(INT_MAX, best_rd);
1954 | 0 | int sub_block_rdcost[8] = { 0 };
1955 | 0 | int rd_index = 0;
1956 | 0 | for (int i = 0; i < SUB_PARTITIONS_RECT; ++i) {
1957 | 0 | if (horz_rd[i] > 0 && horz_rd[i] < 1000000000)
1958 | 0 | sub_block_rdcost[rd_index] = (int)horz_rd[i];
1959 | 0 | ++rd_index;
1960 | 0 | }
1961 | 0 | for (int i = 0; i < SUB_PARTITIONS_RECT; ++i) {
1962 | 0 | if (vert_rd[i] > 0 && vert_rd[i] < 1000000000)
1963 | 0 | sub_block_rdcost[rd_index] = (int)vert_rd[i];
1964 | 0 | ++rd_index;
1965 | 0 | }
1966 | 0 | for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) {
1967 | 0 | if (split_rd[i] > 0 && split_rd[i] < 1000000000)
1968 | 0 | sub_block_rdcost[rd_index] = (int)split_rd[i];
1969 | 0 | ++rd_index;
1970 | 0 | }
1971 | 0 | for (int i = 0; i < 8; ++i) {
1972 | | // Ratio between the sub-block RD and the whole-block RD.
// Stays at 1.0 unless the sub-block cost is positive and below rdcost.
1973 | 0 | float rd_ratio = 1.0f;
1974 | 0 | if (sub_block_rdcost[i] > 0 && sub_block_rdcost[i] < rdcost)
1975 | 0 | rd_ratio = (float)sub_block_rdcost[i] / (float)rdcost;
1976 | 0 | features->after_part_ab.f[feature_index++] = rd_ratio;
1977 | 0 | }
1978 | |
1979 | | // 4-way partitions are only allowed for these three square block sizes.
1980 | 0 | assert(bsize == BLOCK_16X16 || bsize == BLOCK_32X32 || bsize == BLOCK_64X64);
1981 | |
1982 | | // Get variance of the 1:4 and 4:1 sub-blocks.
1983 | 0 | unsigned int horz_4_source_var[SUB_PARTITIONS_PART4] = { 0 };
1984 | 0 | unsigned int vert_4_source_var[SUB_PARTITIONS_PART4] = { 0 };
1985 | 0 | {
1986 | 0 | BLOCK_SIZE horz_4_bs = get_partition_subsize(bsize, PARTITION_HORZ_4);
1987 | 0 | BLOCK_SIZE vert_4_bs = get_partition_subsize(bsize, PARTITION_VERT_4);
1988 | 

1989 | 0 | assert(horz_4_bs != BLOCK_INVALID);
1990 | 0 | assert(vert_4_bs != BLOCK_INVALID);
1991 | 

1992 | 0 | av1_setup_src_planes(x, cpi->source, mi_row, mi_col,
1993 | 0 | av1_num_planes(&cpi->common), bsize);
1994 | 0 | const int src_stride = x->plane[0].src.stride;
1995 | 0 | uint8_t *src = x->plane[0].src.buf;
1996 | 0 | const MACROBLOCKD *const xd = &x->e_mbd;
1997 | 

1998 | 0 | struct buf_2d horz_4_src, vert_4_src;
1999 | 0 | horz_4_src.stride = src_stride;
2000 | 0 | vert_4_src.stride = src_stride;
2001 | 

2002 | 0 | for (int i = 0; i < SUB_PARTITIONS_PART4; ++i) {
// The i-th HORZ_4 strip starts i sub-block heights down; the i-th VERT_4
// strip starts i sub-block widths to the right.
2003 | 0 | horz_4_src.buf = src + i * block_size_high[horz_4_bs] * src_stride;
2004 | 0 | vert_4_src.buf = src + i * block_size_wide[vert_4_bs];
2005 | 

2006 | 0 | horz_4_source_var[i] = av1_get_perpixel_variance_facade(
2007 | 0 | cpi, xd, &horz_4_src, horz_4_bs, AOM_PLANE_Y);
2008 | 0 | vert_4_source_var[i] = av1_get_perpixel_variance_facade(
2009 | 0 | cpi, xd, &vert_4_src, vert_4_bs, AOM_PLANE_Y);
2010 | 0 | }
2011 | 0 | }
2012 | 

// The +1 on numerator and denominator avoids division by zero; each ratio is
// clamped to [low_b, high_b].
2013 | 0 | const float denom = (float)(pb_source_variance + 1);
2014 | 0 | const float low_b = 0.1f;
2015 | 0 | const float high_b = 10.0f;
2016 | 0 | for (int i = 0; i < SUB_PARTITIONS_PART4; ++i) {
2017 | | // Ratio between the 4:1 sub-block variance and the whole-block variance.
2018 | 0 | float var_ratio = (float)(horz_4_source_var[i] + 1) / denom;
2019 | 0 | if (var_ratio < low_b) var_ratio = low_b;
2020 | 0 | if (var_ratio > high_b) var_ratio = high_b;
2021 | 0 | features->after_part_ab.f[feature_index++] = var_ratio;
2022 | 0 | }
2023 | 0 | for (int i = 0; i < SUB_PARTITIONS_PART4; ++i) {
2024 | | // Ratio between the 1:4 sub-block variance and the whole-block variance.
2025 | 0 | float var_ratio = (float)(vert_4_source_var[i] + 1) / denom;
2026 | 0 | if (var_ratio < low_b) var_ratio = low_b;
2027 | 0 | if (var_ratio > high_b) var_ratio = high_b;
2028 | 0 | features->after_part_ab.f[feature_index++] = var_ratio;
2029 | 0 | }
2030 | 0 | assert(feature_index == 18);
2031 | 0 | }
2032 | | |
2033 | | // If the external partition model is used, we let it determine partition
2034 | | // decisions before partition none. Specifically, these parameters:
2035 | | // partition_none_allowed
2036 | | // partition_horz_allowed
2037 | | // partition_vert_allowed
2038 | | // do_rectangular_split
2039 | | // do_square_split
// Copies FEATURE_SIZE_SMS_SPLIT values from features_from_motion into the
// feature struct sent to the external model.
// Returns false, leaving all output pointers untouched, when the controller
// is not ready or no valid decision is obtained. Returns true after copying
// the decision into the five output pointers.
2040 | | static bool ext_ml_model_decision_before_none(
2041 | | AV1_COMP *cpi, const float features_from_motion[FEATURE_SIZE_SMS_SPLIT],
2042 | | int *partition_none_allowed, int *partition_horz_allowed,
2043 | | int *partition_vert_allowed, int *do_rectangular_split,
2044 | 0 | int *do_square_split) {
2045 | 0 | ExtPartController *const ext_part_controller = &cpi->ext_part_controller;
2046 | 0 | if (!ext_part_controller->ready) return false;
2047 | |
2048 | | // Setup features.
2049 | 0 | aom_partition_features_t features;
2050 | 0 | features.id = AOM_EXT_PART_FEATURE_BEFORE_NONE;
2051 | 0 | for (int i = 0; i < FEATURE_SIZE_SMS_SPLIT; ++i) {
2052 | 0 | features.before_part_none.f[i] = features_from_motion[i];
2053 | 0 | }
2054 | |
2055 | | // Send necessary features to the external model.
2056 | 0 | av1_ext_part_send_features(ext_part_controller, &features);
2057 | |
2058 | | // Get partition decisions from the external model.
2059 | 0 | aom_partition_decision_t decision;
2060 | 0 | const bool valid_decision =
2061 | 0 | av1_ext_part_get_partition_decision(ext_part_controller, &decision);
2062 | 0 | if (!valid_decision) return false;
2063 | |
2064 | | // Populate decisions
2065 | 0 | *partition_none_allowed = decision.partition_none_allowed;
2066 | 0 | *partition_horz_allowed = decision.partition_rect_allowed[HORZ];
2067 | 0 | *partition_vert_allowed = decision.partition_rect_allowed[VERT];
2068 | 0 | *do_rectangular_split = decision.do_rectangular_split;
2069 | 0 | *do_square_split = decision.do_square_split;
2070 | 

2071 | 0 | return true;
2072 | 0 | }
2073 | | |
2074 | | // If the external partition model is used, we let it determine partition |
2075 | | // decisions before partition none. Specifically, these parameters: |
2076 | | // prune_horz |
2077 | | // prune_vert |
2078 | | static bool ext_ml_model_decision_before_none_part2( |
2079 | | AV1_COMP *cpi, |
2080 | | const float features_from_motion[FEATURE_SIZE_SMS_PRUNE_PART], |
2081 | 0 | int *prune_horz, int *prune_vert) { |
2082 | 0 | ExtPartController *const ext_part_controller = &cpi->ext_part_controller; |
2083 | 0 | if (!ext_part_controller->ready) return false; |
2084 | | |
2085 | | // Setup features. |
2086 | 0 | aom_partition_features_t features; |
2087 | 0 | features.id = AOM_EXT_PART_FEATURE_BEFORE_NONE_PART2; |
2088 | 0 | for (int i = 0; i < FEATURE_SIZE_SMS_PRUNE_PART; ++i) { |
2089 | 0 | features.before_part_none.f_part2[i] = features_from_motion[i]; |
2090 | 0 | } |
2091 | | |
2092 | | // Send necessary features to the external model. |
2093 | 0 | av1_ext_part_send_features(ext_part_controller, &features); |
2094 | | |
2095 | | // Get partition decisions from the external model. |
2096 | 0 | aom_partition_decision_t decision; |
2097 | 0 | const bool valid_decision = |
2098 | 0 | av1_ext_part_get_partition_decision(ext_part_controller, &decision); |
2099 | 0 | if (!valid_decision) return false; |
2100 | | |
2101 | | // Populate decisions |
2102 | 0 | *prune_horz = decision.prune_rect_part[HORZ]; |
2103 | 0 | *prune_vert = decision.prune_rect_part[VERT]; |
2104 | |
|
2105 | 0 | return true; |
2106 | 0 | } |
2107 | | |
2108 | | // If the external partition model is used, we let it determine partition |
2109 | | // decisions after none partition. Specifically, these parameters: |
2110 | | // do_square_split |
2111 | | // do_rectangular_split |
2112 | | bool ext_ml_model_decision_after_none( |
2113 | | ExtPartController *const ext_part_controller, const int is_intra_frame, |
2114 | | const float *const features_after_none, int *do_square_split, |
2115 | 0 | int *do_rectangular_split) { |
2116 | 0 | if (!ext_part_controller->ready || is_intra_frame) return false; |
2117 | | |
2118 | | // Setup features. |
2119 | 0 | aom_partition_features_t features; |
2120 | 0 | features.id = AOM_EXT_PART_FEATURE_AFTER_NONE; |
2121 | 0 | for (int i = 0; i < 4; ++i) { |
2122 | 0 | features.after_part_none.f[i] = features_after_none[i]; |
2123 | 0 | } |
2124 | | |
2125 | | // Send necessary features to the external model. |
2126 | 0 | av1_ext_part_send_features(ext_part_controller, &features); |
2127 | | |
2128 | | // Get partition decisions from the external model. |
2129 | 0 | aom_partition_decision_t decision; |
2130 | 0 | const bool valid_decision = |
2131 | 0 | av1_ext_part_get_partition_decision(ext_part_controller, &decision); |
2132 | 0 | if (!valid_decision) return false; |
2133 | | |
2134 | | // Populate decisions |
2135 | 0 | *do_square_split = decision.do_square_split; |
2136 | 0 | *do_rectangular_split = decision.do_rectangular_split; |
2137 | |
|
2138 | 0 | return true; |
2139 | 0 | } |
2140 | | |
2141 | | // If the external partition model is used, we let it determine partition |
2142 | | // decisions after none partition. Specifically, these parameters: |
2143 | | // terminate_partition_search |
2144 | | bool ext_ml_model_decision_after_none_part2( |
2145 | | AV1_COMP *const cpi, const float *const features_terminate, |
2146 | 0 | int *terminate_partition_search) { |
2147 | 0 | AV1_COMMON *const cm = &cpi->common; |
2148 | 0 | ExtPartController *const ext_part_controller = &cpi->ext_part_controller; |
2149 | 0 | if (!ext_part_controller->ready || frame_is_intra_only(cm)) return false; |
2150 | | |
2151 | | // Setup features. |
2152 | 0 | aom_partition_features_t features; |
2153 | 0 | features.id = AOM_EXT_PART_FEATURE_AFTER_NONE_PART2; |
2154 | 0 | for (int i = 0; i < FEATURE_SIZE_SMS_TERM_NONE; ++i) { |
2155 | 0 | features.after_part_none.f_terminate[i] = features_terminate[i]; |
2156 | 0 | } |
2157 | | |
2158 | | // Send necessary features to the external model. |
2159 | 0 | av1_ext_part_send_features(ext_part_controller, &features); |
2160 | | |
2161 | | // Get partition decisions from the external model. |
2162 | 0 | aom_partition_decision_t decision; |
2163 | 0 | const bool valid_decision = |
2164 | 0 | av1_ext_part_get_partition_decision(ext_part_controller, &decision); |
2165 | 0 | if (!valid_decision) return false; |
2166 | | |
2167 | | // Populate decisions |
2168 | 0 | *terminate_partition_search = decision.terminate_partition_search; |
2169 | |
|
2170 | 0 | return true; |
2171 | 0 | } |
2172 | | |
2173 | | // If the external partition model is used, we let it determine partition |
2174 | | // decisions after none partition. Specifically, these parameters: |
2175 | | // terminate_partition_search |
2176 | | bool ext_ml_model_decision_after_split(AV1_COMP *const cpi, |
2177 | | const float *const features_terminate, |
2178 | 0 | int *terminate_partition_search) { |
2179 | 0 | const AV1_COMMON *const cm = &cpi->common; |
2180 | 0 | ExtPartController *const ext_part_controller = &cpi->ext_part_controller; |
2181 | 0 | if (frame_is_intra_only(cm) || !cpi->ext_part_controller.ready) { |
2182 | 0 | return false; |
2183 | 0 | } |
2184 | | |
2185 | | // Setup features. |
2186 | 0 | aom_partition_features_t features; |
2187 | 0 | features.id = AOM_EXT_PART_FEATURE_AFTER_SPLIT; |
2188 | 0 | for (int i = 0; i < 31; ++i) { |
2189 | 0 | features.after_part_split.f_terminate[i] = features_terminate[i]; |
2190 | 0 | } |
2191 | | |
2192 | | // Send necessary features to the external model. |
2193 | 0 | av1_ext_part_send_features(ext_part_controller, &features); |
2194 | | |
2195 | | // Get partition decisions from the external model. |
2196 | 0 | aom_partition_decision_t decision; |
2197 | 0 | const bool valid_decision = |
2198 | 0 | av1_ext_part_get_partition_decision(ext_part_controller, &decision); |
2199 | 0 | if (!valid_decision) return false; |
2200 | | |
2201 | | // Populate decisions |
2202 | 0 | *terminate_partition_search = decision.terminate_partition_search; |
2203 | |
|
2204 | 0 | return true; |
2205 | 0 | } |
2206 | | |
2207 | | // If the external partition model is used, we let it determine partition |
2208 | | // decisions after none partition. Specifically, these parameters: |
2209 | | // prune_rect_part[HORZ] |
2210 | | // prune_rect_part[VERT] |
2211 | | bool ext_ml_model_decision_after_split_part2( |
2212 | | ExtPartController *const ext_part_controller, const int is_intra_frame, |
2213 | | const float *const features_prune, int *prune_rect_part_horz, |
2214 | 0 | int *prune_rect_part_vert) { |
2215 | 0 | if (is_intra_frame || !ext_part_controller->ready) { |
2216 | 0 | return false; |
2217 | 0 | } |
2218 | | |
2219 | | // Setup features. |
2220 | 0 | aom_partition_features_t features; |
2221 | 0 | features.id = AOM_EXT_PART_FEATURE_AFTER_SPLIT_PART2; |
2222 | 0 | for (int i = 0; i < 9; ++i) { |
2223 | 0 | features.after_part_split.f_prune_rect[i] = features_prune[i]; |
2224 | 0 | } |
2225 | | |
2226 | | // Send necessary features to the external model. |
2227 | 0 | av1_ext_part_send_features(ext_part_controller, &features); |
2228 | | |
2229 | | // Get partition decisions from the external model. |
2230 | 0 | aom_partition_decision_t decision; |
2231 | 0 | const bool valid_decision = |
2232 | 0 | av1_ext_part_get_partition_decision(ext_part_controller, &decision); |
2233 | 0 | if (!valid_decision) return false; |
2234 | | |
2235 | | // Populate decisions |
2236 | 0 | *prune_rect_part_horz = decision.prune_rect_part[0]; |
2237 | 0 | *prune_rect_part_vert = decision.prune_rect_part[1]; |
2238 | |
|
2239 | 0 | return true; |
2240 | 0 | } |
2241 | | |
2242 | | // If the external partition model is used, we let it determine partition |
2243 | | // decisions after rectangular partition. Specifically, these parameters: |
2244 | | // horza_partition_allowed |
2245 | | // horzb_partition_allowed |
2246 | | // verta_partition_allowed |
2247 | | // vertb_partition_allowed |
2248 | | static bool ext_ml_model_decision_after_rect( |
2249 | | ExtPartController *const ext_part_controller, const int is_intra_frame, |
2250 | | const float *const features_after_rect, int *horza_partition_allowed, |
2251 | | int *horzb_partition_allowed, int *verta_partition_allowed, |
2252 | 0 | int *vertb_partition_allowed) { |
2253 | 0 | if (is_intra_frame || !ext_part_controller->ready) return false; |
2254 | | |
2255 | | // Setup features. |
2256 | 0 | aom_partition_features_t features; |
2257 | 0 | features.id = AOM_EXT_PART_FEATURE_AFTER_RECT; |
2258 | 0 | for (int i = 0; i < 10; ++i) { |
2259 | 0 | features.after_part_rect.f[i] = features_after_rect[i]; |
2260 | 0 | } |
2261 | | |
2262 | | // Send necessary features to the external model. |
2263 | 0 | av1_ext_part_send_features(ext_part_controller, &features); |
2264 | | |
2265 | | // Get partition decisions from the external model. |
2266 | 0 | aom_partition_decision_t decision; |
2267 | 0 | const bool valid_decision = |
2268 | 0 | av1_ext_part_get_partition_decision(ext_part_controller, &decision); |
2269 | 0 | if (!valid_decision) return false; |
2270 | | |
2271 | | // Populate decisions |
2272 | 0 | *horza_partition_allowed = decision.horza_partition_allowed; |
2273 | 0 | *horzb_partition_allowed = decision.horzb_partition_allowed; |
2274 | 0 | *verta_partition_allowed = decision.verta_partition_allowed; |
2275 | 0 | *vertb_partition_allowed = decision.vertb_partition_allowed; |
2276 | |
|
2277 | 0 | return true; |
2278 | 0 | } |
2279 | | |
2280 | | // If the external partition model is used, we let it determine partition |
2281 | | // decisions after AB partition. Specifically, these parameters: |
2282 | | // partition_vert4_allowed |
2283 | | // partition_horz4_allowed |
2284 | | static bool ext_ml_model_decision_after_part_ab( |
2285 | | AV1_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize, int part_ctx, |
2286 | | int64_t best_rd, int64_t rect_part_rd[NUM_RECT_PARTS][SUB_PARTITIONS_RECT], |
2287 | | int64_t split_rd[SUB_PARTITIONS_SPLIT], int *const partition_horz4_allowed, |
2288 | | int *const partition_vert4_allowed, unsigned int pb_source_variance, |
2289 | 0 | int mi_row, int mi_col) { |
2290 | 0 | const AV1_COMMON *const cm = &cpi->common; |
2291 | 0 | ExtPartController *const ext_part_controller = &cpi->ext_part_controller; |
2292 | |
|
2293 | 0 | if (!frame_is_intra_only(cm) && ext_part_controller->ready) { |
2294 | | // Setup features. |
2295 | 0 | aom_partition_features_t features; |
2296 | 0 | features.id = AOM_EXT_PART_FEATURE_AFTER_AB; |
2297 | 0 | prepare_features_after_part_ab(cpi, x, bsize, part_ctx, best_rd, |
2298 | 0 | rect_part_rd, split_rd, pb_source_variance, |
2299 | 0 | mi_row, mi_col, &features); |
2300 | | |
2301 | | // Send necessary features to the external model. |
2302 | 0 | av1_ext_part_send_features(ext_part_controller, &features); |
2303 | | |
2304 | | // Get partition decisions from the external model. |
2305 | 0 | aom_partition_decision_t decision; |
2306 | 0 | const bool valid_decision = |
2307 | 0 | av1_ext_part_get_partition_decision(ext_part_controller, &decision); |
2308 | 0 | if (!valid_decision) return false; |
2309 | | |
2310 | | // Populate decisions |
2311 | 0 | *partition_horz4_allowed = decision.partition_horz4_allowed; |
2312 | 0 | *partition_vert4_allowed = decision.partition_vert4_allowed; |
2313 | |
|
2314 | 0 | return true; |
2315 | 0 | } |
2316 | | |
2317 | 0 | return false; |
2318 | 0 | } |
2319 | | |
2320 | | // This function resembles "av1_setup_sms_tree()" in context_tree.c |
2321 | | // with function signature change. |
2322 | | static SIMPLE_MOTION_DATA_TREE *setup_sms_tree( |
2323 | 0 | AV1_COMP *const cpi, SIMPLE_MOTION_DATA_TREE *sms_tree) { |
2324 | 0 | AV1_COMMON *const cm = &cpi->common; |
2325 | 0 | const int stat_generation_stage = is_stat_generation_stage(cpi); |
2326 | 0 | const int is_sb_size_128 = cm->seq_params->sb_size == BLOCK_128X128; |
2327 | 0 | const int tree_nodes = |
2328 | 0 | av1_get_pc_tree_nodes(is_sb_size_128, stat_generation_stage); |
2329 | 0 | int sms_tree_index = 0; |
2330 | 0 | SIMPLE_MOTION_DATA_TREE *this_sms; |
2331 | 0 | int square_index = 1; |
2332 | 0 | int nodes; |
2333 | 0 | this_sms = &sms_tree[0]; |
2334 | |
|
2335 | 0 | if (!stat_generation_stage) { |
2336 | 0 | const int leaf_factor = is_sb_size_128 ? 4 : 1; |
2337 | 0 | const int leaf_nodes = 256 * leaf_factor; |
2338 | | |
2339 | | // Sets up all the leaf nodes in the tree. |
2340 | 0 | for (sms_tree_index = 0; sms_tree_index < leaf_nodes; ++sms_tree_index) { |
2341 | 0 | SIMPLE_MOTION_DATA_TREE *const tree = &sms_tree[sms_tree_index]; |
2342 | 0 | tree->block_size = square[0]; |
2343 | 0 | } |
2344 | | |
2345 | | // Each node has 4 leaf nodes, fill each block_size level of the tree |
2346 | | // from leafs to the root. |
2347 | 0 | for (nodes = leaf_nodes >> 2; nodes > 0; nodes >>= 2) { |
2348 | 0 | for (int i = 0; i < nodes; ++i) { |
2349 | 0 | SIMPLE_MOTION_DATA_TREE *const tree = &sms_tree[sms_tree_index]; |
2350 | 0 | tree->block_size = square[square_index]; |
2351 | 0 | for (int j = 0; j < 4; j++) tree->split[j] = this_sms++; |
2352 | 0 | ++sms_tree_index; |
2353 | 0 | } |
2354 | 0 | ++square_index; |
2355 | 0 | } |
2356 | 0 | } else { |
2357 | | // Allocation for firstpass/LAP stage |
2358 | | // TODO(Mufaddal): refactor square_index to use a common block_size macro |
2359 | | // from firstpass.c |
2360 | 0 | SIMPLE_MOTION_DATA_TREE *const tree = &sms_tree[sms_tree_index]; |
2361 | 0 | square_index = 2; |
2362 | 0 | tree->block_size = square[square_index]; |
2363 | 0 | } |
2364 | | |
2365 | | // Set up the root node for the largest superblock size |
2366 | 0 | return &sms_tree[tree_nodes - 1]; |
2367 | 0 | } |
2368 | | |
2369 | | static void write_motion_feature_to_file( |
2370 | | const char *const path, const int sb_counter, const unsigned int *block_sse, |
2371 | | const unsigned int *block_var, const int num_blocks, const BLOCK_SIZE bsize, |
2372 | 0 | const BLOCK_SIZE fixed_block_size, const int mi_row, const int mi_col) { |
2373 | 0 | char filename[256]; |
2374 | 0 | snprintf(filename, sizeof(filename), "%s/motion_search_feature_sb%d", path, |
2375 | 0 | sb_counter); |
2376 | 0 | FILE *pfile = fopen(filename, "w"); |
2377 | 0 | fprintf(pfile, "%d,%d,%d,%d,%d\n", mi_row, mi_col, bsize, |
2378 | 0 | block_size_wide[fixed_block_size], num_blocks); |
2379 | 0 | for (int i = 0; i < num_blocks; ++i) { |
2380 | 0 | fprintf(pfile, "%d", block_sse[i]); |
2381 | 0 | if (i < num_blocks - 1) fprintf(pfile, ","); |
2382 | 0 | } |
2383 | 0 | fprintf(pfile, "\n"); |
2384 | 0 | for (int i = 0; i < num_blocks; ++i) { |
2385 | 0 | fprintf(pfile, "%d", block_var[i]); |
2386 | 0 | if (i < num_blocks - 1) fprintf(pfile, ","); |
2387 | 0 | } |
2388 | 0 | fprintf(pfile, "\n"); |
2389 | 0 | fclose(pfile); |
2390 | 0 | } |
2391 | | |
2392 | | void av1_collect_motion_search_features_sb(AV1_COMP *const cpi, ThreadData *td, |
2393 | | TileDataEnc *tile_data, |
2394 | | const int mi_row, const int mi_col, |
2395 | | const BLOCK_SIZE bsize, |
2396 | 0 | aom_partition_features_t *features) { |
2397 | 0 | const AV1_COMMON *const cm = &cpi->common; |
2398 | 0 | if (frame_is_intra_only(cm)) return; |
2399 | | |
2400 | 0 | MACROBLOCK *const x = &td->mb; |
2401 | 0 | const BLOCK_SIZE fixed_block_size = BLOCK_16X16; |
2402 | 0 | const int col_step = mi_size_wide[fixed_block_size]; |
2403 | 0 | const int row_step = mi_size_high[fixed_block_size]; |
2404 | 0 | SIMPLE_MOTION_DATA_TREE *sms_tree = NULL; |
2405 | 0 | const int stat_generation_stage = is_stat_generation_stage(cpi); |
2406 | 0 | const int is_sb_size_128 = cm->seq_params->sb_size == BLOCK_128X128; |
2407 | 0 | const int tree_nodes = |
2408 | 0 | av1_get_pc_tree_nodes(is_sb_size_128, stat_generation_stage); |
2409 | 0 | CHECK_MEM_ERROR(cm, sms_tree, aom_calloc(tree_nodes, sizeof(*sms_tree))); |
2410 | 0 | SIMPLE_MOTION_DATA_TREE *sms_root = setup_sms_tree(cpi, sms_tree); |
2411 | 0 | TileInfo *const tile_info = &tile_data->tile_info; |
2412 | 0 | av1_set_offsets_without_segment_id(cpi, tile_info, x, mi_row, mi_col, bsize); |
2413 | 0 | av1_init_simple_motion_search_mvs_for_sb(cpi, NULL, x, sms_root, mi_row, |
2414 | 0 | mi_col); |
2415 | 0 | av1_reset_simple_motion_tree_partition(sms_root, bsize); |
2416 | 0 | const int ref_list[] = { cpi->rc.is_src_frame_alt_ref ? ALTREF_FRAME |
2417 | 0 | : LAST_FRAME }; |
2418 | 0 | const int mi_width = |
2419 | 0 | AOMMIN(mi_size_wide[bsize], cm->mi_params.mi_cols - mi_col); |
2420 | 0 | const int mi_height = |
2421 | 0 | AOMMIN(mi_size_high[bsize], cm->mi_params.mi_rows - mi_row); |
2422 | 0 | const int col_steps = (mi_width / col_step) + ((mi_width % col_step) > 0); |
2423 | 0 | const int row_steps = (mi_height / row_step) + ((mi_height % row_step) > 0); |
2424 | 0 | const int num_blocks = col_steps * row_steps; |
2425 | 0 | unsigned int *block_sse = aom_calloc(num_blocks, sizeof(*block_sse)); |
2426 | 0 | unsigned int *block_var = aom_calloc(num_blocks, sizeof(*block_var)); |
2427 | 0 | if (!(block_sse && block_var)) { |
2428 | 0 | aom_free(sms_tree); |
2429 | 0 | aom_free(block_sse); |
2430 | 0 | aom_free(block_var); |
2431 | 0 | aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR, |
2432 | 0 | "Error allocating block_sse & block_var"); |
2433 | 0 | } |
2434 | 0 | int idx = 0; |
2435 | |
|
2436 | 0 | for (int row = mi_row; |
2437 | 0 | row < AOMMIN(mi_row + mi_size_high[bsize], cm->mi_params.mi_rows); |
2438 | 0 | row += row_step) { |
2439 | 0 | for (int col = mi_col; |
2440 | 0 | col < AOMMIN(mi_col + mi_size_wide[bsize], cm->mi_params.mi_cols); |
2441 | 0 | col += col_step) { |
2442 | 0 | simple_motion_search_get_best_ref( |
2443 | 0 | cpi, x, sms_root, row, col, fixed_block_size, ref_list, |
2444 | 0 | /*num_refs=*/1, /*use_subpixel=*/1, |
2445 | 0 | /*save_mv=*/1, &block_sse[idx], &block_var[idx]); |
2446 | 0 | ++idx; |
2447 | 0 | } |
2448 | 0 | } |
2449 | 0 | if (features == NULL) { |
2450 | 0 | write_motion_feature_to_file(cpi->oxcf.partition_info_path, cpi->sb_counter, |
2451 | 0 | block_sse, block_var, idx, bsize, |
2452 | 0 | fixed_block_size, mi_row, mi_col); |
2453 | 0 | } else { |
2454 | 0 | features->sb_features.motion_features.unit_length = |
2455 | 0 | block_size_wide[fixed_block_size]; |
2456 | 0 | features->sb_features.motion_features.num_units = idx; |
2457 | 0 | for (int i = 0; i < idx; ++i) { |
2458 | 0 | features->sb_features.motion_features.block_sse[i] = block_sse[i]; |
2459 | 0 | features->sb_features.motion_features.block_var[i] = block_var[i]; |
2460 | 0 | } |
2461 | 0 | } |
2462 | |
|
2463 | 0 | aom_free(block_sse); |
2464 | 0 | aom_free(block_var); |
2465 | 0 | aom_free(sms_tree); |
2466 | 0 | } |
2467 | | |
2468 | | #if CONFIG_PARTITION_SEARCH_ORDER |
2469 | | void av1_prepare_motion_search_features_block( |
2470 | | AV1_COMP *const cpi, ThreadData *td, TileDataEnc *tile_data, |
2471 | | const int mi_row, const int mi_col, const BLOCK_SIZE bsize, |
2472 | | const int valid_partition_types, unsigned int *block_sse, |
2473 | | unsigned int *block_var, unsigned int sub_block_sse[4], |
2474 | | unsigned int sub_block_var[4], unsigned int horz_block_sse[2], |
2475 | | unsigned int horz_block_var[2], unsigned int vert_block_sse[2], |
2476 | | unsigned int vert_block_var[2]) { |
2477 | | const AV1_COMMON *const cm = &cpi->common; |
2478 | | if (frame_is_intra_only(cm)) return; |
2479 | | MACROBLOCK *const x = &td->mb; |
2480 | | SIMPLE_MOTION_DATA_TREE *sms_tree = NULL; |
2481 | | const int stat_generation_stage = is_stat_generation_stage(cpi); |
2482 | | const int is_sb_size_128 = cm->seq_params->sb_size == BLOCK_128X128; |
2483 | | const int tree_nodes = |
2484 | | av1_get_pc_tree_nodes(is_sb_size_128, stat_generation_stage); |
2485 | | CHECK_MEM_ERROR(cm, sms_tree, aom_calloc(tree_nodes, sizeof(*sms_tree))); |
2486 | | SIMPLE_MOTION_DATA_TREE *sms_root = setup_sms_tree(cpi, sms_tree); |
2487 | | TileInfo *const tile_info = &tile_data->tile_info; |
2488 | | av1_set_offsets_without_segment_id(cpi, tile_info, x, mi_row, mi_col, bsize); |
2489 | | av1_reset_simple_motion_tree_partition(sms_root, bsize); |
2490 | | const int ref_list[] = { cpi->rc.is_src_frame_alt_ref ? ALTREF_FRAME |
2491 | | : LAST_FRAME }; |
2492 | | const int sub_mi_width = mi_size_wide[bsize] / 2; |
2493 | | const int sub_mi_height = sub_mi_width; |
2494 | | simple_motion_search_get_best_ref( |
2495 | | cpi, x, sms_root, mi_row, mi_col, bsize, ref_list, /*num_refs=*/1, |
2496 | | /*use_subpixel=*/1, /*save_mv=*/1, block_sse, block_var); |
2497 | | // Split to 4 sub blocks. |
2498 | | if (valid_partition_types & (1 << PARTITION_SPLIT)) { |
2499 | | const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT); |
2500 | | for (int i = 0; i < 4; ++i) { |
2501 | | const int row = mi_row + (i >> 1) * sub_mi_height; |
2502 | | const int col = mi_col + (i & 1) * sub_mi_width; |
2503 | | simple_motion_search_get_best_ref(cpi, x, sms_root, row, col, subsize, |
2504 | | ref_list, /*num_refs=*/1, |
2505 | | /*use_subpixel=*/1, /*save_mv=*/1, |
2506 | | &sub_block_sse[i], &sub_block_var[i]); |
2507 | | } |
2508 | | } |
2509 | | // Horizontal split |
2510 | | if (valid_partition_types & (1 << PARTITION_HORZ)) { |
2511 | | const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_HORZ); |
2512 | | for (int i = 0; i < 2; ++i) { |
2513 | | const int row = mi_row + (i & 1) * sub_mi_height; |
2514 | | const int col = mi_col; |
2515 | | simple_motion_search_get_best_ref(cpi, x, sms_root, row, col, subsize, |
2516 | | ref_list, /*num_refs=*/1, |
2517 | | /*use_subpixel=*/1, /*save_mv=*/1, |
2518 | | &horz_block_sse[i], &horz_block_var[i]); |
2519 | | } |
2520 | | } |
2521 | | // Vertical split |
2522 | | if (valid_partition_types & (1 << PARTITION_VERT)) { |
2523 | | const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_VERT); |
2524 | | for (int i = 0; i < 2; ++i) { |
2525 | | const int row = mi_row; |
2526 | | const int col = mi_col + (i & 1) * sub_mi_width; |
2527 | | simple_motion_search_get_best_ref(cpi, x, sms_root, row, col, subsize, |
2528 | | ref_list, /*num_refs=*/1, |
2529 | | /*use_subpixel=*/1, /*save_mv=*/1, |
2530 | | &vert_block_sse[i], &vert_block_var[i]); |
2531 | | } |
2532 | | } |
2533 | | |
2534 | | aom_free(sms_tree); |
2535 | | } |
2536 | | #endif // CONFIG_PARTITION_SEARCH_ORDER |
2537 | | #endif // !CONFIG_REALTIME_ONLY |
2538 | | |
2539 | | static inline void init_simple_motion_search_mvs( |
2540 | 0 | SIMPLE_MOTION_DATA_TREE *sms_tree, const FULLPEL_MV *start_mvs) { |
2541 | 0 | memcpy(sms_tree->start_mvs, start_mvs, sizeof(sms_tree->start_mvs)); |
2542 | 0 | av1_zero(sms_tree->sms_none_feat); |
2543 | 0 | av1_zero(sms_tree->sms_rect_feat); |
2544 | 0 | av1_zero(sms_tree->sms_none_valid); |
2545 | 0 | av1_zero(sms_tree->sms_rect_valid); |
2546 | |
|
2547 | 0 | if (sms_tree->block_size >= BLOCK_8X8) { |
2548 | 0 | init_simple_motion_search_mvs(sms_tree->split[0], start_mvs); |
2549 | 0 | init_simple_motion_search_mvs(sms_tree->split[1], start_mvs); |
2550 | 0 | init_simple_motion_search_mvs(sms_tree->split[2], start_mvs); |
2551 | 0 | init_simple_motion_search_mvs(sms_tree->split[3], start_mvs); |
2552 | 0 | } |
2553 | 0 | } |
2554 | | |
2555 | | void av1_init_simple_motion_search_mvs_for_sb(const AV1_COMP *cpi, |
2556 | | const TileInfo *tile_info, |
2557 | | MACROBLOCK *x, |
2558 | | SIMPLE_MOTION_DATA_TREE *sms_root, |
2559 | 0 | int mi_row, int mi_col) { |
2560 | | // Use the NEARESTMV of the sb as the start mv |
2561 | 0 | const AV1_COMMON *cm = &cpi->common; |
2562 | 0 | MACROBLOCKD *const xd = &x->e_mbd; |
2563 | 0 | FULLPEL_MV ref_mvs[REF_FRAMES]; |
2564 | 0 | const BLOCK_SIZE sb_size = cm->seq_params->sb_size; |
2565 | 0 | av1_zero(ref_mvs); |
2566 | | // If tile_info is NULL, assume that the offsets have already been set. |
2567 | 0 | if (tile_info) { |
2568 | 0 | av1_set_offsets_without_segment_id(cpi, tile_info, x, mi_row, mi_col, |
2569 | 0 | sb_size); |
2570 | 0 | } |
2571 | |
|
2572 | 0 | MB_MODE_INFO_EXT mbmi_ext; |
2573 | 0 | const int ref_frame = |
2574 | 0 | cpi->rc.is_src_frame_alt_ref ? ALTREF_FRAME : LAST_FRAME; |
2575 | 0 | av1_find_mv_refs(cm, xd, xd->mi[0], ref_frame, mbmi_ext.ref_mv_count, |
2576 | 0 | xd->ref_mv_stack, xd->weight, NULL, mbmi_ext.global_mvs, |
2577 | 0 | mbmi_ext.mode_context); |
2578 | 0 | if (mbmi_ext.ref_mv_count[ref_frame] > 0) { |
2579 | 0 | ref_mvs[ref_frame] = |
2580 | 0 | get_fullmv_from_mv(&xd->ref_mv_stack[ref_frame][0].this_mv.as_mv); |
2581 | 0 | } else { |
2582 | 0 | ref_mvs[ref_frame] = |
2583 | 0 | get_fullmv_from_mv(&mbmi_ext.global_mvs[ref_frame].as_mv); |
2584 | 0 | } |
2585 | |
|
2586 | 0 | init_simple_motion_search_mvs(sms_root, ref_mvs); |
2587 | 0 | } |