/src/libavif/ext/aom/av1/encoder/partition_strategy.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2019, Alliance for Open Media. All rights reserved. |
3 | | * |
4 | | * This source code is subject to the terms of the BSD 2 Clause License and |
5 | | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
6 | | * was not distributed with this source code in the LICENSE file, you can |
7 | | * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
8 | | * Media Patent License 1.0 was not distributed with this source code in the |
9 | | * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
10 | | */ |
11 | | |
12 | | #include <float.h> |
13 | | |
14 | | #include "config/aom_config.h" |
15 | | |
16 | | #include "av1/encoder/encodeframe_utils.h" |
17 | | #if CONFIG_THREE_PASS |
18 | | #include "av1/encoder/thirdpass.h" |
19 | | #endif |
20 | | #include "config/aom_dsp_rtcd.h" |
21 | | |
22 | | #include "av1/common/enums.h" |
23 | | #include "av1/common/reconinter.h" |
24 | | |
25 | | #if !CONFIG_REALTIME_ONLY |
26 | | #include "av1/encoder/cnn.h" |
27 | | #include "av1/encoder/partition_model_weights.h" |
28 | | #include "av1/encoder/partition_cnn_weights.h" |
29 | | #endif |
30 | | #include "av1/encoder/encoder.h" |
31 | | |
32 | | #include "av1/encoder/motion_search_facade.h" |
33 | | #include "av1/encoder/partition_strategy.h" |
34 | | #include "av1/encoder/partition_search.h" |
35 | | #include "av1/encoder/rdopt.h" |
36 | | |
37 | | #if !CONFIG_REALTIME_ONLY |
38 | | static inline void simple_motion_search_prune_part_features( |
39 | | AV1_COMP *const cpi, MACROBLOCK *x, SIMPLE_MOTION_DATA_TREE *sms_tree, |
40 | | int mi_row, int mi_col, BLOCK_SIZE bsize, float *features, |
41 | | int features_to_get); |
42 | | |
43 | | static bool ext_ml_model_decision_before_none( |
44 | | AV1_COMP *cpi, const float features_from_motion[FEATURE_SIZE_SMS_SPLIT], |
45 | | int *partition_none_allowed, int *partition_horz_allowed, |
46 | | int *partition_vert_allowed, int *do_rectangular_split, |
47 | | int *do_square_split); |
48 | | |
49 | | static bool ext_ml_model_decision_before_none_part2( |
50 | | AV1_COMP *cpi, |
51 | | const float features_from_motion[FEATURE_SIZE_SMS_PRUNE_PART], |
52 | | int *prune_horz, int *prune_vert); |
53 | | |
54 | | static bool ext_ml_model_decision_after_none( |
55 | | ExtPartController *const ext_part_controller, const int is_intra_frame, |
56 | | const float *const features_after_none, int *do_square_split, |
57 | | int *do_rectangular_split); |
58 | | |
59 | | static bool ext_ml_model_decision_after_none_part2( |
60 | | AV1_COMP *const cpi, const float *const features_terminate, |
61 | | int *terminate_partition_search); |
62 | | |
63 | | static bool ext_ml_model_decision_after_split( |
64 | | AV1_COMP *const cpi, const float *const features_terminate, |
65 | | int *terminate_partition_search); |
66 | | |
67 | | static bool ext_ml_model_decision_after_split_part2( |
68 | | ExtPartController *const ext_part_controller, const int is_intra_frame, |
69 | | const float *const features_prune, int *prune_rect_part_horz, |
70 | | int *prune_rect_part_vert); |
71 | | |
72 | | static bool ext_ml_model_decision_after_rect( |
73 | | ExtPartController *const ext_part_controller, const int is_intra_frame, |
74 | | const float *const features_after_rect, int *horza_partition_allowed, |
75 | | int *horzb_partition_allowed, int *verta_partition_allowed, |
76 | | int *vertb_partition_allowed); |
77 | | |
78 | | static bool ext_ml_model_decision_after_part_ab( |
79 | | AV1_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize, int part_ctx, |
80 | | int64_t best_rd, int64_t rect_part_rd[NUM_RECT_PARTS][SUB_PARTITIONS_RECT], |
81 | | int64_t split_rd[SUB_PARTITIONS_SPLIT], int *const partition_horz4_allowed, |
82 | | int *const partition_vert4_allowed, unsigned int pb_source_variance, |
83 | | int mi_row, int mi_col); |
84 | | |
85 | 3.40M | static inline int convert_bsize_to_idx(BLOCK_SIZE bsize) { |
86 | 3.40M | switch (bsize) { |
87 | 0 | case BLOCK_128X128: return 0; |
88 | 65.5k | case BLOCK_64X64: return 1; |
89 | 260k | case BLOCK_32X32: return 2; |
90 | 697k | case BLOCK_16X16: return 3; |
91 | 2.38M | case BLOCK_8X8: return 4; |
92 | 0 | default: assert(0 && "Invalid bsize"); return -1; |
93 | 3.40M | } |
94 | 3.40M | } |
95 | | |
// Returns the base name of the file that receives the feature dump for the
// partition-search stage identified by id. Valid ids are 0 .. 7, in the order
// of the table below. An out-of-range id asserts in debug builds and returns
// NULL otherwise, instead of reading past the end of the table.
// The returned string is a literal and must not be modified or freed.
static char *get_feature_file_name(int id) {
  // The table itself is immutable; only the element type stays `char *` so the
  // function's return type is unchanged.
  static char *const feature_file_names[] = {
    "feature_before_partition_none",
    "feature_before_partition_none_prune_rect",
    "feature_after_partition_none_prune",
    "feature_after_partition_none_terminate",
    "feature_after_partition_split_terminate",
    "feature_after_partition_split_prune_rect",
    "feature_after_partition_rect",
    "feature_after_partition_ab",
  };
  const int num_names =
      (int)(sizeof(feature_file_names) / sizeof(feature_file_names[0]));

  if (id < 0 || id >= num_names) {
    assert(0 && "Invalid feature file id");
    return NULL;
  }

  return feature_file_names[id];
}
110 | | |
111 | | static void write_features_to_file(const char *const path, |
112 | | const bool is_test_mode, |
113 | | const float *features, |
114 | | const int feature_size, const int id, |
115 | | const BLOCK_SIZE bsize, const int mi_row, |
116 | 379k | const int mi_col) { |
117 | 379k | if (!WRITE_FEATURE_TO_FILE && !is_test_mode) return; |
118 | | |
119 | 11 | char filename[256]; |
120 | 11 | snprintf(filename, sizeof(filename), "%s/%s", path, |
121 | 11 | get_feature_file_name(id)); |
122 | 11 | FILE *pfile = fopen(filename, "a"); |
123 | 11 | if (pfile == NULL) return; |
124 | 11 | if (!is_test_mode) { |
125 | 0 | fprintf(pfile, "%d,%d,%d,%d,%d\n", id, (int)bsize, mi_row, mi_col, |
126 | 0 | feature_size); |
127 | 0 | } |
128 | 11 | for (int i = 0; i < feature_size; ++i) { |
129 | 0 | fprintf(pfile, "%.6f", features[i]); |
130 | 0 | if (i < feature_size - 1) fprintf(pfile, ","); |
131 | 0 | } |
132 | 11 | fprintf(pfile, "\n"); |
133 | 11 | fclose(pfile); |
134 | 11 | } |
135 | | |
136 | | // TODO(chiyotsai@google.com): This is very much a work in progress. We still |
137 | | // need to the following: |
138 | | // -- add support for hdres |
139 | | // -- add support for pruning rectangular partitions |
140 | | // -- use reconstructed pixels instead of source pixels for padding |
141 | | // -- use chroma pixels in addition to luma pixels |
142 | | static void intra_mode_cnn_partition(const AV1_COMMON *const cm, MACROBLOCK *x, |
143 | | int quad_tree_idx, |
144 | | int intra_cnn_based_part_prune_level, |
145 | 3.04M | PartitionSearchState *part_state) { |
146 | 3.04M | assert(cm->seq_params->sb_size >= BLOCK_64X64 && |
147 | 3.04M | "Invalid sb_size for intra_cnn!"); |
148 | 3.04M | const PartitionBlkParams *blk_params = &part_state->part_blk_params; |
149 | 3.04M | const BLOCK_SIZE bsize = blk_params->bsize; |
150 | | |
151 | 3.04M | const int bsize_idx = convert_bsize_to_idx(bsize); |
152 | | |
153 | 3.04M | if (bsize == BLOCK_128X128) { |
154 | 0 | return; |
155 | 0 | } |
156 | | |
157 | 3.04M | PartitionSearchInfo *part_info = &x->part_search_info; |
158 | | |
159 | | // Precompute the CNN part and cache the result in MACROBLOCK |
160 | 3.04M | if (bsize == BLOCK_64X64 && !part_info->cnn_output_valid) { |
161 | 37.5k | const CNN_CONFIG *cnn_config = &av1_intra_mode_cnn_partition_cnn_config; |
162 | | |
163 | | // Prepare the output |
164 | 37.5k | const CNN_THREAD_DATA thread_data = { .num_workers = 1, .workers = NULL }; |
165 | 37.5k | const int num_outputs = 4; |
166 | 37.5k | const int output_dims[4] = { 1, 2, 4, 8 }; |
167 | 37.5k | const int out_chs[4] = { CNN_BRANCH_0_OUT_CH, CNN_BRANCH_1_OUT_CH, |
168 | 37.5k | CNN_BRANCH_2_OUT_CH, CNN_BRANCH_3_OUT_CH }; |
169 | 37.5k | float *output_buffer[CNN_TOT_OUT_CH]; |
170 | | |
171 | 37.5k | float **cur_output_buf = output_buffer; |
172 | 37.5k | float *curr_buf_ptr = part_info->cnn_buffer; |
173 | 187k | for (int output_idx = 0; output_idx < num_outputs; output_idx++) { |
174 | 149k | const int num_chs = out_chs[output_idx]; |
175 | 149k | const int ch_size = output_dims[output_idx] * output_dims[output_idx]; |
176 | 2.55M | for (int ch = 0; ch < num_chs; ch++) { |
177 | 2.40M | cur_output_buf[ch] = curr_buf_ptr; |
178 | 2.40M | curr_buf_ptr += ch_size; |
179 | 2.40M | } |
180 | 149k | cur_output_buf += num_chs; |
181 | 149k | } |
182 | | |
183 | 37.5k | CNN_MULTI_OUT output = { |
184 | 37.5k | .num_outputs = 4, |
185 | 37.5k | .output_channels = out_chs, |
186 | 37.5k | .output_strides = output_dims, |
187 | 37.5k | .output_buffer = output_buffer, |
188 | 37.5k | }; |
189 | | |
190 | | // Prepare the input |
191 | 37.5k | const MACROBLOCKD *xd = &x->e_mbd; |
192 | 37.5k | const int bit_depth = xd->bd; |
193 | 37.5k | const int dc_q = |
194 | 37.5k | av1_dc_quant_QTX(x->qindex, 0, bit_depth) >> (bit_depth - 8); |
195 | 37.5k | part_info->log_q = log1pf((float)(dc_q * dc_q) / 256.0f); |
196 | 37.5k | part_info->log_q = |
197 | 37.5k | (part_info->log_q - av1_intra_mode_cnn_partition_mean[0]) / |
198 | 37.5k | av1_intra_mode_cnn_partition_std[0]; |
199 | | |
200 | 37.5k | const int width = 65, height = 65, |
201 | 37.5k | stride = x->plane[AOM_PLANE_Y].src.stride; |
202 | | |
203 | 37.5k | if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { |
204 | 2.83k | uint16_t *image[1] = { |
205 | 2.83k | CONVERT_TO_SHORTPTR(x->plane[AOM_PLANE_Y].src.buf) - stride - 1 |
206 | 2.83k | }; |
207 | | |
208 | 2.83k | if (!av1_cnn_predict_img_multi_out_highbd(image, width, height, stride, |
209 | 2.83k | cnn_config, &thread_data, |
210 | 2.83k | bit_depth, &output)) { |
211 | 0 | aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR, |
212 | 0 | "Error allocating CNN data"); |
213 | 0 | return; |
214 | 0 | } |
215 | 34.7k | } else { |
216 | 34.7k | uint8_t *image[1] = { x->plane[AOM_PLANE_Y].src.buf - stride - 1 }; |
217 | | |
218 | 34.7k | if (!av1_cnn_predict_img_multi_out(image, width, height, stride, |
219 | 34.7k | cnn_config, &thread_data, &output)) { |
220 | 0 | aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR, |
221 | 0 | "Error allocating CNN data"); |
222 | 0 | return; |
223 | 0 | } |
224 | 34.7k | } |
225 | | |
226 | 37.5k | part_info->cnn_output_valid = 1; |
227 | 37.5k | } |
228 | | |
229 | 3.04M | if (!part_info->cnn_output_valid) { |
230 | 1.84M | return; |
231 | 1.84M | } |
232 | | |
233 | 1.19M | const NN_CONFIG *dnn_configs[5] = { |
234 | 1.19M | NULL, |
235 | 1.19M | &av1_intra_mode_cnn_partition_branch_0_dnn_config, |
236 | 1.19M | &av1_intra_mode_cnn_partition_branch_1_dnn_config, |
237 | 1.19M | &av1_intra_mode_cnn_partition_branch_2_dnn_config, |
238 | 1.19M | &av1_intra_mode_cnn_partition_branch_3_dnn_config, |
239 | 1.19M | }; |
240 | | |
241 | 1.19M | const NN_CONFIG *dnn_config = dnn_configs[bsize_idx]; |
242 | | |
243 | 1.19M | float dnn_features[100]; |
244 | 1.19M | float logits[4] = { 0.0f }; |
245 | | |
246 | 1.19M | const float *branch_0 = part_info->cnn_buffer; |
247 | 1.19M | const float *branch_1 = branch_0 + CNN_BRANCH_0_OUT_SIZE; |
248 | 1.19M | const float *branch_2 = branch_1 + CNN_BRANCH_1_OUT_SIZE; |
249 | 1.19M | const float *branch_3 = branch_2 + CNN_BRANCH_2_OUT_SIZE; |
250 | | |
251 | 1.19M | if (bsize == BLOCK_64X64) { |
252 | 37.5k | int f_idx = 0; |
253 | 788k | for (int ch_idx = 0; ch_idx < CNN_BRANCH_0_OUT_CH; ch_idx++) { |
254 | 751k | dnn_features[f_idx++] = branch_0[ch_idx]; |
255 | 751k | } |
256 | | |
257 | 37.5k | const int spa_stride = 2 * 2; |
258 | 187k | for (int lin_idx = 0; lin_idx < spa_stride; lin_idx++) { |
259 | 751k | for (int ch_idx = 0; ch_idx < CNN_BRANCH_1_OUT_CH; ch_idx++) { |
260 | 601k | dnn_features[f_idx++] = branch_1[lin_idx + ch_idx * spa_stride]; |
261 | 601k | } |
262 | 150k | } |
263 | 37.5k | dnn_features[f_idx++] = part_info->log_q; |
264 | 1.15M | } else if (bsize == BLOCK_32X32) { |
265 | 150k | int f_idx = 0; |
266 | 3.15M | for (int idx = 0; idx < CNN_BRANCH_0_OUT_CH; idx++) { |
267 | 3.00M | dnn_features[f_idx++] = branch_0[idx]; |
268 | 3.00M | } |
269 | | |
270 | 150k | const int curr_lin_idx = quad_to_linear_1[quad_tree_idx - 1]; |
271 | 150k | const int spa_stride = 2 * 2; |
272 | 752k | for (int ch_idx = 0; ch_idx < CNN_BRANCH_1_OUT_CH; ch_idx++) { |
273 | 601k | dnn_features[f_idx++] = branch_1[curr_lin_idx + ch_idx * spa_stride]; |
274 | 601k | } |
275 | 150k | dnn_features[f_idx++] = part_info->log_q; |
276 | 1.00M | } else if (bsize == BLOCK_16X16) { |
277 | 202k | int f_idx = 0; |
278 | 202k | const int prev_quad_idx = (quad_tree_idx - 1) / 4; |
279 | 202k | const int prev_lin_idx = quad_to_linear_1[prev_quad_idx - 1]; |
280 | 202k | const int prev_spa_stride = 2 * 2; |
281 | 1.01M | for (int ch_idx = 0; ch_idx < CNN_BRANCH_1_OUT_CH; ch_idx++) { |
282 | 810k | dnn_features[f_idx++] = branch_1[prev_lin_idx + ch_idx * prev_spa_stride]; |
283 | 810k | } |
284 | | |
285 | 202k | const int curr_lin_idx = quad_to_linear_2[quad_tree_idx - 5]; |
286 | 202k | const int spa_stride = 4 * 4; |
287 | 4.25M | for (int ch_idx = 0; ch_idx < CNN_BRANCH_2_OUT_CH; ch_idx++) { |
288 | 4.05M | dnn_features[f_idx++] = branch_2[curr_lin_idx + ch_idx * spa_stride]; |
289 | 4.05M | } |
290 | 202k | dnn_features[f_idx++] = part_info->log_q; |
291 | 801k | } else if (bsize == BLOCK_8X8) { |
292 | 801k | int f_idx = 0; |
293 | 801k | const int prev_quad_idx = (quad_tree_idx - 1) / 4; |
294 | 801k | const int prev_lin_idx = quad_to_linear_2[prev_quad_idx - 5]; |
295 | 801k | const int prev_spa_stride = 4 * 4; |
296 | 16.8M | for (int ch_idx = 0; ch_idx < CNN_BRANCH_2_OUT_CH; ch_idx++) { |
297 | 16.0M | dnn_features[f_idx++] = branch_2[prev_lin_idx + ch_idx * prev_spa_stride]; |
298 | 16.0M | } |
299 | | |
300 | 801k | const int curr_lin_idx = quad_to_linear_3[quad_tree_idx - 21]; |
301 | 801k | const int spa_stride = 8 * 8; |
302 | 16.8M | for (int ch_idx = 0; ch_idx < CNN_BRANCH_3_OUT_CH; ch_idx++) { |
303 | 16.0M | dnn_features[f_idx++] = branch_3[curr_lin_idx + ch_idx * spa_stride]; |
304 | 16.0M | } |
305 | 801k | dnn_features[f_idx++] = part_info->log_q; |
306 | 801k | } else { |
307 | 156 | assert(0 && "Invalid bsize in intra_cnn partition"); |
308 | 156 | } |
309 | | |
310 | | // Make decision |
311 | 1.19M | av1_nn_predict(dnn_features, dnn_config, 1, logits); |
312 | | |
313 | 1.19M | const int is_720p_or_larger = AOMMIN(cm->width, cm->height) >= 720; |
314 | 1.19M | const int is_480p_or_larger = AOMMIN(cm->width, cm->height) >= 480; |
315 | 1.19M | float split_only_thresh = 100.0f, no_split_thresh = -100.0f; |
316 | 1.19M | if (is_720p_or_larger) { |
317 | 0 | split_only_thresh = |
318 | 0 | av1_intra_mode_cnn_partition_split_thresh_hdres[bsize_idx]; |
319 | 0 | no_split_thresh = |
320 | 0 | av1_intra_mode_cnn_partition_no_split_thresh_hdres[bsize_idx]; |
321 | 1.19M | } else if (is_480p_or_larger) { |
322 | 167k | split_only_thresh = |
323 | 167k | av1_intra_mode_cnn_partition_split_thresh_midres[bsize_idx]; |
324 | 167k | no_split_thresh = |
325 | 167k | av1_intra_mode_cnn_partition_no_split_thresh_midres[bsize_idx]; |
326 | 1.02M | } else { |
327 | 1.02M | split_only_thresh = |
328 | 1.02M | av1_intra_mode_cnn_partition_split_thresh_lowres[bsize_idx]; |
329 | 1.02M | no_split_thresh = |
330 | 1.02M | av1_intra_mode_cnn_partition_no_split_thresh_lowres[bsize_idx]; |
331 | 1.02M | } |
332 | | |
333 | 1.19M | if (logits[0] > split_only_thresh) { |
334 | | // As screen contents tend to choose larger partitions, do not prune |
335 | | // PARTITION_NONE when intra_cnn_based_part_prune_level=1. |
336 | 111k | if (intra_cnn_based_part_prune_level != 1) { |
337 | 111k | part_state->partition_none_allowed = 0; |
338 | 111k | } |
339 | 111k | part_state->do_square_split = 1; |
340 | 111k | av1_disable_rect_partitions(part_state); |
341 | 111k | } |
342 | | |
343 | 1.19M | if (logits[0] < no_split_thresh) { |
344 | 484k | av1_disable_square_split_partition(part_state); |
345 | 484k | } |
346 | 1.19M | } |
347 | | |
348 | | static inline int get_simple_motion_search_prune_agg(int qindex, |
349 | | int prune_level, |
350 | 366k | int is_rect_part) { |
351 | 366k | assert(prune_level < TOTAL_AGG_LVLS); |
352 | 366k | if (prune_level == NO_PRUNING) { |
353 | 0 | return -1; |
354 | 0 | } |
355 | | |
356 | | // Aggressiveness value for SIMPLE_MOTION_SEARCH_PRUNE_LEVEL except |
357 | | // QIDX_BASED_AGG_LVL |
358 | 366k | const int sms_prune_agg_levels[TOTAL_SIMPLE_AGG_LVLS] = { 0, 1, 2, 3, 4, 5 }; |
359 | 366k | if (prune_level < TOTAL_SIMPLE_AGG_LVLS) { |
360 | 366k | return sms_prune_agg_levels[prune_level]; |
361 | 366k | } |
362 | | |
363 | | // Map the QIDX_BASED_AGG_LVL to corresponding aggressiveness value. |
364 | | // Aggressive pruning for lower quantizers in non-boosted frames to prune |
365 | | // rectangular partitions. |
366 | 11 | const int qband = is_rect_part ? (qindex <= 90 ? 1 : 0) : 0; |
367 | 11 | const int sms_prune_agg_qindex_based[2] = { 3, 4 }; |
368 | 11 | return sms_prune_agg_qindex_based[qband]; |
369 | 366k | } |
370 | | |
371 | | // Performs a simple_motion_search with a single reference frame and extract |
372 | | // the variance of residues. Then use the features to determine whether we want |
373 | | // to go straight to splitting without trying PARTITION_NONE |
374 | | static void simple_motion_search_based_split(AV1_COMP *const cpi, MACROBLOCK *x, |
375 | | SIMPLE_MOTION_DATA_TREE *sms_tree, |
376 | 233k | PartitionSearchState *part_state) { |
377 | 233k | const AV1_COMMON *const cm = &cpi->common; |
378 | 233k | const PartitionBlkParams *blk_params = &part_state->part_blk_params; |
379 | 233k | const int mi_row = blk_params->mi_row, mi_col = blk_params->mi_col; |
380 | 233k | const BLOCK_SIZE bsize = blk_params->bsize; |
381 | | |
382 | 233k | const int bsize_idx = convert_bsize_to_idx(bsize); |
383 | 233k | const int is_720p_or_larger = AOMMIN(cm->width, cm->height) >= 720; |
384 | 233k | const int is_480p_or_larger = AOMMIN(cm->width, cm->height) >= 480; |
385 | | // res_idx is 0 for res < 480p, 1 for 480p, 2 for 720p+ |
386 | 233k | const int res_idx = is_480p_or_larger + is_720p_or_larger; |
387 | | |
388 | 233k | assert(bsize_idx >= 0 && bsize_idx <= 4 && |
389 | 233k | "Invalid bsize in simple_motion_search_based_split"); |
390 | | |
391 | 233k | const int agg = get_simple_motion_search_prune_agg( |
392 | 233k | x->qindex, cpi->sf.part_sf.simple_motion_search_prune_agg, 0); |
393 | 233k | if (agg < 0) { |
394 | 0 | return; |
395 | 0 | } |
396 | | |
397 | 233k | int ml_model_index = (agg == SIMPLE_AGG_LVL1 || agg == SIMPLE_AGG_LVL2); |
398 | | |
399 | 233k | const float *ml_mean = |
400 | 233k | av1_simple_motion_search_split_mean[ml_model_index][bsize_idx]; |
401 | 233k | const float *ml_std = |
402 | 233k | av1_simple_motion_search_split_std[ml_model_index][bsize_idx]; |
403 | 233k | const NN_CONFIG *nn_config = |
404 | 233k | av1_simple_motion_search_split_nn_config[ml_model_index][bsize_idx]; |
405 | | |
406 | 233k | const float split_only_thresh = |
407 | 233k | av1_simple_motion_search_split_thresh[agg][res_idx][bsize_idx]; |
408 | 233k | const float no_split_thresh = |
409 | 233k | av1_simple_motion_search_no_split_thresh[agg][res_idx][bsize_idx]; |
410 | | |
411 | 233k | float features[FEATURE_SIZE_SMS_SPLIT] = { 0.0f }; |
412 | 233k | simple_motion_search_prune_part_features(cpi, x, sms_tree, mi_row, mi_col, |
413 | 233k | bsize, features, |
414 | 233k | FEATURE_SMS_SPLIT_MODEL_FLAG); |
415 | | |
416 | | // Write features to file |
417 | 233k | write_features_to_file(cpi->oxcf.partition_info_path, |
418 | 233k | cpi->ext_part_controller.test_mode, features, |
419 | 233k | FEATURE_SIZE_SMS_SPLIT, 0, bsize, mi_row, mi_col); |
420 | | |
421 | | // Note: it is intended to not normalize the features here, to keep it |
422 | | // consistent for all features collected and passed to the external model. |
423 | 233k | if (ext_ml_model_decision_before_none( |
424 | 233k | cpi, features, &part_state->partition_none_allowed, |
425 | 233k | &part_state->partition_rect_allowed[HORZ], |
426 | 233k | &part_state->partition_rect_allowed[VERT], |
427 | 233k | &part_state->do_rectangular_split, &part_state->do_square_split)) { |
428 | 0 | return; |
429 | 0 | } |
430 | | |
431 | 4.20M | for (int idx = 0; idx < FEATURE_SIZE_SMS_SPLIT; idx++) { |
432 | 3.96M | features[idx] = (features[idx] - ml_mean[idx]) / ml_std[idx]; |
433 | 3.96M | } |
434 | | |
435 | 233k | float score = 0.0f; |
436 | | |
437 | 233k | av1_nn_predict(features, nn_config, 1, &score); |
438 | | |
439 | 233k | if (score > split_only_thresh) { |
440 | 232k | av1_set_square_split_only(part_state); |
441 | 232k | } |
442 | | |
443 | 233k | if (cpi->sf.part_sf.simple_motion_search_split >= 2 && |
444 | 233k | score < no_split_thresh) { |
445 | 36 | av1_disable_square_split_partition(part_state); |
446 | 36 | } |
447 | | |
448 | | // If the score is very low, prune rectangular split since it is unlikely to |
449 | | // occur. |
450 | 233k | if (cpi->sf.part_sf.simple_motion_search_rect_split) { |
451 | 233k | const float scale = res_idx >= 2 ? 3.0f : 2.0f; |
452 | 233k | const float rect_split_thresh = |
453 | 233k | scale * av1_simple_motion_search_no_split_thresh[SIMPLE_AGG_LVL3] |
454 | 233k | [res_idx][bsize_idx]; |
455 | 233k | if (score < rect_split_thresh) { |
456 | 0 | part_state->do_rectangular_split = 0; |
457 | 0 | } |
458 | 233k | } |
459 | 233k | } |
460 | | |
461 | | // Given a list of ref frames in refs, performs simple_motion_search on each of |
462 | | // the refs and returns the ref with the smallest sse. Returns -1 if none of the |
463 | | // ref in the list is available. Also stores the best sse and var in best_sse, |
464 | | // best_var, respectively. If save_mv is 0, don't update mv_ref_fulls in |
465 | | // sms_tree. If save_mv is 1, update mv_ref_fulls under sms_tree and the |
466 | | // subtrees. |
467 | | static int simple_motion_search_get_best_ref( |
468 | | AV1_COMP *const cpi, MACROBLOCK *x, SIMPLE_MOTION_DATA_TREE *sms_tree, |
469 | | int mi_row, int mi_col, BLOCK_SIZE bsize, const int *const refs, |
470 | | int num_refs, int use_subpixel, int save_mv, unsigned int *best_sse, |
471 | 2.13M | unsigned int *best_var) { |
472 | 2.13M | const AV1_COMMON *const cm = &cpi->common; |
473 | 2.13M | int best_ref = -1; |
474 | | |
475 | 2.13M | if (mi_col >= cm->mi_params.mi_cols || mi_row >= cm->mi_params.mi_rows) { |
476 | | // If the whole block is outside of the image, set the var and sse to 0. |
477 | 395k | *best_var = 0; |
478 | 395k | *best_sse = 0; |
479 | | |
480 | 395k | return best_ref; |
481 | 395k | } |
482 | | |
483 | | // Otherwise do loop through the reference frames and find the one with the |
484 | | // minimum SSE |
485 | 1.73M | const int num_planes = 1; |
486 | | |
487 | 1.73M | *best_sse = INT_MAX; |
488 | | |
489 | 3.47M | for (int ref_idx = 0; ref_idx < num_refs; ref_idx++) { |
490 | 1.73M | const int ref = refs[ref_idx]; |
491 | | |
492 | 1.73M | if (cpi->ref_frame_flags & av1_ref_frame_flag_list[ref]) { |
493 | 1.73M | const FULLPEL_MV *start_mvs = sms_tree->start_mvs; |
494 | 1.73M | unsigned int curr_sse = 0, curr_var = 0; |
495 | 1.73M | const int_mv best_mv = av1_simple_motion_search_sse_var( |
496 | 1.73M | cpi, x, mi_row, mi_col, bsize, ref, start_mvs[ref], num_planes, |
497 | 1.73M | use_subpixel, &curr_sse, &curr_var); |
498 | 1.73M | if (curr_sse < *best_sse) { |
499 | 1.73M | *best_sse = curr_sse; |
500 | 1.73M | *best_var = curr_var; |
501 | 1.73M | best_ref = ref; |
502 | 1.73M | } |
503 | | |
504 | 1.73M | if (save_mv) { |
505 | 1.28M | sms_tree->start_mvs[ref].row = best_mv.as_mv.row / 8; |
506 | 1.28M | sms_tree->start_mvs[ref].col = best_mv.as_mv.col / 8; |
507 | | |
508 | 1.28M | if (bsize >= BLOCK_8X8) { |
509 | 6.43M | for (int r_idx = 0; r_idx < SUB_PARTITIONS_SPLIT; r_idx++) { |
510 | | // Propagate the new motion vectors to a lower level |
511 | 5.15M | SIMPLE_MOTION_DATA_TREE *sub_tree = sms_tree->split[r_idx]; |
512 | 5.15M | sub_tree->start_mvs[ref] = sms_tree->start_mvs[ref]; |
513 | 5.15M | } |
514 | 1.28M | } |
515 | 1.28M | } |
516 | 1.73M | } |
517 | 1.73M | } |
518 | | |
519 | 1.73M | return best_ref; |
520 | 2.13M | } |
521 | | |
522 | | // Collects features using simple_motion_search and store them in features. The |
523 | | // features are also cached in SIMPLE_MOTION_DATA_TREE. By default, the features |
524 | | // collected are the sse and var from the subblocks flagged by features_to_get. |
525 | | // Furthermore, if features is not NULL, then 7 more features are appended to |
526 | | // the end of features: |
527 | | // - log(1.0 + dc_q ** 2) |
528 | | // - whether an above macroblock exists |
529 | | // - width of above macroblock |
530 | | // - height of above macroblock |
531 | | // - whether a left marcoblock exists |
532 | | // - width of left macroblock |
533 | | // - height of left macroblock |
534 | | static inline void simple_motion_search_prune_part_features( |
535 | | AV1_COMP *const cpi, MACROBLOCK *x, SIMPLE_MOTION_DATA_TREE *sms_tree, |
536 | | int mi_row, int mi_col, BLOCK_SIZE bsize, float *features, |
537 | 379k | int features_to_get) { |
538 | 379k | const int w_mi = mi_size_wide[bsize]; |
539 | 379k | const int h_mi = mi_size_high[bsize]; |
540 | 379k | assert(mi_size_wide[bsize] == mi_size_high[bsize]); |
541 | 379k | assert(bsize >= BLOCK_8X8); |
542 | 379k | assert(cpi->ref_frame_flags & av1_ref_frame_flag_list[LAST_FRAME] || |
543 | 379k | cpi->ref_frame_flags & av1_ref_frame_flag_list[ALTREF_FRAME]); |
544 | | |
545 | | // Setting up motion search |
546 | 379k | const int ref_list[] = { cpi->rc.is_src_frame_alt_ref ? ALTREF_FRAME |
547 | 379k | : LAST_FRAME }; |
548 | 379k | const int num_refs = 1; |
549 | 379k | const int use_subpixel = 1; |
550 | | |
551 | | // Doing whole block first to update the mv |
552 | 379k | if (!sms_tree->sms_none_valid && features_to_get & FEATURE_SMS_NONE_FLAG) { |
553 | 41.5k | simple_motion_search_get_best_ref(cpi, x, sms_tree, mi_row, mi_col, bsize, |
554 | 41.5k | ref_list, num_refs, use_subpixel, 1, |
555 | 41.5k | &sms_tree->sms_none_feat[0], |
556 | 41.5k | &sms_tree->sms_none_feat[1]); |
557 | 41.5k | sms_tree->sms_none_valid = 1; |
558 | 41.5k | } |
559 | | |
560 | | // Split subblocks |
561 | 379k | if (features_to_get & FEATURE_SMS_SPLIT_FLAG) { |
562 | 379k | const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT); |
563 | 1.89M | for (int r_idx = 0; r_idx < SUB_PARTITIONS_SPLIT; r_idx++) { |
564 | 1.51M | const int sub_mi_col = mi_col + (r_idx & 1) * w_mi / 2; |
565 | 1.51M | const int sub_mi_row = mi_row + (r_idx >> 1) * h_mi / 2; |
566 | 1.51M | SIMPLE_MOTION_DATA_TREE *sub_tree = sms_tree->split[r_idx]; |
567 | | |
568 | 1.51M | if (!sub_tree->sms_none_valid) { |
569 | 1.51M | simple_motion_search_get_best_ref( |
570 | 1.51M | cpi, x, sub_tree, sub_mi_row, sub_mi_col, subsize, ref_list, |
571 | 1.51M | num_refs, use_subpixel, 1, &sub_tree->sms_none_feat[0], |
572 | 1.51M | &sub_tree->sms_none_feat[1]); |
573 | 1.51M | sub_tree->sms_none_valid = 1; |
574 | 1.51M | } |
575 | 1.51M | } |
576 | 379k | } |
577 | | |
578 | | // Rectangular subblocks |
579 | 379k | if (!sms_tree->sms_rect_valid && features_to_get & FEATURE_SMS_RECT_FLAG) { |
580 | | // Horz subblock |
581 | 145k | BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_HORZ); |
582 | 436k | for (int r_idx = 0; r_idx < SUB_PARTITIONS_RECT; r_idx++) { |
583 | 290k | const int sub_mi_col = mi_col + 0; |
584 | 290k | const int sub_mi_row = mi_row + r_idx * h_mi / 2; |
585 | | |
586 | 290k | simple_motion_search_get_best_ref( |
587 | 290k | cpi, x, sms_tree, sub_mi_row, sub_mi_col, subsize, ref_list, num_refs, |
588 | 290k | use_subpixel, 0, &sms_tree->sms_rect_feat[2 * r_idx], |
589 | 290k | &sms_tree->sms_rect_feat[2 * r_idx + 1]); |
590 | 290k | } |
591 | | |
592 | | // Vert subblock |
593 | 145k | subsize = get_partition_subsize(bsize, PARTITION_VERT); |
594 | 436k | for (int r_idx = 0; r_idx < SUB_PARTITIONS_RECT; r_idx++) { |
595 | 290k | const int sub_mi_col = mi_col + r_idx * w_mi / 2; |
596 | 290k | const int sub_mi_row = mi_row + 0; |
597 | | |
598 | 290k | simple_motion_search_get_best_ref( |
599 | 290k | cpi, x, sms_tree, sub_mi_row, sub_mi_col, subsize, ref_list, num_refs, |
600 | 290k | use_subpixel, 0, &sms_tree->sms_rect_feat[4 + 2 * r_idx], |
601 | 290k | &sms_tree->sms_rect_feat[4 + 2 * r_idx + 1]); |
602 | 290k | } |
603 | 145k | sms_tree->sms_rect_valid = 1; |
604 | 145k | } |
605 | | |
606 | 379k | if (!features) return; |
607 | | |
608 | 379k | int f_idx = 0; |
609 | 380k | if (features_to_get & FEATURE_SMS_NONE_FLAG) { |
610 | 1.13M | for (int sub_idx = 0; sub_idx < 2; sub_idx++) { |
611 | 759k | features[f_idx++] = log1pf((float)sms_tree->sms_none_feat[sub_idx]); |
612 | 759k | } |
613 | 380k | } |
614 | | |
615 | 379k | if (features_to_get & FEATURE_SMS_SPLIT_FLAG) { |
616 | 1.89M | for (int sub_idx = 0; sub_idx < SUB_PARTITIONS_SPLIT; sub_idx++) { |
617 | 1.51M | SIMPLE_MOTION_DATA_TREE *sub_tree = sms_tree->split[sub_idx]; |
618 | 1.51M | features[f_idx++] = log1pf((float)sub_tree->sms_none_feat[0]); |
619 | 1.51M | features[f_idx++] = log1pf((float)sub_tree->sms_none_feat[1]); |
620 | 1.51M | } |
621 | 379k | } |
622 | | |
623 | 379k | if (features_to_get & FEATURE_SMS_RECT_FLAG) { |
624 | 1.31M | for (int sub_idx = 0; sub_idx < 8; sub_idx++) { |
625 | 1.16M | features[f_idx++] = log1pf((float)sms_tree->sms_rect_feat[sub_idx]); |
626 | 1.16M | } |
627 | 146k | } |
628 | | |
629 | 379k | const MACROBLOCKD *xd = &x->e_mbd; |
630 | 379k | set_offsets_for_motion_search(cpi, x, mi_row, mi_col, bsize); |
631 | | |
632 | | // Q_INDEX |
633 | 379k | const int dc_q = av1_dc_quant_QTX(x->qindex, 0, xd->bd) >> (xd->bd - 8); |
634 | 379k | features[f_idx++] = log1pf((float)(dc_q * dc_q) / 256.0f); |
635 | | |
636 | | // Neighbor stuff |
637 | 379k | const int has_above = !!xd->above_mbmi; |
638 | 379k | const int has_left = !!xd->left_mbmi; |
639 | 379k | const BLOCK_SIZE above_bsize = has_above ? xd->above_mbmi->bsize : bsize; |
640 | 379k | const BLOCK_SIZE left_bsize = has_left ? xd->left_mbmi->bsize : bsize; |
641 | 379k | features[f_idx++] = (float)has_above; |
642 | 379k | features[f_idx++] = (float)mi_size_wide_log2[above_bsize]; |
643 | 379k | features[f_idx++] = (float)mi_size_high_log2[above_bsize]; |
644 | 379k | features[f_idx++] = (float)has_left; |
645 | 379k | features[f_idx++] = (float)mi_size_wide_log2[left_bsize]; |
646 | 379k | features[f_idx++] = (float)mi_size_high_log2[left_bsize]; |
647 | 379k | } |
648 | | |
649 | | // Performs a simple_motion_search with two reference frames and extract |
650 | | // the variance of residues. Then use the features to determine whether we want |
651 | | // to prune some partitions. |
652 | | static void simple_motion_search_prune_rect(AV1_COMP *const cpi, MACROBLOCK *x, |
653 | | SIMPLE_MOTION_DATA_TREE *sms_tree, |
654 | 133k | PartitionSearchState *part_state) { |
655 | 133k | const AV1_COMMON *const cm = &cpi->common; |
656 | 133k | const PartitionBlkParams *blk_params = &part_state->part_blk_params; |
657 | 133k | const int mi_row = blk_params->mi_row, mi_col = blk_params->mi_col; |
658 | 133k | const BLOCK_SIZE bsize = blk_params->bsize; |
659 | | |
660 | 133k | const int bsize_idx = convert_bsize_to_idx(bsize); |
661 | 133k | const int is_720p_or_larger = AOMMIN(cm->width, cm->height) >= 720; |
662 | 133k | const int is_480p_or_larger = AOMMIN(cm->width, cm->height) >= 480; |
663 | | // res_idx is 0 for lowres, 1 for 480p, 2 for 720p+ |
664 | 133k | const int res_idx = is_480p_or_larger + is_720p_or_larger; |
665 | | |
666 | | // Get model parameters |
667 | 133k | const NN_CONFIG *nn_config = |
668 | 133k | av1_simple_motion_search_prune_rect_nn_config[bsize_idx]; |
669 | 133k | const float *ml_mean = av1_simple_motion_search_prune_rect_mean[bsize_idx], |
670 | 133k | *ml_std = av1_simple_motion_search_prune_rect_std[bsize_idx]; |
671 | | |
672 | 133k | const int agg = get_simple_motion_search_prune_agg( |
673 | 133k | x->qindex, cpi->sf.part_sf.simple_motion_search_prune_agg, 1); |
674 | 133k | if (agg < 0) { |
675 | 0 | return; |
676 | 0 | } |
677 | | |
678 | 133k | const float prune_thresh = |
679 | 133k | av1_simple_motion_search_prune_rect_thresh[agg][res_idx][bsize_idx]; |
680 | | |
681 | | // If there is no valid threshold, return immediately. |
682 | 133k | if (!nn_config || prune_thresh == 0.0f) { |
683 | 0 | return; |
684 | 0 | } |
685 | | |
686 | | // Get features |
687 | 133k | float features[FEATURE_SIZE_SMS_PRUNE_PART] = { 0.0f }; |
688 | 133k | simple_motion_search_prune_part_features(cpi, x, sms_tree, mi_row, mi_col, |
689 | 133k | bsize, features, |
690 | 133k | FEATURE_SMS_PRUNE_PART_FLAG); |
691 | | |
692 | | // Note: it is intended to not normalize the features here, to keep it |
693 | | // consistent for all features collected and passed to the external model. |
694 | 133k | if (cpi->sf.part_sf.simple_motion_search_prune_rect && |
695 | 133k | !frame_is_intra_only(cm) && |
696 | 133k | (part_state->partition_rect_allowed[HORZ] || |
697 | 67.7k | part_state->partition_rect_allowed[VERT]) && |
698 | 133k | bsize >= BLOCK_8X8 && !av1_superres_scaled(cm)) { |
699 | | // Write features to file |
700 | 133k | write_features_to_file( |
701 | 133k | cpi->oxcf.partition_info_path, cpi->ext_part_controller.test_mode, |
702 | 133k | features, FEATURE_SIZE_SMS_PRUNE_PART, 1, bsize, mi_row, mi_col); |
703 | | |
704 | 133k | if (ext_ml_model_decision_before_none_part2( |
705 | 133k | cpi, features, &part_state->prune_rect_part[HORZ], |
706 | 133k | &part_state->prune_rect_part[VERT])) { |
707 | 0 | return; |
708 | 0 | } |
709 | 133k | } |
710 | | |
711 | 3.45M | for (int f_idx = 0; f_idx < FEATURE_SIZE_SMS_PRUNE_PART; f_idx++) { |
712 | 3.31M | features[f_idx] = (features[f_idx] - ml_mean[f_idx]) / ml_std[f_idx]; |
713 | 3.31M | } |
714 | | |
715 | | // Get probabilities |
716 | 133k | float scores[EXT_PARTITION_TYPES] = { 0.0f }, |
717 | 133k | probs[EXT_PARTITION_TYPES] = { 0.0f }; |
718 | 133k | const int num_classes = (bsize == BLOCK_128X128 || bsize == BLOCK_8X8) |
719 | 133k | ? PARTITION_TYPES |
720 | 133k | : EXT_PARTITION_TYPES; |
721 | | |
722 | 133k | av1_nn_predict(features, nn_config, 1, scores); |
723 | | |
724 | 133k | av1_nn_softmax(scores, probs, num_classes); |
725 | | |
726 | | // Determine if we should prune rectangular partitions. |
727 | 133k | if (probs[PARTITION_HORZ] <= prune_thresh) { |
728 | 133k | part_state->prune_rect_part[HORZ] = 1; |
729 | 133k | } |
730 | 133k | if (probs[PARTITION_VERT] <= prune_thresh) { |
731 | 126k | part_state->prune_rect_part[VERT] = 1; |
732 | 126k | } |
733 | 133k | } |
734 | | |
735 | | // Early terminates PARTITION_NONE using simple_motion_search features and the |
736 | | // rate, distortion, and rdcost of PARTITION_NONE. This is only called when: |
737 | | // - The frame is a show frame |
738 | | // - The frame is not intra only |
739 | | // - The current bsize is > BLOCK_8X8 |
740 | | // - blk_row + blk_height/2 < total_rows and blk_col + blk_width/2 < total_cols |
741 | | void av1_simple_motion_search_early_term_none( |
742 | | AV1_COMP *const cpi, MACROBLOCK *x, SIMPLE_MOTION_DATA_TREE *sms_tree, |
743 | 13.3k | const RD_STATS *none_rdc, PartitionSearchState *part_state) { |
744 | 13.3k | const PartitionBlkParams *blk_params = &part_state->part_blk_params; |
745 | 13.3k | const int mi_row = blk_params->mi_row, mi_col = blk_params->mi_col; |
746 | 13.3k | const BLOCK_SIZE bsize = blk_params->bsize; |
747 | | |
748 | 13.3k | float features[FEATURE_SIZE_SMS_TERM_NONE] = { 0.0f }; |
749 | 13.3k | simple_motion_search_prune_part_features(cpi, x, sms_tree, mi_row, mi_col, |
750 | 13.3k | bsize, features, |
751 | 13.3k | FEATURE_SMS_PRUNE_PART_FLAG); |
752 | 13.3k | int f_idx = FEATURE_SIZE_SMS_PRUNE_PART; |
753 | | |
754 | 13.3k | features[f_idx++] = log1pf((float)none_rdc->rate); |
755 | 13.3k | features[f_idx++] = log1pf((float)none_rdc->dist); |
756 | 13.3k | features[f_idx++] = log1pf((float)none_rdc->rdcost); |
757 | | |
758 | 13.3k | assert(f_idx == FEATURE_SIZE_SMS_TERM_NONE); |
759 | | |
760 | 13.3k | const float *ml_mean = NULL; |
761 | 13.3k | const float *ml_std = NULL; |
762 | 13.3k | const float *ml_model = NULL; |
763 | | |
764 | 13.3k | if (bsize == BLOCK_128X128) { |
765 | 0 | ml_mean = av1_simple_motion_search_term_none_mean_128; |
766 | 0 | ml_std = av1_simple_motion_search_term_none_std_128; |
767 | 0 | ml_model = av1_simple_motion_search_term_none_model_128; |
768 | 13.3k | } else if (bsize == BLOCK_64X64) { |
769 | 3.83k | ml_mean = av1_simple_motion_search_term_none_mean_64; |
770 | 3.83k | ml_std = av1_simple_motion_search_term_none_std_64; |
771 | 3.83k | ml_model = av1_simple_motion_search_term_none_model_64; |
772 | 9.48k | } else if (bsize == BLOCK_32X32) { |
773 | 8.34k | ml_mean = av1_simple_motion_search_term_none_mean_32; |
774 | 8.34k | ml_std = av1_simple_motion_search_term_none_std_32; |
775 | 8.34k | ml_model = av1_simple_motion_search_term_none_model_32; |
776 | 8.34k | } else if (bsize == BLOCK_16X16) { |
777 | 1.14k | ml_mean = av1_simple_motion_search_term_none_mean_16; |
778 | 1.14k | ml_std = av1_simple_motion_search_term_none_std_16; |
779 | 1.14k | ml_model = av1_simple_motion_search_term_none_model_16; |
780 | 18.4E | } else { |
781 | 18.4E | assert(0 && "Unexpected block size in simple_motion_term_none"); |
782 | 18.4E | } |
783 | | |
784 | | // Write features to file |
785 | 13.3k | write_features_to_file(cpi->oxcf.partition_info_path, |
786 | 13.3k | cpi->ext_part_controller.test_mode, features, |
787 | 13.3k | FEATURE_SIZE_SMS_TERM_NONE, 3, bsize, mi_row, mi_col); |
788 | | |
789 | 13.3k | if (ext_ml_model_decision_after_none_part2( |
790 | 13.3k | cpi, features, &part_state->terminate_partition_search)) { |
791 | 0 | return; |
792 | 0 | } |
793 | | |
794 | 13.3k | if (ml_model) { |
795 | 13.3k | float score = 0.0f; |
796 | 385k | for (f_idx = 0; f_idx < FEATURE_SIZE_SMS_TERM_NONE; f_idx++) { |
797 | 372k | score += |
798 | 372k | ml_model[f_idx] * (features[f_idx] - ml_mean[f_idx]) / ml_std[f_idx]; |
799 | 372k | } |
800 | 13.3k | score += ml_model[FEATURE_SIZE_SMS_TERM_NONE]; |
801 | | |
802 | 13.3k | if (score >= 0.0f) { |
803 | 700 | part_state->terminate_partition_search = 1; |
804 | 700 | } |
805 | 13.3k | } |
806 | 13.3k | } |
807 | | |
808 | | void av1_get_max_min_partition_features(AV1_COMP *const cpi, MACROBLOCK *x, |
809 | | int mi_row, int mi_col, |
810 | 0 | float *features) { |
811 | 0 | AV1_COMMON *const cm = &cpi->common; |
812 | 0 | MACROBLOCKD *xd = &x->e_mbd; |
813 | 0 | const BLOCK_SIZE sb_size = cm->seq_params->sb_size; |
814 | | |
815 | | // Currently this only allows 128X128 SB size. May extend it to 64X64 SB size. |
816 | 0 | assert(sb_size == BLOCK_128X128); |
817 | |
|
818 | 0 | int f_idx = 0; |
819 | |
|
820 | 0 | const int dc_q = av1_dc_quant_QTX(x->qindex, 0, xd->bd) >> (xd->bd - 8); |
821 | 0 | const float log_q_sq = log1pf((float)(dc_q * dc_q) / 256.0f); |
822 | | |
823 | | // Perform full-pixel single motion search in Y plane of 16x16 mbs in the sb |
824 | 0 | float sum_mv_row_sq = 0; |
825 | 0 | float sum_mv_row = 0; |
826 | 0 | float min_abs_mv_row = FLT_MAX; |
827 | 0 | float max_abs_mv_row = 0; |
828 | |
|
829 | 0 | float sum_mv_col_sq = 0; |
830 | 0 | float sum_mv_col = 0; |
831 | 0 | float min_abs_mv_col = FLT_MAX; |
832 | 0 | float max_abs_mv_col = 0; |
833 | |
|
834 | 0 | float sum_log_sse_sq = 0; |
835 | 0 | float sum_log_sse = 0; |
836 | 0 | float min_log_sse = FLT_MAX; |
837 | 0 | float max_log_sse = 0; |
838 | |
|
839 | 0 | const BLOCK_SIZE mb_size = BLOCK_16X16; |
840 | 0 | const int mb_rows = block_size_high[sb_size] / block_size_high[mb_size]; |
841 | 0 | const int mb_cols = block_size_wide[sb_size] / block_size_wide[mb_size]; |
842 | 0 | const int mb_in_mi_size_high_log2 = mi_size_high_log2[mb_size]; |
843 | 0 | const int mb_in_mi_size_wide_log2 = mi_size_wide_log2[mb_size]; |
844 | |
|
845 | 0 | for (int mb_row = 0; mb_row < mb_rows; mb_row++) |
846 | 0 | for (int mb_col = 0; mb_col < mb_cols; mb_col++) { |
847 | 0 | const int this_mi_row = mi_row + (mb_row << mb_in_mi_size_high_log2); |
848 | 0 | const int this_mi_col = mi_col + (mb_col << mb_in_mi_size_wide_log2); |
849 | 0 | unsigned int sse = 0; |
850 | 0 | unsigned int var = 0; |
851 | 0 | const FULLPEL_MV start_mv = kZeroFullMv; |
852 | 0 | const MV_REFERENCE_FRAME ref = |
853 | 0 | cpi->rc.is_src_frame_alt_ref ? ALTREF_FRAME : LAST_FRAME; |
854 | 0 | const int_mv best_mv = av1_simple_motion_search_sse_var( |
855 | 0 | cpi, x, this_mi_row, this_mi_col, mb_size, ref, start_mv, 1, 0, &sse, |
856 | 0 | &var); |
857 | |
|
858 | 0 | const float mv_row = (float)(best_mv.as_mv.row / 8); |
859 | 0 | const float mv_col = (float)(best_mv.as_mv.col / 8); |
860 | 0 | const float log_sse = log1pf((float)sse); |
861 | 0 | const float abs_mv_row = fabsf(mv_row); |
862 | 0 | const float abs_mv_col = fabsf(mv_col); |
863 | |
|
864 | 0 | sum_mv_row_sq += mv_row * mv_row; |
865 | 0 | sum_mv_row += mv_row; |
866 | 0 | sum_mv_col_sq += mv_col * mv_col; |
867 | 0 | sum_mv_col += mv_col; |
868 | |
|
869 | 0 | if (abs_mv_row < min_abs_mv_row) min_abs_mv_row = abs_mv_row; |
870 | 0 | if (abs_mv_row > max_abs_mv_row) max_abs_mv_row = abs_mv_row; |
871 | 0 | if (abs_mv_col < min_abs_mv_col) min_abs_mv_col = abs_mv_col; |
872 | 0 | if (abs_mv_col > max_abs_mv_col) max_abs_mv_col = abs_mv_col; |
873 | |
|
874 | 0 | sum_log_sse_sq += log_sse * log_sse; |
875 | 0 | sum_log_sse += log_sse; |
876 | 0 | if (log_sse < min_log_sse) min_log_sse = log_sse; |
877 | 0 | if (log_sse > max_log_sse) max_log_sse = log_sse; |
878 | 0 | } |
879 | 0 | const int blks = mb_rows * mb_cols; |
880 | 0 | const float avg_mv_row = sum_mv_row / (float)blks; |
881 | 0 | const float var_mv_row = |
882 | 0 | sum_mv_row_sq / (float)blks - avg_mv_row * avg_mv_row; |
883 | |
|
884 | 0 | const float avg_mv_col = sum_mv_col / (float)blks; |
885 | 0 | const float var_mv_col = |
886 | 0 | sum_mv_col_sq / (float)blks - avg_mv_col * avg_mv_col; |
887 | |
|
888 | 0 | const float avg_log_sse = sum_log_sse / (float)blks; |
889 | 0 | const float var_log_sse = |
890 | 0 | sum_log_sse_sq / (float)blks - avg_log_sse * avg_log_sse; |
891 | |
|
892 | 0 | features[f_idx++] = avg_log_sse; |
893 | 0 | features[f_idx++] = avg_mv_col; |
894 | 0 | features[f_idx++] = avg_mv_row; |
895 | 0 | features[f_idx++] = log_q_sq; |
896 | 0 | features[f_idx++] = max_abs_mv_col; |
897 | 0 | features[f_idx++] = max_abs_mv_row; |
898 | 0 | features[f_idx++] = max_log_sse; |
899 | 0 | features[f_idx++] = min_abs_mv_col; |
900 | 0 | features[f_idx++] = min_abs_mv_row; |
901 | 0 | features[f_idx++] = min_log_sse; |
902 | 0 | features[f_idx++] = var_log_sse; |
903 | 0 | features[f_idx++] = var_mv_col; |
904 | 0 | features[f_idx++] = var_mv_row; |
905 | |
|
906 | 0 | assert(f_idx == FEATURE_SIZE_MAX_MIN_PART_PRED); |
907 | 0 | } |
908 | | |
909 | | // Convert result index to block size. |
910 | | // result idx block size |
911 | | // 0 BLOCK_16X16 |
912 | | // 1 BLOCK_32X32 |
913 | | // 2 BLOCK_64X64 |
914 | | // 3 BLOCK_128X128 |
915 | 0 | static BLOCK_SIZE get_block_size(int idx) { |
916 | 0 | return (BLOCK_SIZE)((idx + 2) * 3); |
917 | 0 | } |
918 | | |
919 | | BLOCK_SIZE av1_predict_max_partition(const AV1_COMP *const cpi, |
920 | | const MACROBLOCK *const x, |
921 | 0 | const float *features) { |
922 | 0 | float scores[MAX_NUM_CLASSES_MAX_MIN_PART_PRED] = { 0.0f }; |
923 | 0 | const NN_CONFIG *nn_config = &av1_max_part_pred_nn_config; |
924 | |
|
925 | 0 | assert(cpi->sf.part_sf.auto_max_partition_based_on_simple_motion != |
926 | 0 | NOT_IN_USE); |
927 | |
|
928 | 0 | av1_nn_predict(features, nn_config, 1, scores); |
929 | |
|
930 | 0 | int result = MAX_NUM_CLASSES_MAX_MIN_PART_PRED - 1; |
931 | 0 | if (cpi->sf.part_sf.auto_max_partition_based_on_simple_motion == |
932 | 0 | DIRECT_PRED) { |
933 | 0 | result = 0; |
934 | 0 | float max_score = scores[0]; |
935 | 0 | for (int i = 1; i < MAX_NUM_CLASSES_MAX_MIN_PART_PRED; ++i) { |
936 | 0 | if (scores[i] > max_score) { |
937 | 0 | max_score = scores[i]; |
938 | 0 | result = i; |
939 | 0 | } |
940 | 0 | } |
941 | 0 | return get_block_size(result); |
942 | 0 | } |
943 | | |
944 | 0 | float probs[MAX_NUM_CLASSES_MAX_MIN_PART_PRED] = { 0.0f }; |
945 | 0 | av1_nn_softmax(scores, probs, MAX_NUM_CLASSES_MAX_MIN_PART_PRED); |
946 | |
|
947 | 0 | if (cpi->sf.part_sf.auto_max_partition_based_on_simple_motion == |
948 | 0 | RELAXED_PRED) { |
949 | 0 | for (result = MAX_NUM_CLASSES_MAX_MIN_PART_PRED - 1; result >= 0; |
950 | 0 | --result) { |
951 | 0 | if (result < MAX_NUM_CLASSES_MAX_MIN_PART_PRED - 1) { |
952 | 0 | probs[result] += probs[result + 1]; |
953 | 0 | } |
954 | 0 | if (probs[result] > 0.2) break; |
955 | 0 | } |
956 | 0 | } else if (cpi->sf.part_sf.auto_max_partition_based_on_simple_motion == |
957 | 0 | ADAPT_PRED) { |
958 | 0 | const BLOCK_SIZE sb_size = cpi->common.seq_params->sb_size; |
959 | | // TODO(debargha): x->source_variance is unavailable at this point, |
960 | | // so compute. The redundant recomputation later can be removed. |
961 | 0 | const unsigned int source_variance = av1_get_perpixel_variance_facade( |
962 | 0 | cpi, &x->e_mbd, &x->plane[0].src, sb_size, AOM_PLANE_Y); |
963 | 0 | if (source_variance > 16) { |
964 | 0 | const double thresh = source_variance < 128 ? 0.05 : 0.1; |
965 | 0 | for (result = MAX_NUM_CLASSES_MAX_MIN_PART_PRED - 1; result >= 0; |
966 | 0 | --result) { |
967 | 0 | if (result < MAX_NUM_CLASSES_MAX_MIN_PART_PRED - 1) { |
968 | 0 | probs[result] += probs[result + 1]; |
969 | 0 | } |
970 | 0 | if (probs[result] > thresh) break; |
971 | 0 | } |
972 | 0 | } |
973 | 0 | } |
974 | |
|
975 | 0 | return get_block_size(result); |
976 | 0 | } |
977 | | |
978 | | // Get the minimum partition block width and height(in log scale) under a |
979 | | // SIMPLE_MOTION_DATA_TREE. |
980 | | static inline void get_min_bsize(const SIMPLE_MOTION_DATA_TREE *sms_tree, |
981 | 0 | int *min_bw, int *min_bh) { |
982 | 0 | if (!sms_tree) return; |
983 | | |
984 | 0 | const BLOCK_SIZE bsize = sms_tree->block_size; |
985 | 0 | if (bsize == BLOCK_4X4) { |
986 | 0 | *min_bw = 0; |
987 | 0 | *min_bh = 0; |
988 | 0 | return; |
989 | 0 | } |
990 | | |
991 | 0 | PARTITION_TYPE part_type = sms_tree->partitioning; |
992 | 0 | if (part_type == PARTITION_INVALID) return; |
993 | | |
994 | 0 | if (part_type == PARTITION_SPLIT) { |
995 | 0 | for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) { |
996 | 0 | get_min_bsize(sms_tree->split[i], min_bw, min_bh); |
997 | 0 | } |
998 | 0 | } else { |
999 | 0 | if (part_type == PARTITION_HORZ_A || part_type == PARTITION_HORZ_B || |
1000 | 0 | part_type == PARTITION_VERT_A || part_type == PARTITION_VERT_B) |
1001 | 0 | part_type = PARTITION_SPLIT; |
1002 | 0 | const BLOCK_SIZE subsize = get_partition_subsize(bsize, part_type); |
1003 | 0 | if (subsize != BLOCK_INVALID) { |
1004 | 0 | *min_bw = AOMMIN(*min_bw, mi_size_wide_log2[subsize]); |
1005 | 0 | *min_bh = AOMMIN(*min_bh, mi_size_high_log2[subsize]); |
1006 | 0 | } |
1007 | 0 | } |
1008 | 0 | } |
1009 | | |
// Appends two features describing `rd` at features[*feature_idx] and advances
// *feature_idx by two:
//   1. a validity flag: 1.0 when 0 < rd < INT64_MAX, otherwise 0.0;
//   2. rd / best_rd when rd is valid, otherwise 1.0.
// best_rd is used as a divisor and is not checked here.
static inline void add_rd_feature(int64_t rd, int64_t best_rd, float *features,
                                  int *feature_idx) {
  int pos = *feature_idx;
  if (rd > 0 && rd < INT64_MAX) {
    features[pos++] = 1.0f;
    features[pos++] = (float)rd / best_rd;
  } else {
    features[pos++] = 0.0f;
    features[pos++] = 1.0f;
  }
  *feature_idx = pos;
}
1017 | | |
1018 | 0 | #define FEATURES 31 |
1019 | | void av1_ml_early_term_after_split(AV1_COMP *const cpi, MACROBLOCK *const x, |
1020 | | SIMPLE_MOTION_DATA_TREE *const sms_tree, |
1021 | | int64_t best_rd, int64_t part_none_rd, |
1022 | | int64_t part_split_rd, |
1023 | | int64_t *split_block_rd, |
1024 | 0 | PartitionSearchState *part_state) { |
1025 | 0 | const PartitionBlkParams *blk_params = &part_state->part_blk_params; |
1026 | 0 | const int mi_row = blk_params->mi_row, mi_col = blk_params->mi_col; |
1027 | 0 | const BLOCK_SIZE bsize = blk_params->bsize; |
1028 | |
|
1029 | 0 | if (best_rd <= 0 || best_rd == INT64_MAX || |
1030 | 0 | part_state->terminate_partition_search) |
1031 | 0 | return; |
1032 | | |
1033 | 0 | const AV1_COMMON *const cm = &cpi->common; |
1034 | 0 | const int is_480p_or_larger = AOMMIN(cm->width, cm->height) >= 480; |
1035 | 0 | const NN_CONFIG *nn_config = NULL; |
1036 | 0 | float thresh = -1e6; |
1037 | 0 | switch (bsize) { |
1038 | 0 | case BLOCK_128X128: |
1039 | 0 | nn_config = &av1_early_term_after_split_nnconfig_64; |
1040 | 0 | thresh = is_480p_or_larger ? -2.0f : -1.2f; |
1041 | 0 | break; |
1042 | 0 | case BLOCK_64X64: |
1043 | 0 | nn_config = &av1_early_term_after_split_nnconfig_64; |
1044 | 0 | thresh = is_480p_or_larger ? -2.0f : -1.2f; |
1045 | 0 | break; |
1046 | 0 | case BLOCK_32X32: |
1047 | 0 | nn_config = &av1_early_term_after_split_nnconfig_32; |
1048 | 0 | thresh = is_480p_or_larger ? -2.6f : -2.3f; |
1049 | 0 | break; |
1050 | 0 | case BLOCK_16X16: |
1051 | 0 | nn_config = &av1_early_term_after_split_nnconfig_16; |
1052 | 0 | thresh = is_480p_or_larger ? -2.0f : -2.4f; |
1053 | 0 | break; |
1054 | 0 | case BLOCK_8X8: |
1055 | 0 | nn_config = &av1_early_term_after_split_nnconfig_8; |
1056 | 0 | thresh = is_480p_or_larger ? -1.0f : -1.4f; |
1057 | 0 | break; |
1058 | 0 | case BLOCK_4X4: break; |
1059 | 0 | default: |
1060 | 0 | assert(0 && "Invalid block size in av1_ml_early_term_after_split()."); |
1061 | 0 | break; |
1062 | 0 | } |
1063 | 0 | if (!nn_config) return; |
1064 | | |
1065 | | // Use more conservative threshold for level 1. |
1066 | 0 | if (cpi->sf.part_sf.ml_early_term_after_part_split_level < 2) thresh -= 0.3f; |
1067 | |
|
1068 | 0 | const MACROBLOCKD *const xd = &x->e_mbd; |
1069 | 0 | const int dc_q = av1_dc_quant_QTX(x->qindex, 0, xd->bd) >> (xd->bd - 8); |
1070 | 0 | const int bs = block_size_wide[bsize]; |
1071 | 0 | int f_idx = 0; |
1072 | 0 | float features[FEATURES] = { 0.0f }; |
1073 | |
|
1074 | 0 | features[f_idx++] = log1pf((float)dc_q / 4.0f); |
1075 | 0 | features[f_idx++] = log1pf((float)best_rd / bs / bs / 1024.0f); |
1076 | |
|
1077 | 0 | add_rd_feature(part_none_rd, best_rd, features, &f_idx); |
1078 | 0 | add_rd_feature(part_split_rd, best_rd, features, &f_idx); |
1079 | |
|
1080 | 0 | for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) { |
1081 | 0 | add_rd_feature(split_block_rd[i], best_rd, features, &f_idx); |
1082 | 0 | int min_bw = MAX_SB_SIZE_LOG2; |
1083 | 0 | int min_bh = MAX_SB_SIZE_LOG2; |
1084 | 0 | get_min_bsize(sms_tree->split[i], &min_bw, &min_bh); |
1085 | 0 | features[f_idx++] = (float)min_bw; |
1086 | 0 | features[f_idx++] = (float)min_bh; |
1087 | 0 | } |
1088 | |
|
1089 | 0 | simple_motion_search_prune_part_features(cpi, x, sms_tree, mi_row, mi_col, |
1090 | 0 | bsize, NULL, |
1091 | 0 | FEATURE_SMS_PRUNE_PART_FLAG); |
1092 | |
|
1093 | 0 | features[f_idx++] = log1pf((float)sms_tree->sms_none_feat[1]); |
1094 | |
|
1095 | 0 | features[f_idx++] = log1pf((float)sms_tree->split[0]->sms_none_feat[1]); |
1096 | 0 | features[f_idx++] = log1pf((float)sms_tree->split[1]->sms_none_feat[1]); |
1097 | 0 | features[f_idx++] = log1pf((float)sms_tree->split[2]->sms_none_feat[1]); |
1098 | 0 | features[f_idx++] = log1pf((float)sms_tree->split[3]->sms_none_feat[1]); |
1099 | |
|
1100 | 0 | features[f_idx++] = log1pf((float)sms_tree->sms_rect_feat[1]); |
1101 | 0 | features[f_idx++] = log1pf((float)sms_tree->sms_rect_feat[3]); |
1102 | 0 | features[f_idx++] = log1pf((float)sms_tree->sms_rect_feat[5]); |
1103 | 0 | features[f_idx++] = log1pf((float)sms_tree->sms_rect_feat[7]); |
1104 | |
|
1105 | 0 | assert(f_idx == FEATURES); |
1106 | | |
1107 | | // Write features to file |
1108 | 0 | write_features_to_file(cpi->oxcf.partition_info_path, |
1109 | 0 | cpi->ext_part_controller.test_mode, features, FEATURES, |
1110 | 0 | 4, bsize, mi_row, mi_col); |
1111 | |
|
1112 | 0 | if (ext_ml_model_decision_after_split( |
1113 | 0 | cpi, features, &part_state->terminate_partition_search)) { |
1114 | 0 | return; |
1115 | 0 | } |
1116 | | |
1117 | 0 | float score = 0.0f; |
1118 | 0 | av1_nn_predict(features, nn_config, 1, &score); |
1119 | | // Score is indicator of confidence that we should NOT terminate. |
1120 | 0 | if (score < thresh) { |
1121 | 0 | part_state->terminate_partition_search = 1; |
1122 | 0 | } |
1123 | 0 | } |
1124 | | #undef FEATURES |
1125 | | |
1126 | | void av1_ml_prune_rect_partition(AV1_COMP *const cpi, const MACROBLOCK *const x, |
1127 | | int64_t best_rd, int64_t none_rd, |
1128 | | const int64_t *split_rd, |
1129 | 0 | PartitionSearchState *part_state) { |
1130 | 0 | const PartitionBlkParams *blk_params = &part_state->part_blk_params; |
1131 | 0 | const int mi_row = blk_params->mi_row, mi_col = blk_params->mi_col; |
1132 | 0 | const BLOCK_SIZE bsize = blk_params->bsize; |
1133 | |
|
1134 | 0 | if (bsize < BLOCK_8X8 || best_rd >= 1000000000) return; |
1135 | 0 | best_rd = AOMMAX(best_rd, 1); |
1136 | 0 | const NN_CONFIG *nn_config = NULL; |
1137 | 0 | const float prob_thresholds[5] = { 0.01f, 0.01f, 0.004f, 0.002f, 0.002f }; |
1138 | 0 | float cur_thresh = 0.0f; |
1139 | 0 | switch (bsize) { |
1140 | 0 | case BLOCK_8X8: |
1141 | 0 | nn_config = &av1_rect_partition_nnconfig_8; |
1142 | 0 | cur_thresh = prob_thresholds[0]; |
1143 | 0 | break; |
1144 | 0 | case BLOCK_16X16: |
1145 | 0 | nn_config = &av1_rect_partition_nnconfig_16; |
1146 | 0 | cur_thresh = prob_thresholds[1]; |
1147 | 0 | break; |
1148 | 0 | case BLOCK_32X32: |
1149 | 0 | nn_config = &av1_rect_partition_nnconfig_32; |
1150 | 0 | cur_thresh = prob_thresholds[2]; |
1151 | 0 | break; |
1152 | 0 | case BLOCK_64X64: |
1153 | 0 | nn_config = &av1_rect_partition_nnconfig_64; |
1154 | 0 | cur_thresh = prob_thresholds[3]; |
1155 | 0 | break; |
1156 | 0 | case BLOCK_128X128: |
1157 | 0 | nn_config = &av1_rect_partition_nnconfig_128; |
1158 | 0 | cur_thresh = prob_thresholds[4]; |
1159 | 0 | break; |
1160 | 0 | default: assert(0 && "Unexpected bsize."); |
1161 | 0 | } |
1162 | 0 | if (!nn_config) return; |
1163 | | |
1164 | | // 1. Compute input features |
1165 | 0 | float features[9]; |
1166 | | |
1167 | | // RD cost ratios |
1168 | 0 | for (int i = 0; i < 5; i++) features[i] = 1.0f; |
1169 | 0 | if (none_rd > 0 && none_rd < 1000000000) |
1170 | 0 | features[0] = (float)none_rd / (float)best_rd; |
1171 | 0 | for (int i = 0; i < SUB_PARTITIONS_SPLIT; i++) { |
1172 | 0 | if (split_rd[i] > 0 && split_rd[i] < 1000000000) |
1173 | 0 | features[1 + i] = (float)split_rd[i] / (float)best_rd; |
1174 | 0 | } |
1175 | | |
1176 | | // Variance ratios |
1177 | 0 | const MACROBLOCKD *const xd = &x->e_mbd; |
1178 | 0 | int whole_block_variance; |
1179 | 0 | whole_block_variance = av1_get_perpixel_variance_facade( |
1180 | 0 | cpi, xd, &x->plane[0].src, bsize, AOM_PLANE_Y); |
1181 | 0 | whole_block_variance = AOMMAX(whole_block_variance, 1); |
1182 | |
|
1183 | 0 | int split_variance[SUB_PARTITIONS_SPLIT]; |
1184 | 0 | const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT); |
1185 | 0 | struct buf_2d buf; |
1186 | 0 | buf.stride = x->plane[0].src.stride; |
1187 | 0 | const int bw = block_size_wide[bsize]; |
1188 | 0 | for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) { |
1189 | 0 | const int x_idx = (i & 1) * bw / 2; |
1190 | 0 | const int y_idx = (i >> 1) * bw / 2; |
1191 | 0 | buf.buf = x->plane[0].src.buf + x_idx + y_idx * buf.stride; |
1192 | 0 | split_variance[i] = |
1193 | 0 | av1_get_perpixel_variance_facade(cpi, xd, &buf, subsize, AOM_PLANE_Y); |
1194 | 0 | } |
1195 | |
|
1196 | 0 | for (int i = 0; i < SUB_PARTITIONS_SPLIT; i++) |
1197 | 0 | features[5 + i] = (float)split_variance[i] / (float)whole_block_variance; |
1198 | | |
1199 | | // Write features to file |
1200 | 0 | write_features_to_file(cpi->oxcf.partition_info_path, |
1201 | 0 | cpi->ext_part_controller.test_mode, features, |
1202 | 0 | /*feature_size=*/9, 5, bsize, mi_row, mi_col); |
1203 | |
|
1204 | 0 | if (ext_ml_model_decision_after_split_part2( |
1205 | 0 | &cpi->ext_part_controller, frame_is_intra_only(&cpi->common), |
1206 | 0 | features, &part_state->prune_rect_part[HORZ], |
1207 | 0 | &part_state->prune_rect_part[VERT])) { |
1208 | 0 | return; |
1209 | 0 | } |
1210 | | |
1211 | | // 2. Do the prediction and prune 0-2 partitions based on their probabilities |
1212 | 0 | float raw_scores[3] = { 0.0f }; |
1213 | 0 | av1_nn_predict(features, nn_config, 1, raw_scores); |
1214 | 0 | float probs[3] = { 0.0f }; |
1215 | 0 | av1_nn_softmax(raw_scores, probs, 3); |
1216 | | |
1217 | | // probs[0] is the probability of the fact that both rectangular partitions |
1218 | | // are worse than current best_rd |
1219 | 0 | if (probs[1] <= cur_thresh) part_state->prune_rect_part[HORZ] = 1; |
1220 | 0 | if (probs[2] <= cur_thresh) part_state->prune_rect_part[VERT] = 1; |
1221 | 0 | } |
1222 | | |
1223 | | // Use a ML model to predict if horz_a, horz_b, vert_a, and vert_b should be |
1224 | | // considered. |
1225 | | static void ml_prune_ab_partition(AV1_COMP *const cpi, int part_ctx, |
1226 | | int var_ctx, int64_t best_rd, |
1227 | | PartitionSearchState *part_state, |
1228 | 0 | int *ab_partitions_allowed) { |
1229 | 0 | const PartitionBlkParams blk_params = part_state->part_blk_params; |
1230 | 0 | const int mi_row = blk_params.mi_row; |
1231 | 0 | const int mi_col = blk_params.mi_col; |
1232 | 0 | const BLOCK_SIZE bsize = blk_params.bsize; |
1233 | |
|
1234 | 0 | if (bsize < BLOCK_8X8 || best_rd >= 1000000000) return; |
1235 | 0 | const NN_CONFIG *nn_config = NULL; |
1236 | 0 | switch (bsize) { |
1237 | 0 | case BLOCK_8X8: nn_config = NULL; break; |
1238 | 0 | case BLOCK_16X16: nn_config = &av1_ab_partition_nnconfig_16; break; |
1239 | 0 | case BLOCK_32X32: nn_config = &av1_ab_partition_nnconfig_32; break; |
1240 | 0 | case BLOCK_64X64: nn_config = &av1_ab_partition_nnconfig_64; break; |
1241 | 0 | case BLOCK_128X128: nn_config = &av1_ab_partition_nnconfig_128; break; |
1242 | 0 | default: assert(0 && "Unexpected bsize."); |
1243 | 0 | } |
1244 | 0 | if (!nn_config) return; |
1245 | | |
1246 | | // Generate features. |
1247 | 0 | float features[10]; |
1248 | 0 | int feature_index = 0; |
1249 | 0 | features[feature_index++] = (float)part_ctx; |
1250 | 0 | features[feature_index++] = (float)var_ctx; |
1251 | 0 | const int rdcost = (int)AOMMIN(INT_MAX, best_rd); |
1252 | 0 | int sub_block_rdcost[8] = { 0 }; |
1253 | 0 | int rd_index = 0; |
1254 | 0 | for (int i = 0; i < SUB_PARTITIONS_RECT; ++i) { |
1255 | 0 | const int64_t *horz_rd = part_state->rect_part_rd[HORZ]; |
1256 | 0 | if (horz_rd[i] > 0 && horz_rd[i] < 1000000000) |
1257 | 0 | sub_block_rdcost[rd_index] = (int)horz_rd[i]; |
1258 | 0 | ++rd_index; |
1259 | 0 | } |
1260 | 0 | for (int i = 0; i < SUB_PARTITIONS_RECT; ++i) { |
1261 | 0 | const int64_t *vert_rd = part_state->rect_part_rd[VERT]; |
1262 | 0 | if (vert_rd[i] > 0 && vert_rd[i] < 1000000000) |
1263 | 0 | sub_block_rdcost[rd_index] = (int)vert_rd[i]; |
1264 | 0 | ++rd_index; |
1265 | 0 | } |
1266 | 0 | for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) { |
1267 | 0 | const int64_t *split_rd = part_state->split_rd; |
1268 | 0 | if (split_rd[i] > 0 && split_rd[i] < 1000000000) |
1269 | 0 | sub_block_rdcost[rd_index] = (int)split_rd[i]; |
1270 | 0 | ++rd_index; |
1271 | 0 | } |
1272 | 0 | for (int i = 0; i < 8; ++i) { |
1273 | | // Ratio between the sub-block RD and the whole-block RD. |
1274 | 0 | float rd_ratio = 1.0f; |
1275 | 0 | if (sub_block_rdcost[i] > 0 && sub_block_rdcost[i] < rdcost) |
1276 | 0 | rd_ratio = (float)sub_block_rdcost[i] / (float)rdcost; |
1277 | 0 | features[feature_index++] = rd_ratio; |
1278 | 0 | } |
1279 | 0 | assert(feature_index == 10); |
1280 | | |
1281 | | // Write features to file |
1282 | 0 | if (!frame_is_intra_only(&cpi->common)) { |
1283 | 0 | write_features_to_file(cpi->oxcf.partition_info_path, |
1284 | 0 | cpi->ext_part_controller.test_mode, features, |
1285 | 0 | /*feature_size=*/10, 6, bsize, mi_row, mi_col); |
1286 | 0 | } |
1287 | |
|
1288 | 0 | if (ext_ml_model_decision_after_rect( |
1289 | 0 | &cpi->ext_part_controller, frame_is_intra_only(&cpi->common), |
1290 | 0 | features, &ab_partitions_allowed[HORZ_A], |
1291 | 0 | &ab_partitions_allowed[HORZ_B], &ab_partitions_allowed[VERT_A], |
1292 | 0 | &ab_partitions_allowed[VERT_B])) { |
1293 | 0 | return; |
1294 | 0 | } |
1295 | | |
1296 | | // Calculate scores using the NN model. |
1297 | 0 | float score[16] = { 0.0f }; |
1298 | 0 | av1_nn_predict(features, nn_config, 1, score); |
1299 | 0 | int int_score[16]; |
1300 | 0 | int max_score = -1000; |
1301 | 0 | for (int i = 0; i < 16; ++i) { |
1302 | 0 | int_score[i] = (int)(100 * score[i]); |
1303 | 0 | max_score = AOMMAX(int_score[i], max_score); |
1304 | 0 | } |
1305 | | |
1306 | | // Make decisions based on the model scores. |
1307 | 0 | int thresh = max_score; |
1308 | 0 | switch (bsize) { |
1309 | 0 | case BLOCK_16X16: thresh -= 150; break; |
1310 | 0 | case BLOCK_32X32: thresh -= 100; break; |
1311 | 0 | default: break; |
1312 | 0 | } |
1313 | 0 | av1_zero_array(ab_partitions_allowed, NUM_AB_PARTS); |
1314 | 0 | for (int i = 0; i < 16; ++i) { |
1315 | 0 | if (int_score[i] >= thresh) { |
1316 | 0 | if ((i >> 0) & 1) ab_partitions_allowed[HORZ_A] = 1; |
1317 | 0 | if ((i >> 1) & 1) ab_partitions_allowed[HORZ_B] = 1; |
1318 | 0 | if ((i >> 2) & 1) ab_partitions_allowed[VERT_A] = 1; |
1319 | 0 | if ((i >> 3) & 1) ab_partitions_allowed[VERT_B] = 1; |
1320 | 0 | } |
1321 | 0 | } |
1322 | 0 | } |
1323 | | |
1324 | 0 | #define FEATURES 18 |
1325 | 0 | #define LABELS 4 |
1326 | | // Use a ML model to predict if horz4 and vert4 should be considered. |
1327 | | void av1_ml_prune_4_partition(AV1_COMP *const cpi, MACROBLOCK *const x, |
1328 | | int part_ctx, int64_t best_rd, |
1329 | | PartitionSearchState *part_state, |
1330 | | int *part4_allowed, |
1331 | 0 | unsigned int pb_source_variance) { |
1332 | 0 | const PartitionBlkParams blk_params = part_state->part_blk_params; |
1333 | 0 | const int mi_row = blk_params.mi_row; |
1334 | 0 | const int mi_col = blk_params.mi_col; |
1335 | 0 | const BLOCK_SIZE bsize = blk_params.bsize; |
1336 | |
|
1337 | 0 | int64_t(*rect_part_rd)[SUB_PARTITIONS_RECT] = part_state->rect_part_rd; |
1338 | 0 | int64_t *split_rd = part_state->split_rd; |
1339 | 0 | if (ext_ml_model_decision_after_part_ab( |
1340 | 0 | cpi, x, bsize, part_ctx, best_rd, rect_part_rd, split_rd, |
1341 | 0 | &part4_allowed[HORZ4], &part4_allowed[VERT4], pb_source_variance, |
1342 | 0 | mi_row, mi_col)) |
1343 | 0 | return; |
1344 | | |
1345 | 0 | if (best_rd >= 1000000000) return; |
1346 | 0 | int64_t *horz_rd = rect_part_rd[HORZ4]; |
1347 | 0 | int64_t *vert_rd = rect_part_rd[VERT4]; |
1348 | 0 | const NN_CONFIG *nn_config = NULL; |
1349 | | // 4-way partitions are only allowed for these three square block sizes. |
1350 | 0 | switch (bsize) { |
1351 | 0 | case BLOCK_16X16: nn_config = &av1_4_partition_nnconfig_16; break; |
1352 | 0 | case BLOCK_32X32: nn_config = &av1_4_partition_nnconfig_32; break; |
1353 | 0 | case BLOCK_64X64: nn_config = &av1_4_partition_nnconfig_64; break; |
1354 | 0 | default: assert(0 && "Unexpected bsize."); |
1355 | 0 | } |
1356 | 0 | if (!nn_config) return; |
1357 | | |
1358 | | // Generate features. |
1359 | 0 | float features[FEATURES]; |
1360 | 0 | int feature_index = 0; |
1361 | 0 | features[feature_index++] = (float)part_ctx; |
1362 | 0 | features[feature_index++] = (float)get_unsigned_bits(pb_source_variance); |
1363 | |
|
1364 | 0 | const int rdcost = (int)AOMMIN(INT_MAX, best_rd); |
1365 | 0 | int sub_block_rdcost[8] = { 0 }; |
1366 | 0 | int rd_index = 0; |
1367 | 0 | for (int i = 0; i < SUB_PARTITIONS_RECT; ++i) { |
1368 | 0 | if (horz_rd[i] > 0 && horz_rd[i] < 1000000000) |
1369 | 0 | sub_block_rdcost[rd_index] = (int)horz_rd[i]; |
1370 | 0 | ++rd_index; |
1371 | 0 | } |
1372 | 0 | for (int i = 0; i < SUB_PARTITIONS_RECT; ++i) { |
1373 | 0 | if (vert_rd[i] > 0 && vert_rd[i] < 1000000000) |
1374 | 0 | sub_block_rdcost[rd_index] = (int)vert_rd[i]; |
1375 | 0 | ++rd_index; |
1376 | 0 | } |
1377 | 0 | for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) { |
1378 | 0 | if (split_rd[i] > 0 && split_rd[i] < 1000000000) |
1379 | 0 | sub_block_rdcost[rd_index] = (int)split_rd[i]; |
1380 | 0 | ++rd_index; |
1381 | 0 | } |
1382 | 0 | for (int i = 0; i < 8; ++i) { |
1383 | | // Ratio between the sub-block RD and the whole-block RD. |
1384 | 0 | float rd_ratio = 1.0f; |
1385 | 0 | if (sub_block_rdcost[i] > 0 && sub_block_rdcost[i] < rdcost) |
1386 | 0 | rd_ratio = (float)sub_block_rdcost[i] / (float)rdcost; |
1387 | 0 | features[feature_index++] = rd_ratio; |
1388 | 0 | } |
1389 | | |
1390 | | // Get variance of the 1:4 and 4:1 sub-blocks. |
1391 | 0 | unsigned int horz_4_source_var[SUB_PARTITIONS_PART4] = { 0 }; |
1392 | 0 | unsigned int vert_4_source_var[SUB_PARTITIONS_PART4] = { 0 }; |
1393 | 0 | { |
1394 | 0 | BLOCK_SIZE horz_4_bs = get_partition_subsize(bsize, PARTITION_HORZ_4); |
1395 | 0 | BLOCK_SIZE vert_4_bs = get_partition_subsize(bsize, PARTITION_VERT_4); |
1396 | |
|
1397 | 0 | assert(horz_4_bs != BLOCK_INVALID); |
1398 | 0 | assert(vert_4_bs != BLOCK_INVALID); |
1399 | |
|
1400 | 0 | av1_setup_src_planes(x, cpi->source, mi_row, mi_col, |
1401 | 0 | av1_num_planes(&cpi->common), bsize); |
1402 | 0 | const int src_stride = x->plane[0].src.stride; |
1403 | 0 | uint8_t *src = x->plane[0].src.buf; |
1404 | 0 | const MACROBLOCKD *const xd = &x->e_mbd; |
1405 | |
|
1406 | 0 | struct buf_2d horz_4_src, vert_4_src; |
1407 | 0 | horz_4_src.stride = src_stride; |
1408 | 0 | vert_4_src.stride = src_stride; |
1409 | |
|
1410 | 0 | for (int i = 0; i < SUB_PARTITIONS_PART4; ++i) { |
1411 | 0 | horz_4_src.buf = src + i * block_size_high[horz_4_bs] * src_stride; |
1412 | 0 | vert_4_src.buf = src + i * block_size_wide[vert_4_bs]; |
1413 | |
|
1414 | 0 | horz_4_source_var[i] = av1_get_perpixel_variance_facade( |
1415 | 0 | cpi, xd, &horz_4_src, horz_4_bs, AOM_PLANE_Y); |
1416 | 0 | vert_4_source_var[i] = av1_get_perpixel_variance_facade( |
1417 | 0 | cpi, xd, &vert_4_src, vert_4_bs, AOM_PLANE_Y); |
1418 | 0 | } |
1419 | 0 | } |
1420 | |
|
1421 | 0 | const float denom = (float)(pb_source_variance + 1); |
1422 | 0 | const float low_b = 0.1f; |
1423 | 0 | const float high_b = 10.0f; |
1424 | 0 | for (int i = 0; i < SUB_PARTITIONS_PART4; ++i) { |
1425 | | // Ratio between the 4:1 sub-block variance and the whole-block variance. |
1426 | 0 | float var_ratio = (float)(horz_4_source_var[i] + 1) / denom; |
1427 | 0 | if (var_ratio < low_b) var_ratio = low_b; |
1428 | 0 | if (var_ratio > high_b) var_ratio = high_b; |
1429 | 0 | features[feature_index++] = var_ratio; |
1430 | 0 | } |
1431 | 0 | for (int i = 0; i < SUB_PARTITIONS_PART4; ++i) { |
1432 | | // Ratio between the 1:4 sub-block RD and the whole-block RD. |
1433 | 0 | float var_ratio = (float)(vert_4_source_var[i] + 1) / denom; |
1434 | 0 | if (var_ratio < low_b) var_ratio = low_b; |
1435 | 0 | if (var_ratio > high_b) var_ratio = high_b; |
1436 | 0 | features[feature_index++] = var_ratio; |
1437 | 0 | } |
1438 | 0 | assert(feature_index == FEATURES); |
1439 | | |
1440 | | // Write features to file |
1441 | 0 | if (!frame_is_intra_only(&cpi->common)) { |
1442 | 0 | write_features_to_file(cpi->oxcf.partition_info_path, |
1443 | 0 | cpi->ext_part_controller.test_mode, features, |
1444 | 0 | FEATURES, 7, bsize, mi_row, mi_col); |
1445 | 0 | } |
1446 | | |
1447 | | // Calculate scores using the NN model. |
1448 | 0 | float score[LABELS] = { 0.0f }; |
1449 | 0 | av1_nn_predict(features, nn_config, 1, score); |
1450 | 0 | int int_score[LABELS]; |
1451 | 0 | int max_score = -1000; |
1452 | 0 | for (int i = 0; i < LABELS; ++i) { |
1453 | 0 | int_score[i] = (int)(100 * score[i]); |
1454 | 0 | max_score = AOMMAX(int_score[i], max_score); |
1455 | 0 | } |
1456 | | |
1457 | | // Make decisions based on the model scores. |
1458 | 0 | int thresh = max_score; |
1459 | 0 | switch (bsize) { |
1460 | 0 | case BLOCK_16X16: thresh -= 500; break; |
1461 | 0 | case BLOCK_32X32: thresh -= 500; break; |
1462 | 0 | case BLOCK_64X64: thresh -= 200; break; |
1463 | 0 | default: break; |
1464 | 0 | } |
1465 | 0 | av1_zero_array(part4_allowed, NUM_PART4_TYPES); |
1466 | 0 | for (int i = 0; i < LABELS; ++i) { |
1467 | 0 | if (int_score[i] >= thresh) { |
1468 | 0 | if ((i >> 0) & 1) part4_allowed[HORZ4] = 1; |
1469 | 0 | if ((i >> 1) & 1) part4_allowed[VERT4] = 1; |
1470 | 0 | } |
1471 | 0 | } |
1472 | 0 | } |
1473 | | #undef FEATURES |
1474 | | #undef LABELS |
1475 | | |
1476 | 9 | #define FEATURES 4 |
1477 | | void av1_ml_predict_breakout(AV1_COMP *const cpi, const MACROBLOCK *const x, |
1478 | | const RD_STATS *const rd_stats, |
1479 | | unsigned int pb_source_variance, int bit_depth, |
1480 | 9 | PartitionSearchState *part_state) { |
1481 | 9 | const PartitionBlkParams *blk_params = &part_state->part_blk_params; |
1482 | 9 | const int mi_row = blk_params->mi_row, mi_col = blk_params->mi_col; |
1483 | 9 | const BLOCK_SIZE bsize = blk_params->bsize; |
1484 | | |
1485 | 9 | const int bsize_idx = convert_bsize_to_idx(bsize); |
1486 | 9 | if (bsize_idx < 0) return; |
1487 | 9 | const float *ml_mean = av1_hd_partition_breakout_nn_mean[bsize_idx]; |
1488 | 9 | const float *ml_std = av1_hd_partition_breakout_nn_std[bsize_idx]; |
1489 | | |
1490 | 9 | const NN_CONFIG *nn_config = NULL; |
1491 | 9 | float thresh = 0; |
1492 | 9 | switch (bsize) { |
1493 | 0 | case BLOCK_8X8: |
1494 | 0 | nn_config = |
1495 | 0 | &av1_partition_breakout_nnconfig_8 |
1496 | 0 | [cpi->sf.part_sf.ml_partition_search_breakout_model_index]; |
1497 | 0 | thresh = cpi->sf.part_sf.ml_partition_search_breakout_thresh[bsize_idx]; |
1498 | 0 | break; |
1499 | 9 | case BLOCK_16X16: |
1500 | 9 | nn_config = |
1501 | 9 | &av1_partition_breakout_nnconfig_16 |
1502 | 9 | [cpi->sf.part_sf.ml_partition_search_breakout_model_index]; |
1503 | 9 | thresh = cpi->sf.part_sf.ml_partition_search_breakout_thresh[bsize_idx]; |
1504 | 9 | break; |
1505 | 0 | case BLOCK_32X32: |
1506 | 0 | nn_config = |
1507 | 0 | &av1_partition_breakout_nnconfig_32 |
1508 | 0 | [cpi->sf.part_sf.ml_partition_search_breakout_model_index]; |
1509 | 0 | thresh = cpi->sf.part_sf.ml_partition_search_breakout_thresh[bsize_idx]; |
1510 | 0 | break; |
1511 | 0 | case BLOCK_64X64: |
1512 | 0 | nn_config = |
1513 | 0 | &av1_partition_breakout_nnconfig_64 |
1514 | 0 | [cpi->sf.part_sf.ml_partition_search_breakout_model_index]; |
1515 | 0 | thresh = cpi->sf.part_sf.ml_partition_search_breakout_thresh[bsize_idx]; |
1516 | 0 | break; |
1517 | 0 | case BLOCK_128X128: |
1518 | 0 | nn_config = |
1519 | 0 | &av1_partition_breakout_nnconfig_128 |
1520 | 0 | [cpi->sf.part_sf.ml_partition_search_breakout_model_index]; |
1521 | 0 | thresh = cpi->sf.part_sf.ml_partition_search_breakout_thresh[bsize_idx]; |
1522 | 0 | break; |
1523 | 0 | default: assert(0 && "Unexpected bsize."); |
1524 | 9 | } |
1525 | 9 | if (!nn_config || thresh < 0) return; |
1526 | | |
1527 | 9 | const float ml_predict_breakout_thresh_scale[3] = { 1.15f, 1.05f, 1.0f }; |
1528 | 9 | thresh = thresh * ml_predict_breakout_thresh_scale |
1529 | 9 | [cpi->sf.part_sf.ml_predict_breakout_level - 1]; |
1530 | | |
1531 | | // Generate feature values. |
1532 | 9 | float features[FEATURES]; |
1533 | 9 | int feature_index = 0; |
1534 | | |
1535 | 9 | const int num_pels_log2 = num_pels_log2_lookup[bsize]; |
1536 | 9 | float rate_f = (float)AOMMIN(rd_stats->rate, INT_MAX); |
1537 | 9 | rate_f = ((float)x->rdmult / 128.0f / 512.0f / (float)(1 << num_pels_log2)) * |
1538 | 9 | rate_f; |
1539 | 9 | features[feature_index++] = rate_f; |
1540 | | |
1541 | 9 | const float dist_f = |
1542 | 9 | (float)(AOMMIN(rd_stats->dist, INT_MAX) >> num_pels_log2); |
1543 | 9 | features[feature_index++] = dist_f; |
1544 | | |
1545 | 9 | features[feature_index++] = (float)pb_source_variance; |
1546 | | |
1547 | 9 | const int dc_q = (int)x->plane[0].dequant_QTX[0] >> (bit_depth - 8); |
1548 | 9 | features[feature_index++] = (float)(dc_q * dc_q) / 256.0f; |
1549 | 9 | assert(feature_index == FEATURES); |
1550 | | |
1551 | 9 | if (cpi->sf.part_sf.ml_partition_search_breakout_model_index) { |
1552 | 0 | for (int idx = 0; idx < FEATURES; idx++) { |
1553 | 0 | features[idx] = (features[idx] - ml_mean[idx]) / ml_std[idx]; |
1554 | 0 | } |
1555 | 0 | } |
1556 | | |
1557 | | // Write features to file |
1558 | 9 | write_features_to_file(cpi->oxcf.partition_info_path, |
1559 | 9 | cpi->ext_part_controller.test_mode, features, FEATURES, |
1560 | 9 | 2, bsize, mi_row, mi_col); |
1561 | | |
1562 | 9 | if (ext_ml_model_decision_after_none(&cpi->ext_part_controller, |
1563 | 9 | frame_is_intra_only(&cpi->common), |
1564 | 9 | features, &part_state->do_square_split, |
1565 | 9 | &part_state->do_rectangular_split)) { |
1566 | 0 | return; |
1567 | 0 | } |
1568 | | |
1569 | | // Calculate score using the NN model. |
1570 | 9 | float score = 0.0f; |
1571 | 9 | av1_nn_predict(features, nn_config, 1, &score); |
1572 | | |
1573 | 9 | float thresh_score = (float)log(thresh / (1 - thresh)); |
1574 | | |
1575 | | // Make decision. |
1576 | 9 | if (score >= thresh_score) { |
1577 | 0 | part_state->do_square_split = 0; |
1578 | 0 | part_state->do_rectangular_split = 0; |
1579 | 0 | } |
1580 | 9 | } |
1581 | | #undef FEATURES |
1582 | | |
1583 | | void av1_prune_partitions_before_search(AV1_COMP *const cpi, |
1584 | | MACROBLOCK *const x, |
1585 | | SIMPLE_MOTION_DATA_TREE *const sms_tree, |
1586 | 8.65M | PartitionSearchState *part_state) { |
1587 | 8.65M | const AV1_COMMON *const cm = &cpi->common; |
1588 | 8.65M | const CommonModeInfoParams *const mi_params = &cm->mi_params; |
1589 | | |
1590 | 8.65M | const PartitionBlkParams *blk_params = &part_state->part_blk_params; |
1591 | 8.65M | const BLOCK_SIZE bsize = blk_params->bsize; |
1592 | | |
1593 | | #if CONFIG_THREE_PASS |
1594 | | if (cpi->third_pass_ctx) { |
1595 | | int mi_row = blk_params->mi_row; |
1596 | | int mi_col = blk_params->mi_col; |
1597 | | double ratio_h, ratio_w; |
1598 | | av1_get_third_pass_ratio(cpi->third_pass_ctx, 0, cm->height, cm->width, |
1599 | | &ratio_h, &ratio_w); |
1600 | | THIRD_PASS_MI_INFO *this_mi = av1_get_third_pass_mi( |
1601 | | cpi->third_pass_ctx, 0, mi_row, mi_col, ratio_h, ratio_w); |
1602 | | BLOCK_SIZE third_pass_bsize = |
1603 | | av1_get_third_pass_adjusted_blk_size(this_mi, ratio_h, ratio_w); |
1604 | | // check the actual partition of this block in the second pass |
1605 | | PARTITION_TYPE third_pass_part = |
1606 | | av1_third_pass_get_sb_part_type(cpi->third_pass_ctx, this_mi); |
1607 | | |
1608 | | int is_edge = (mi_row + mi_size_high[bsize] >= cm->mi_params.mi_rows) || |
1609 | | (mi_col + mi_size_wide[bsize] >= cm->mi_params.mi_cols); |
1610 | | |
1611 | | if (!is_edge && block_size_wide[bsize] >= 16) { |
1612 | | // If in second pass we used rectangular partition, then do not search for |
1613 | | // rectangular partition in the different direction. |
1614 | | if (third_pass_part != PARTITION_NONE) { |
1615 | | if (third_pass_part == PARTITION_HORZ || |
1616 | | third_pass_part == PARTITION_HORZ_4 || |
1617 | | third_pass_part == PARTITION_HORZ_A || |
1618 | | third_pass_part == PARTITION_HORZ_B) { |
1619 | | part_state->partition_rect_allowed[VERT] = 0; |
1620 | | } else if (third_pass_part == PARTITION_VERT || |
1621 | | third_pass_part == PARTITION_VERT_4 || |
1622 | | third_pass_part == PARTITION_VERT_A || |
1623 | | third_pass_part == PARTITION_VERT_B) { |
1624 | | part_state->partition_rect_allowed[HORZ] = 0; |
1625 | | } |
1626 | | } |
1627 | | |
1628 | | int minSize = AOMMIN(block_size_wide[third_pass_bsize], |
1629 | | block_size_high[third_pass_bsize]); |
1630 | | int maxSize = AOMMAX(block_size_wide[third_pass_bsize], |
1631 | | block_size_high[third_pass_bsize]); |
1632 | | if (block_size_wide[bsize] < minSize / 4) { |
1633 | | // Current partition is too small, just terminate |
1634 | | part_state->terminate_partition_search = 1; |
1635 | | return; |
1636 | | } else if (block_size_wide[bsize] < minSize / 2) { |
1637 | | if (third_pass_part != PARTITION_NONE) { |
1638 | | // Current partition is very small, and in second pass we used |
1639 | | // rectangular partition. Terminate the search here then. |
1640 | | part_state->terminate_partition_search = 1; |
1641 | | return; |
1642 | | } else { |
1643 | | // Partition is small, but we still check this partition, only disable |
1644 | | // further splits. |
1645 | | // TODO(any): check why this is not covered by the termination for < |
1646 | | // minSize/4. |
1647 | | av1_disable_square_split_partition(part_state); |
1648 | | av1_disable_rect_partitions(part_state); |
1649 | | return; |
1650 | | } |
1651 | | } else if (block_size_wide[bsize] > maxSize) { |
1652 | | // Partition is larger than in the second pass. Only allow split. |
1653 | | av1_set_square_split_only(part_state); |
1654 | | return; |
1655 | | } else if (block_size_wide[bsize] >= minSize && |
1656 | | block_size_wide[bsize] <= maxSize) { |
1657 | | // Partition is within a range where it is very likely to find a good |
1658 | | // choice, so do not prune anything. |
1659 | | return; |
1660 | | } |
1661 | | } |
1662 | | } |
1663 | | #endif // CONFIG_THREE_PASS |
1664 | | |
1665 | | // Prune rectangular partitions for larger blocks. |
1666 | 8.65M | if (bsize > cpi->sf.part_sf.rect_partition_eval_thresh) { |
1667 | 0 | part_state->do_rectangular_split = 0; |
1668 | 0 | part_state->partition_rect_allowed[HORZ] = 0; |
1669 | 0 | part_state->partition_rect_allowed[VERT] = 0; |
1670 | 0 | } |
1671 | | |
1672 | | // Prune rectangular, AB and 4-way partition based on q index and block size |
1673 | 8.65M | if (cpi->sf.part_sf.prune_rectangular_split_based_on_qidx == 1) { |
1674 | 0 | if (bsize == BLOCK_8X8 && x->qindex < 35) |
1675 | 0 | av1_disable_rect_partitions(part_state); |
1676 | |
|
1677 | 8.65M | } else if (cpi->sf.part_sf.prune_rectangular_split_based_on_qidx == 2) { |
1678 | | // Enumeration difference between two square partitions |
1679 | 6.55M | const int sqr_bsize_step = BLOCK_32X32 - BLOCK_16X16; |
1680 | 6.55M | int max_bsize = |
1681 | 6.55M | BLOCK_32X32 - (x->qindex * 3 / QINDEX_RANGE) * sqr_bsize_step; |
1682 | 6.55M | max_bsize = AOMMAX(max_bsize, BLOCK_4X4); |
1683 | 6.55M | const BLOCK_SIZE max_prune_bsize = |
1684 | 6.55M | (BLOCK_SIZE)AOMMIN(max_bsize, BLOCK_32X32); |
1685 | | |
1686 | | // Prune partition |
1687 | | // qidx 0 to 85: prune bsize below BLOCK_32X32 |
1688 | | // qidx 86 to 170: prune bsize below BLOCK_16X16 |
1689 | | // qidx 171 to 255: prune bsize below BLOCK_8X8 |
1690 | 6.55M | if (bsize < max_prune_bsize) { |
1691 | 5.22M | av1_disable_rect_partitions(part_state); |
1692 | 5.22M | } |
1693 | 6.55M | } |
1694 | | |
1695 | 8.65M | if (cpi->sf.part_sf.prune_sub_8x8_partition_level && (bsize == BLOCK_8X8)) { |
1696 | 3.30M | const MACROBLOCKD *const xd = &x->e_mbd; |
1697 | 3.30M | int prune_sub_8x8; |
1698 | 3.30M | if (cpi->sf.part_sf.prune_sub_8x8_partition_level == 2) { |
1699 | 1.74M | prune_sub_8x8 = 1; |
1700 | 1.74M | } else { |
1701 | 1.56M | assert(cpi->sf.part_sf.prune_sub_8x8_partition_level == 1); |
1702 | | // Prune if both neighbors are available and either is > BLOCK_8X8 |
1703 | 1.56M | prune_sub_8x8 = xd->left_available && xd->up_available && |
1704 | 957k | (xd->left_mbmi->bsize > BLOCK_8X8 || |
1705 | 683k | xd->above_mbmi->bsize > BLOCK_8X8); |
1706 | 1.56M | } |
1707 | 3.30M | if (prune_sub_8x8) { |
1708 | 2.20M | av1_disable_all_splits(part_state); |
1709 | 2.20M | } |
1710 | 3.30M | } |
1711 | | |
1712 | | // A CNN-based speed feature pruning out either split or all non-split |
1713 | | // partition in INTRA frame coding. |
1714 | 8.65M | const int try_intra_cnn_based_part_prune = |
1715 | 8.65M | frame_is_intra_only(cm) && |
1716 | 7.31M | cpi->sf.part_sf.intra_cnn_based_part_prune_level && |
1717 | 7.31M | cm->seq_params->sb_size >= BLOCK_64X64 && bsize <= BLOCK_64X64 && |
1718 | 7.31M | blk_params->bsize_at_least_8x8 && |
1719 | 3.71M | av1_is_whole_blk_in_frame(blk_params, mi_params); |
1720 | | |
1721 | 8.65M | if (try_intra_cnn_based_part_prune) { |
1722 | 3.04M | intra_mode_cnn_partition(&cpi->common, x, x->part_search_info.quad_tree_idx, |
1723 | 3.04M | cpi->sf.part_sf.intra_cnn_based_part_prune_level, |
1724 | 3.04M | part_state); |
1725 | 3.04M | } |
1726 | | |
1727 | | // Use simple motion search to prune out split or non-split partitions. This |
1728 | | // must be done prior to PARTITION_SPLIT to propagate the initial mvs to a |
1729 | | // smaller blocksize. |
1730 | 8.65M | const int try_split_only = |
1731 | 8.65M | cpi->sf.part_sf.simple_motion_search_split && |
1732 | 8.65M | part_state->do_square_split && blk_params->bsize_at_least_8x8 && |
1733 | 2.61M | av1_is_whole_blk_in_frame(blk_params, mi_params) && |
1734 | 1.76M | !frame_is_intra_only(cm) && !av1_superres_scaled(cm); |
1735 | | |
1736 | 8.65M | if (try_split_only) { |
1737 | 233k | simple_motion_search_based_split(cpi, x, sms_tree, part_state); |
1738 | 233k | } |
1739 | | |
1740 | | // Use simple motion search to prune out rectangular partition in some |
1741 | | // direction. The results are stored in prune_horz and prune_vert in order to |
1742 | | // bypass future related pruning checks if a pruning decision has been made. |
1743 | | |
1744 | | // We want to search at least one partition mode, so don't prune if NONE and |
1745 | | // SPLIT are disabled. |
1746 | 8.65M | const int non_rect_part_allowed = |
1747 | 8.65M | part_state->do_square_split || part_state->partition_none_allowed; |
1748 | | // Only run the model if the partitions are not already pruned. |
1749 | 8.65M | const int rect_part_allowed = part_state->do_rectangular_split && |
1750 | 1.51M | ((part_state->partition_rect_allowed[HORZ] && |
1751 | 876k | !part_state->prune_rect_part[HORZ]) || |
1752 | 641k | (part_state->partition_rect_allowed[VERT] && |
1753 | 290k | !part_state->prune_rect_part[VERT])); |
1754 | | |
1755 | 8.65M | const int try_prune_rect = cpi->sf.part_sf.simple_motion_search_prune_rect && |
1756 | 8.65M | !frame_is_intra_only(cm) && |
1757 | 1.33M | non_rect_part_allowed && rect_part_allowed && |
1758 | 133k | !av1_superres_scaled(cm); |
1759 | | |
1760 | 8.65M | if (try_prune_rect) { |
1761 | 133k | simple_motion_search_prune_rect(cpi, x, sms_tree, part_state); |
1762 | 133k | } |
1763 | 8.65M | } |
1764 | | |
1765 | | #ifndef NDEBUG |
1766 | | static inline int is_bsize_square(BLOCK_SIZE bsize) { |
1767 | | return block_size_wide[bsize] == block_size_high[bsize]; |
1768 | | } |
1769 | | #endif // NDEBUG |
1770 | | |
1771 | | void av1_prune_partitions_by_max_min_bsize(SuperBlockEnc *sb_enc, |
1772 | 8.65M | PartitionSearchState *part_state) { |
1773 | 8.65M | assert(is_bsize_square(sb_enc->max_partition_size)); |
1774 | 8.65M | assert(is_bsize_square(sb_enc->min_partition_size)); |
1775 | 8.65M | assert(sb_enc->min_partition_size <= sb_enc->max_partition_size); |
1776 | 8.65M | const PartitionBlkParams *blk_params = &part_state->part_blk_params; |
1777 | 8.65M | const BLOCK_SIZE bsize = blk_params->bsize; |
1778 | 8.65M | assert(is_bsize_square(bsize)); |
1779 | 8.65M | const int max_partition_size_1d = block_size_wide[sb_enc->max_partition_size]; |
1780 | 8.65M | const int min_partition_size_1d = block_size_wide[sb_enc->min_partition_size]; |
1781 | 8.65M | const int bsize_1d = block_size_wide[bsize]; |
1782 | 8.65M | assert(min_partition_size_1d <= max_partition_size_1d); |
1783 | 8.65M | const int is_le_min_sq_part = bsize_1d <= min_partition_size_1d; |
1784 | 8.65M | const int is_gt_max_sq_part = bsize_1d > max_partition_size_1d; |
1785 | 8.65M | if (is_gt_max_sq_part) { |
1786 | | // If current block size is larger than max, only allow split. |
1787 | 103k | av1_set_square_split_only(part_state); |
1788 | 8.55M | } else if (is_le_min_sq_part) { |
1789 | | // If current block size is less or equal to min, only allow none if valid |
1790 | | // block large enough; only allow split otherwise. |
1791 | 3.60M | av1_disable_rect_partitions(part_state); |
1792 | | |
1793 | | // only disable square split when current block is not at the picture |
1794 | | // boundary. otherwise, inherit the square split flag from previous logic |
1795 | 3.60M | if (av1_blk_has_rows_and_cols(blk_params)) { |
1796 | 3.60M | part_state->do_square_split = 0; |
1797 | 3.60M | } |
1798 | 3.60M | part_state->partition_none_allowed = !(part_state->do_square_split); |
1799 | 3.60M | } |
1800 | 8.65M | } |
1801 | | |
1802 | | // Decide whether to evaluate the AB partition specified by part_type based on |
1803 | | // split and HORZ/VERT info |
1804 | | static int evaluate_ab_partition_based_on_split( |
1805 | | const PC_TREE *pc_tree, PARTITION_TYPE rect_part, |
1806 | | const RD_RECT_PART_WIN_INFO *rect_part_win_info, int qindex, int split_idx1, |
1807 | 0 | int split_idx2) { |
1808 | 0 | int num_win = 0; |
1809 | | // Threshold for number of winners |
1810 | | // Conservative pruning for high quantizers |
1811 | 0 | const int num_win_thresh = AOMMIN(3 * (2 * (MAXQ - qindex) / MAXQ), 3); |
1812 | 0 | int sub_part_win = |
1813 | 0 | (rect_part_win_info == NULL) ? (pc_tree->partitioning == rect_part) |
1814 | 0 | : (rect_part == PARTITION_HORZ) ? rect_part_win_info->rect_part_win[HORZ] |
1815 | 0 | : rect_part_win_info->rect_part_win[VERT]; |
1816 | 0 | num_win += (sub_part_win) ? 1 : 0; |
1817 | 0 | if (pc_tree->split[split_idx1]) { |
1818 | 0 | num_win += |
1819 | 0 | (pc_tree->split[split_idx1]->partitioning == PARTITION_NONE) ? 1 : 0; |
1820 | 0 | } else { |
1821 | 0 | num_win += 1; |
1822 | 0 | } |
1823 | 0 | if (pc_tree->split[split_idx2]) { |
1824 | 0 | num_win += |
1825 | 0 | (pc_tree->split[split_idx2]->partitioning == PARTITION_NONE) ? 1 : 0; |
1826 | 0 | } else { |
1827 | 0 | num_win += 1; |
1828 | 0 | } |
1829 | 0 | if (num_win < num_win_thresh) { |
1830 | 0 | return 0; |
1831 | 0 | } |
1832 | 0 | return 1; |
1833 | 0 | } |
1834 | | |
1835 | | void av1_prune_ab_partitions(AV1_COMP *cpi, const MACROBLOCK *x, |
1836 | | const PC_TREE *pc_tree, int pb_source_variance, |
1837 | | int64_t best_rdcost, |
1838 | | const RD_RECT_PART_WIN_INFO *rect_part_win_info, |
1839 | | bool ext_partition_allowed, |
1840 | | PartitionSearchState *part_state, |
1841 | 7.96M | int *ab_partitions_allowed) { |
1842 | 7.96M | int64_t *horz_rd = part_state->rect_part_rd[HORZ]; |
1843 | 7.96M | int64_t *vert_rd = part_state->rect_part_rd[VERT]; |
1844 | 7.96M | int64_t *split_rd = part_state->split_rd; |
1845 | 7.96M | const PartitionCfg *const part_cfg = &cpi->oxcf.part_cfg; |
1846 | | // The standard AB partitions are allowed initially if ext-partition-types are |
1847 | | // allowed. |
1848 | 7.96M | int horzab_partition_allowed = ext_partition_allowed && |
1849 | 0 | part_cfg->enable_ab_partitions && |
1850 | 0 | part_state->partition_rect_allowed[HORZ]; |
1851 | 7.96M | int vertab_partition_allowed = ext_partition_allowed && |
1852 | 0 | part_cfg->enable_ab_partitions && |
1853 | 0 | part_state->partition_rect_allowed[VERT]; |
1854 | | |
1855 | | // Pruning: pruning out AB partitions on one main direction based on the |
1856 | | // current best partition and source variance. |
1857 | 7.96M | if (cpi->sf.part_sf.prune_ext_partition_types_search_level) { |
1858 | 7.96M | if (cpi->sf.part_sf.prune_ext_partition_types_search_level == 1) { |
1859 | | // TODO(debargha,huisu@google.com): may need to tune the threshold for |
1860 | | // pb_source_variance. |
1861 | 7.96M | horzab_partition_allowed &= (pc_tree->partitioning == PARTITION_HORZ || |
1862 | 7.87M | (pc_tree->partitioning == PARTITION_NONE && |
1863 | 6.36M | pb_source_variance < 32) || |
1864 | 7.82M | pc_tree->partitioning == PARTITION_SPLIT); |
1865 | 7.96M | vertab_partition_allowed &= (pc_tree->partitioning == PARTITION_VERT || |
1866 | 7.89M | (pc_tree->partitioning == PARTITION_NONE && |
1867 | 6.36M | pb_source_variance < 32) || |
1868 | 7.84M | pc_tree->partitioning == PARTITION_SPLIT); |
1869 | 18.4E | } else { |
1870 | 18.4E | horzab_partition_allowed &= (pc_tree->partitioning == PARTITION_HORZ || |
1871 | 0 | pc_tree->partitioning == PARTITION_SPLIT); |
1872 | 18.4E | vertab_partition_allowed &= (pc_tree->partitioning == PARTITION_VERT || |
1873 | 0 | pc_tree->partitioning == PARTITION_SPLIT); |
1874 | 18.4E | } |
1875 | 7.96M | horz_rd[0] = (horz_rd[0] < INT64_MAX ? horz_rd[0] : 0); |
1876 | 7.96M | horz_rd[1] = (horz_rd[1] < INT64_MAX ? horz_rd[1] : 0); |
1877 | 7.96M | vert_rd[0] = (vert_rd[0] < INT64_MAX ? vert_rd[0] : 0); |
1878 | 7.96M | vert_rd[1] = (vert_rd[1] < INT64_MAX ? vert_rd[1] : 0); |
1879 | 7.96M | split_rd[0] = (split_rd[0] < INT64_MAX ? split_rd[0] : 0); |
1880 | 7.96M | split_rd[1] = (split_rd[1] < INT64_MAX ? split_rd[1] : 0); |
1881 | 7.96M | split_rd[2] = (split_rd[2] < INT64_MAX ? split_rd[2] : 0); |
1882 | 7.96M | split_rd[3] = (split_rd[3] < INT64_MAX ? split_rd[3] : 0); |
1883 | 7.96M | } |
1884 | | |
1885 | | // Pruning: pruning out horz_a or horz_b if the combined rdcost of its |
1886 | | // subblocks estimated from previous partitions is much higher than the best |
1887 | | // rd so far. |
1888 | 7.96M | ab_partitions_allowed[HORZ_A] = horzab_partition_allowed; |
1889 | 7.96M | ab_partitions_allowed[HORZ_B] = horzab_partition_allowed; |
1890 | 7.96M | if (cpi->sf.part_sf.prune_ext_partition_types_search_level) { |
1891 | 7.96M | const int64_t horz_a_rd = horz_rd[1] + split_rd[0] + split_rd[1]; |
1892 | 7.96M | const int64_t horz_b_rd = horz_rd[0] + split_rd[2] + split_rd[3]; |
1893 | 7.96M | switch (cpi->sf.part_sf.prune_ext_partition_types_search_level) { |
1894 | 7.96M | case 1: |
1895 | 7.96M | ab_partitions_allowed[HORZ_A] &= (horz_a_rd / 16 * 14 < best_rdcost); |
1896 | 7.96M | ab_partitions_allowed[HORZ_B] &= (horz_b_rd / 16 * 14 < best_rdcost); |
1897 | 7.96M | break; |
1898 | 0 | case 2: |
1899 | 0 | default: |
1900 | 0 | ab_partitions_allowed[HORZ_A] &= (horz_a_rd / 16 * 15 < best_rdcost); |
1901 | 0 | ab_partitions_allowed[HORZ_B] &= (horz_b_rd / 16 * 15 < best_rdcost); |
1902 | 0 | break; |
1903 | 7.96M | } |
1904 | 7.96M | } |
1905 | | |
1906 | | // Pruning: pruning out vert_a or vert_b if the combined rdcost of its |
1907 | | // subblocks estimated from previous partitions is much higher than the best |
1908 | | // rd so far. |
1909 | 7.96M | ab_partitions_allowed[VERT_A] = vertab_partition_allowed; |
1910 | 7.96M | ab_partitions_allowed[VERT_B] = vertab_partition_allowed; |
1911 | 7.96M | if (cpi->sf.part_sf.prune_ext_partition_types_search_level) { |
1912 | 7.96M | const int64_t vert_a_rd = vert_rd[1] + split_rd[0] + split_rd[2]; |
1913 | 7.96M | const int64_t vert_b_rd = vert_rd[0] + split_rd[1] + split_rd[3]; |
1914 | 7.96M | switch (cpi->sf.part_sf.prune_ext_partition_types_search_level) { |
1915 | 7.96M | case 1: |
1916 | 7.96M | ab_partitions_allowed[VERT_A] &= (vert_a_rd / 16 * 14 < best_rdcost); |
1917 | 7.96M | ab_partitions_allowed[VERT_B] &= (vert_b_rd / 16 * 14 < best_rdcost); |
1918 | 7.96M | break; |
1919 | 0 | case 2: |
1920 | 0 | default: |
1921 | 0 | ab_partitions_allowed[VERT_A] &= (vert_a_rd / 16 * 15 < best_rdcost); |
1922 | 0 | ab_partitions_allowed[VERT_B] &= (vert_b_rd / 16 * 15 < best_rdcost); |
1923 | 0 | break; |
1924 | 7.96M | } |
1925 | 7.96M | } |
1926 | | |
1927 | | // Pruning: pruning out some ab partitions using a DNN taking rd costs of |
1928 | | // sub-blocks from previous basic partition types. |
1929 | 7.96M | if (cpi->sf.part_sf.ml_prune_partition && ext_partition_allowed && |
1930 | 0 | part_state->partition_rect_allowed[HORZ] && |
1931 | 0 | part_state->partition_rect_allowed[VERT]) { |
1932 | | // TODO(huisu@google.com): x->source_variance may not be the current |
1933 | | // block's variance. The correct one to use is pb_source_variance. Need to |
1934 | | // re-train the model to fix it. |
1935 | 0 | ml_prune_ab_partition(cpi, pc_tree->partitioning, |
1936 | 0 | get_unsigned_bits(x->source_variance), best_rdcost, |
1937 | 0 | part_state, ab_partitions_allowed); |
1938 | 0 | } |
1939 | | |
1940 | | // Pruning: pruning AB partitions based on the number of horz/vert wins |
1941 | | // in the current block and sub-blocks in PARTITION_SPLIT. |
1942 | 7.96M | if (cpi->sf.part_sf.prune_ext_part_using_split_info >= 2 && |
1943 | 7.96M | ab_partitions_allowed[HORZ_A]) { |
1944 | 0 | ab_partitions_allowed[HORZ_A] &= evaluate_ab_partition_based_on_split( |
1945 | 0 | pc_tree, PARTITION_HORZ, rect_part_win_info, x->qindex, 0, 1); |
1946 | 0 | } |
1947 | 7.96M | if (cpi->sf.part_sf.prune_ext_part_using_split_info >= 2 && |
1948 | 7.96M | ab_partitions_allowed[HORZ_B]) { |
1949 | 0 | ab_partitions_allowed[HORZ_B] &= evaluate_ab_partition_based_on_split( |
1950 | 0 | pc_tree, PARTITION_HORZ, rect_part_win_info, x->qindex, 2, 3); |
1951 | 0 | } |
1952 | 7.96M | if (cpi->sf.part_sf.prune_ext_part_using_split_info >= 2 && |
1953 | 7.96M | ab_partitions_allowed[VERT_A]) { |
1954 | 0 | ab_partitions_allowed[VERT_A] &= evaluate_ab_partition_based_on_split( |
1955 | 0 | pc_tree, PARTITION_VERT, rect_part_win_info, x->qindex, 0, 2); |
1956 | 0 | } |
1957 | 7.96M | if (cpi->sf.part_sf.prune_ext_part_using_split_info >= 2 && |
1958 | 7.96M | ab_partitions_allowed[VERT_B]) { |
1959 | 0 | ab_partitions_allowed[VERT_B] &= evaluate_ab_partition_based_on_split( |
1960 | 0 | pc_tree, PARTITION_VERT, rect_part_win_info, x->qindex, 1, 3); |
1961 | 0 | } |
1962 | 7.96M | } |
1963 | | |
1964 | | // Prepare features for the external model. Specifically, features after |
1965 | | // ab partition is searched. |
1966 | | static void prepare_features_after_part_ab( |
1967 | | const AV1_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize, |
1968 | | int part_ctx, int64_t best_rd, |
1969 | | int64_t rect_part_rd[NUM_RECT_PARTS][SUB_PARTITIONS_RECT], |
1970 | | int64_t split_rd[SUB_PARTITIONS_SPLIT], unsigned int pb_source_variance, |
1971 | 0 | int mi_row, int mi_col, aom_partition_features_t *const features) { |
1972 | 0 | int64_t *horz_rd = rect_part_rd[HORZ]; |
1973 | 0 | int64_t *vert_rd = rect_part_rd[VERT]; |
1974 | | |
1975 | | // Generate features. |
1976 | 0 | int feature_index = 0; |
1977 | 0 | features->after_part_ab.f[feature_index++] = (float)part_ctx; |
1978 | 0 | features->after_part_ab.f[feature_index++] = |
1979 | 0 | (float)get_unsigned_bits(pb_source_variance); |
1980 | |
|
1981 | 0 | const int rdcost = (int)AOMMIN(INT_MAX, best_rd); |
1982 | 0 | int sub_block_rdcost[8] = { 0 }; |
1983 | 0 | int rd_index = 0; |
1984 | 0 | for (int i = 0; i < SUB_PARTITIONS_RECT; ++i) { |
1985 | 0 | if (horz_rd[i] > 0 && horz_rd[i] < 1000000000) |
1986 | 0 | sub_block_rdcost[rd_index] = (int)horz_rd[i]; |
1987 | 0 | ++rd_index; |
1988 | 0 | } |
1989 | 0 | for (int i = 0; i < SUB_PARTITIONS_RECT; ++i) { |
1990 | 0 | if (vert_rd[i] > 0 && vert_rd[i] < 1000000000) |
1991 | 0 | sub_block_rdcost[rd_index] = (int)vert_rd[i]; |
1992 | 0 | ++rd_index; |
1993 | 0 | } |
1994 | 0 | for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) { |
1995 | 0 | if (split_rd[i] > 0 && split_rd[i] < 1000000000) |
1996 | 0 | sub_block_rdcost[rd_index] = (int)split_rd[i]; |
1997 | 0 | ++rd_index; |
1998 | 0 | } |
1999 | 0 | for (int i = 0; i < 8; ++i) { |
2000 | | // Ratio between the sub-block RD and the whole-block RD. |
2001 | 0 | float rd_ratio = 1.0f; |
2002 | 0 | if (sub_block_rdcost[i] > 0 && sub_block_rdcost[i] < rdcost) |
2003 | 0 | rd_ratio = (float)sub_block_rdcost[i] / (float)rdcost; |
2004 | 0 | features->after_part_ab.f[feature_index++] = rd_ratio; |
2005 | 0 | } |
2006 | | |
2007 | | // 4-way partitions are only allowed for these three square block sizes. |
2008 | 0 | assert(bsize == BLOCK_16X16 || bsize == BLOCK_32X32 || bsize == BLOCK_64X64); |
2009 | | |
2010 | | // Get variance of the 1:4 and 4:1 sub-blocks. |
2011 | 0 | unsigned int horz_4_source_var[SUB_PARTITIONS_PART4] = { 0 }; |
2012 | 0 | unsigned int vert_4_source_var[SUB_PARTITIONS_PART4] = { 0 }; |
2013 | 0 | { |
2014 | 0 | BLOCK_SIZE horz_4_bs = get_partition_subsize(bsize, PARTITION_HORZ_4); |
2015 | 0 | BLOCK_SIZE vert_4_bs = get_partition_subsize(bsize, PARTITION_VERT_4); |
2016 | |
|
2017 | 0 | assert(horz_4_bs != BLOCK_INVALID); |
2018 | 0 | assert(vert_4_bs != BLOCK_INVALID); |
2019 | |
|
2020 | 0 | av1_setup_src_planes(x, cpi->source, mi_row, mi_col, |
2021 | 0 | av1_num_planes(&cpi->common), bsize); |
2022 | 0 | const int src_stride = x->plane[0].src.stride; |
2023 | 0 | uint8_t *src = x->plane[0].src.buf; |
2024 | 0 | const MACROBLOCKD *const xd = &x->e_mbd; |
2025 | |
|
2026 | 0 | struct buf_2d horz_4_src, vert_4_src; |
2027 | 0 | horz_4_src.stride = src_stride; |
2028 | 0 | vert_4_src.stride = src_stride; |
2029 | |
|
2030 | 0 | for (int i = 0; i < SUB_PARTITIONS_PART4; ++i) { |
2031 | 0 | horz_4_src.buf = src + i * block_size_high[horz_4_bs] * src_stride; |
2032 | 0 | vert_4_src.buf = src + i * block_size_wide[vert_4_bs]; |
2033 | |
|
2034 | 0 | horz_4_source_var[i] = av1_get_perpixel_variance_facade( |
2035 | 0 | cpi, xd, &horz_4_src, horz_4_bs, AOM_PLANE_Y); |
2036 | 0 | vert_4_source_var[i] = av1_get_perpixel_variance_facade( |
2037 | 0 | cpi, xd, &vert_4_src, vert_4_bs, AOM_PLANE_Y); |
2038 | 0 | } |
2039 | 0 | } |
2040 | |
|
2041 | 0 | const float denom = (float)(pb_source_variance + 1); |
2042 | 0 | const float low_b = 0.1f; |
2043 | 0 | const float high_b = 10.0f; |
2044 | 0 | for (int i = 0; i < SUB_PARTITIONS_PART4; ++i) { |
2045 | | // Ratio between the 4:1 sub-block variance and the whole-block variance. |
2046 | 0 | float var_ratio = (float)(horz_4_source_var[i] + 1) / denom; |
2047 | 0 | if (var_ratio < low_b) var_ratio = low_b; |
2048 | 0 | if (var_ratio > high_b) var_ratio = high_b; |
2049 | 0 | features->after_part_ab.f[feature_index++] = var_ratio; |
2050 | 0 | } |
2051 | 0 | for (int i = 0; i < SUB_PARTITIONS_PART4; ++i) { |
2052 | | // Ratio between the 1:4 sub-block RD and the whole-block RD. |
2053 | 0 | float var_ratio = (float)(vert_4_source_var[i] + 1) / denom; |
2054 | 0 | if (var_ratio < low_b) var_ratio = low_b; |
2055 | 0 | if (var_ratio > high_b) var_ratio = high_b; |
2056 | 0 | features->after_part_ab.f[feature_index++] = var_ratio; |
2057 | 0 | } |
2058 | 0 | assert(feature_index == 18); |
2059 | 0 | } |
2060 | | |
2061 | | // If the external partition model is used, we let it determine partition |
2062 | | // decisions before partition none. Specifically, these parameters: |
2063 | | // partition_none_allowed |
2064 | | // partition_horz_allowed |
2065 | | // partition_vert_allowed |
2066 | | // do_rectangular_split |
2067 | | // do_square_split |
2068 | | static bool ext_ml_model_decision_before_none( |
2069 | | AV1_COMP *cpi, const float features_from_motion[FEATURE_SIZE_SMS_SPLIT], |
2070 | | int *partition_none_allowed, int *partition_horz_allowed, |
2071 | | int *partition_vert_allowed, int *do_rectangular_split, |
2072 | 233k | int *do_square_split) { |
2073 | 233k | ExtPartController *const ext_part_controller = &cpi->ext_part_controller; |
2074 | 233k | if (!ext_part_controller->ready) return false; |
2075 | | |
2076 | | // Setup features. |
2077 | 6 | aom_partition_features_t features; |
2078 | 6 | features.id = AOM_EXT_PART_FEATURE_BEFORE_NONE; |
2079 | 6 | for (int i = 0; i < FEATURE_SIZE_SMS_SPLIT; ++i) { |
2080 | 0 | features.before_part_none.f[i] = features_from_motion[i]; |
2081 | 0 | } |
2082 | | |
2083 | | // Send necessary features to the external model. |
2084 | 6 | av1_ext_part_send_features(ext_part_controller, &features); |
2085 | | |
2086 | | // Get partition decisions from the external model. |
2087 | 6 | aom_partition_decision_t decision; |
2088 | 6 | const bool valid_decision = |
2089 | 6 | av1_ext_part_get_partition_decision(ext_part_controller, &decision); |
2090 | 6 | if (!valid_decision) return false; |
2091 | | |
2092 | | // Populate decisions |
2093 | 6 | *partition_none_allowed = decision.partition_none_allowed; |
2094 | 6 | *partition_horz_allowed = decision.partition_rect_allowed[HORZ]; |
2095 | 6 | *partition_vert_allowed = decision.partition_rect_allowed[VERT]; |
2096 | 6 | *do_rectangular_split = decision.do_rectangular_split; |
2097 | 6 | *do_square_split = decision.do_square_split; |
2098 | | |
2099 | 6 | return true; |
2100 | 6 | } |
2101 | | |
2102 | | // If the external partition model is used, we let it determine partition |
2103 | | // decisions before partition none. Specifically, these parameters: |
2104 | | // prune_horz |
2105 | | // prune_vert |
2106 | | static bool ext_ml_model_decision_before_none_part2( |
2107 | | AV1_COMP *cpi, |
2108 | | const float features_from_motion[FEATURE_SIZE_SMS_PRUNE_PART], |
2109 | 133k | int *prune_horz, int *prune_vert) { |
2110 | 133k | ExtPartController *const ext_part_controller = &cpi->ext_part_controller; |
2111 | 133k | if (!ext_part_controller->ready) return false; |
2112 | | |
2113 | | // Setup features. |
2114 | 25 | aom_partition_features_t features; |
2115 | 25 | features.id = AOM_EXT_PART_FEATURE_BEFORE_NONE_PART2; |
2116 | 25 | for (int i = 0; i < FEATURE_SIZE_SMS_PRUNE_PART; ++i) { |
2117 | 0 | features.before_part_none.f_part2[i] = features_from_motion[i]; |
2118 | 0 | } |
2119 | | |
2120 | | // Send necessary features to the external model. |
2121 | 25 | av1_ext_part_send_features(ext_part_controller, &features); |
2122 | | |
2123 | | // Get partition decisions from the external model. |
2124 | 25 | aom_partition_decision_t decision; |
2125 | 25 | const bool valid_decision = |
2126 | 25 | av1_ext_part_get_partition_decision(ext_part_controller, &decision); |
2127 | 25 | if (!valid_decision) return false; |
2128 | | |
2129 | | // Populate decisions |
2130 | 25 | *prune_horz = decision.prune_rect_part[HORZ]; |
2131 | 25 | *prune_vert = decision.prune_rect_part[VERT]; |
2132 | | |
2133 | 25 | return true; |
2134 | 25 | } |
2135 | | |
2136 | | // If the external partition model is used, we let it determine partition |
2137 | | // decisions after none partition. Specifically, these parameters: |
2138 | | // do_square_split |
2139 | | // do_rectangular_split |
2140 | | bool ext_ml_model_decision_after_none( |
2141 | | ExtPartController *const ext_part_controller, const int is_intra_frame, |
2142 | | const float *const features_after_none, int *do_square_split, |
2143 | 9 | int *do_rectangular_split) { |
2144 | 9 | if (!ext_part_controller->ready || is_intra_frame) return false; |
2145 | | |
2146 | | // Setup features. |
2147 | 0 | aom_partition_features_t features; |
2148 | 0 | features.id = AOM_EXT_PART_FEATURE_AFTER_NONE; |
2149 | 0 | for (int i = 0; i < 4; ++i) { |
2150 | 0 | features.after_part_none.f[i] = features_after_none[i]; |
2151 | 0 | } |
2152 | | |
2153 | | // Send necessary features to the external model. |
2154 | 0 | av1_ext_part_send_features(ext_part_controller, &features); |
2155 | | |
2156 | | // Get partition decisions from the external model. |
2157 | 0 | aom_partition_decision_t decision; |
2158 | 0 | const bool valid_decision = |
2159 | 0 | av1_ext_part_get_partition_decision(ext_part_controller, &decision); |
2160 | 0 | if (!valid_decision) return false; |
2161 | | |
2162 | | // Populate decisions |
2163 | 0 | *do_square_split = decision.do_square_split; |
2164 | 0 | *do_rectangular_split = decision.do_rectangular_split; |
2165 | |
|
2166 | 0 | return true; |
2167 | 0 | } |
2168 | | |
2169 | | // If the external partition model is used, we let it determine partition |
2170 | | // decisions after none partition. Specifically, these parameters: |
2171 | | // terminate_partition_search |
2172 | | bool ext_ml_model_decision_after_none_part2( |
2173 | | AV1_COMP *const cpi, const float *const features_terminate, |
2174 | 13.3k | int *terminate_partition_search) { |
2175 | 13.3k | AV1_COMMON *const cm = &cpi->common; |
2176 | 13.3k | ExtPartController *const ext_part_controller = &cpi->ext_part_controller; |
2177 | 13.3k | if (!ext_part_controller->ready || frame_is_intra_only(cm)) return false; |
2178 | | |
2179 | | // Setup features. |
2180 | 3 | aom_partition_features_t features; |
2181 | 3 | features.id = AOM_EXT_PART_FEATURE_AFTER_NONE_PART2; |
2182 | 3 | for (int i = 0; i < FEATURE_SIZE_SMS_TERM_NONE; ++i) { |
2183 | 0 | features.after_part_none.f_terminate[i] = features_terminate[i]; |
2184 | 0 | } |
2185 | | |
2186 | | // Send necessary features to the external model. |
2187 | 3 | av1_ext_part_send_features(ext_part_controller, &features); |
2188 | | |
2189 | | // Get partition decisions from the external model. |
2190 | 3 | aom_partition_decision_t decision; |
2191 | 3 | const bool valid_decision = |
2192 | 3 | av1_ext_part_get_partition_decision(ext_part_controller, &decision); |
2193 | 3 | if (!valid_decision) return false; |
2194 | | |
2195 | | // Populate decisions |
2196 | 3 | *terminate_partition_search = decision.terminate_partition_search; |
2197 | | |
2198 | 3 | return true; |
2199 | 3 | } |
2200 | | |
2201 | | // If the external partition model is used, we let it determine partition |
2202 | | // decisions after none partition. Specifically, these parameters: |
2203 | | // terminate_partition_search |
2204 | | bool ext_ml_model_decision_after_split(AV1_COMP *const cpi, |
2205 | | const float *const features_terminate, |
2206 | 0 | int *terminate_partition_search) { |
2207 | 0 | const AV1_COMMON *const cm = &cpi->common; |
2208 | 0 | ExtPartController *const ext_part_controller = &cpi->ext_part_controller; |
2209 | 0 | if (frame_is_intra_only(cm) || !cpi->ext_part_controller.ready) { |
2210 | 0 | return false; |
2211 | 0 | } |
2212 | | |
2213 | | // Setup features. |
2214 | 0 | aom_partition_features_t features; |
2215 | 0 | features.id = AOM_EXT_PART_FEATURE_AFTER_SPLIT; |
2216 | 0 | for (int i = 0; i < 31; ++i) { |
2217 | 0 | features.after_part_split.f_terminate[i] = features_terminate[i]; |
2218 | 0 | } |
2219 | | |
2220 | | // Send necessary features to the external model. |
2221 | 0 | av1_ext_part_send_features(ext_part_controller, &features); |
2222 | | |
2223 | | // Get partition decisions from the external model. |
2224 | 0 | aom_partition_decision_t decision; |
2225 | 0 | const bool valid_decision = |
2226 | 0 | av1_ext_part_get_partition_decision(ext_part_controller, &decision); |
2227 | 0 | if (!valid_decision) return false; |
2228 | | |
2229 | | // Populate decisions |
2230 | 0 | *terminate_partition_search = decision.terminate_partition_search; |
2231 | |
|
2232 | 0 | return true; |
2233 | 0 | } |
2234 | | |
2235 | | // If the external partition model is used, we let it determine partition |
2236 | | // decisions after none partition. Specifically, these parameters: |
2237 | | // prune_rect_part[HORZ] |
2238 | | // prune_rect_part[VERT] |
2239 | | bool ext_ml_model_decision_after_split_part2( |
2240 | | ExtPartController *const ext_part_controller, const int is_intra_frame, |
2241 | | const float *const features_prune, int *prune_rect_part_horz, |
2242 | 0 | int *prune_rect_part_vert) { |
2243 | 0 | if (is_intra_frame || !ext_part_controller->ready) { |
2244 | 0 | return false; |
2245 | 0 | } |
2246 | | |
2247 | | // Setup features. |
2248 | 0 | aom_partition_features_t features; |
2249 | 0 | features.id = AOM_EXT_PART_FEATURE_AFTER_SPLIT_PART2; |
2250 | 0 | for (int i = 0; i < 9; ++i) { |
2251 | 0 | features.after_part_split.f_prune_rect[i] = features_prune[i]; |
2252 | 0 | } |
2253 | | |
2254 | | // Send necessary features to the external model. |
2255 | 0 | av1_ext_part_send_features(ext_part_controller, &features); |
2256 | | |
2257 | | // Get partition decisions from the external model. |
2258 | 0 | aom_partition_decision_t decision; |
2259 | 0 | const bool valid_decision = |
2260 | 0 | av1_ext_part_get_partition_decision(ext_part_controller, &decision); |
2261 | 0 | if (!valid_decision) return false; |
2262 | | |
2263 | | // Populate decisions |
2264 | 0 | *prune_rect_part_horz = decision.prune_rect_part[0]; |
2265 | 0 | *prune_rect_part_vert = decision.prune_rect_part[1]; |
2266 | |
|
2267 | 0 | return true; |
2268 | 0 | } |
2269 | | |
2270 | | // If the external partition model is used, we let it determine partition |
2271 | | // decisions after rectangular partition. Specifically, these parameters: |
2272 | | // horza_partition_allowed |
2273 | | // horzb_partition_allowed |
2274 | | // verta_partition_allowed |
2275 | | // vertb_partition_allowed |
2276 | | static bool ext_ml_model_decision_after_rect( |
2277 | | ExtPartController *const ext_part_controller, const int is_intra_frame, |
2278 | | const float *const features_after_rect, int *horza_partition_allowed, |
2279 | | int *horzb_partition_allowed, int *verta_partition_allowed, |
2280 | 0 | int *vertb_partition_allowed) { |
2281 | 0 | if (is_intra_frame || !ext_part_controller->ready) return false; |
2282 | | |
2283 | | // Setup features. |
2284 | 0 | aom_partition_features_t features; |
2285 | 0 | features.id = AOM_EXT_PART_FEATURE_AFTER_RECT; |
2286 | 0 | for (int i = 0; i < 10; ++i) { |
2287 | 0 | features.after_part_rect.f[i] = features_after_rect[i]; |
2288 | 0 | } |
2289 | | |
2290 | | // Send necessary features to the external model. |
2291 | 0 | av1_ext_part_send_features(ext_part_controller, &features); |
2292 | | |
2293 | | // Get partition decisions from the external model. |
2294 | 0 | aom_partition_decision_t decision; |
2295 | 0 | const bool valid_decision = |
2296 | 0 | av1_ext_part_get_partition_decision(ext_part_controller, &decision); |
2297 | 0 | if (!valid_decision) return false; |
2298 | | |
2299 | | // Populate decisions |
2300 | 0 | *horza_partition_allowed = decision.horza_partition_allowed; |
2301 | 0 | *horzb_partition_allowed = decision.horzb_partition_allowed; |
2302 | 0 | *verta_partition_allowed = decision.verta_partition_allowed; |
2303 | 0 | *vertb_partition_allowed = decision.vertb_partition_allowed; |
2304 | |
|
2305 | 0 | return true; |
2306 | 0 | } |
2307 | | |
2308 | | // If the external partition model is used, we let it determine partition |
2309 | | // decisions after AB partition. Specifically, these parameters: |
2310 | | // partition_vert4_allowed |
2311 | | // partition_horz4_allowed |
2312 | | static bool ext_ml_model_decision_after_part_ab( |
2313 | | AV1_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize, int part_ctx, |
2314 | | int64_t best_rd, int64_t rect_part_rd[NUM_RECT_PARTS][SUB_PARTITIONS_RECT], |
2315 | | int64_t split_rd[SUB_PARTITIONS_SPLIT], int *const partition_horz4_allowed, |
2316 | | int *const partition_vert4_allowed, unsigned int pb_source_variance, |
2317 | 0 | int mi_row, int mi_col) { |
2318 | 0 | const AV1_COMMON *const cm = &cpi->common; |
2319 | 0 | ExtPartController *const ext_part_controller = &cpi->ext_part_controller; |
2320 | |
|
2321 | 0 | if (!frame_is_intra_only(cm) && ext_part_controller->ready) { |
2322 | | // Setup features. |
2323 | 0 | aom_partition_features_t features; |
2324 | 0 | features.id = AOM_EXT_PART_FEATURE_AFTER_AB; |
2325 | 0 | prepare_features_after_part_ab(cpi, x, bsize, part_ctx, best_rd, |
2326 | 0 | rect_part_rd, split_rd, pb_source_variance, |
2327 | 0 | mi_row, mi_col, &features); |
2328 | | |
2329 | | // Send necessary features to the external model. |
2330 | 0 | av1_ext_part_send_features(ext_part_controller, &features); |
2331 | | |
2332 | | // Get partition decisions from the external model. |
2333 | 0 | aom_partition_decision_t decision; |
2334 | 0 | const bool valid_decision = |
2335 | 0 | av1_ext_part_get_partition_decision(ext_part_controller, &decision); |
2336 | 0 | if (!valid_decision) return false; |
2337 | | |
2338 | | // Populate decisions |
2339 | 0 | *partition_horz4_allowed = decision.partition_horz4_allowed; |
2340 | 0 | *partition_vert4_allowed = decision.partition_vert4_allowed; |
2341 | |
|
2342 | 0 | return true; |
2343 | 0 | } |
2344 | | |
2345 | 0 | return false; |
2346 | 0 | } |
2347 | | |
2348 | | // This function resembles "av1_setup_sms_tree()" in context_tree.c |
2349 | | // with function signature change. |
2350 | | static SIMPLE_MOTION_DATA_TREE *setup_sms_tree( |
2351 | 0 | AV1_COMP *const cpi, SIMPLE_MOTION_DATA_TREE *sms_tree) { |
2352 | 0 | AV1_COMMON *const cm = &cpi->common; |
2353 | 0 | const int stat_generation_stage = is_stat_generation_stage(cpi); |
2354 | 0 | const int is_sb_size_128 = cm->seq_params->sb_size == BLOCK_128X128; |
2355 | 0 | const int tree_nodes = |
2356 | 0 | av1_get_pc_tree_nodes(is_sb_size_128, stat_generation_stage); |
2357 | 0 | int sms_tree_index = 0; |
2358 | 0 | SIMPLE_MOTION_DATA_TREE *this_sms; |
2359 | 0 | int square_index = 1; |
2360 | 0 | int nodes; |
2361 | 0 | this_sms = &sms_tree[0]; |
2362 | |
|
2363 | 0 | if (!stat_generation_stage) { |
2364 | 0 | const int leaf_factor = is_sb_size_128 ? 4 : 1; |
2365 | 0 | const int leaf_nodes = 256 * leaf_factor; |
2366 | | |
2367 | | // Sets up all the leaf nodes in the tree. |
2368 | 0 | for (sms_tree_index = 0; sms_tree_index < leaf_nodes; ++sms_tree_index) { |
2369 | 0 | SIMPLE_MOTION_DATA_TREE *const tree = &sms_tree[sms_tree_index]; |
2370 | 0 | tree->block_size = square[0]; |
2371 | 0 | } |
2372 | | |
2373 | | // Each node has 4 leaf nodes, fill each block_size level of the tree |
2374 | | // from leafs to the root. |
2375 | 0 | for (nodes = leaf_nodes >> 2; nodes > 0; nodes >>= 2) { |
2376 | 0 | for (int i = 0; i < nodes; ++i) { |
2377 | 0 | SIMPLE_MOTION_DATA_TREE *const tree = &sms_tree[sms_tree_index]; |
2378 | 0 | tree->block_size = square[square_index]; |
2379 | 0 | for (int j = 0; j < 4; j++) tree->split[j] = this_sms++; |
2380 | 0 | ++sms_tree_index; |
2381 | 0 | } |
2382 | 0 | ++square_index; |
2383 | 0 | } |
2384 | 0 | } else { |
2385 | | // Allocation for firstpass/LAP stage |
2386 | | // TODO(Mufaddal): refactor square_index to use a common block_size macro |
2387 | | // from firstpass.c |
2388 | 0 | SIMPLE_MOTION_DATA_TREE *const tree = &sms_tree[sms_tree_index]; |
2389 | 0 | square_index = 2; |
2390 | 0 | tree->block_size = square[square_index]; |
2391 | 0 | } |
2392 | | |
2393 | | // Set up the root node for the largest superblock size |
2394 | 0 | return &sms_tree[tree_nodes - 1]; |
2395 | 0 | } |
2396 | | |
2397 | | static void write_motion_feature_to_file( |
2398 | | const char *const path, const int sb_counter, const unsigned int *block_sse, |
2399 | | const unsigned int *block_var, const int num_blocks, const BLOCK_SIZE bsize, |
2400 | 0 | const BLOCK_SIZE fixed_block_size, const int mi_row, const int mi_col) { |
2401 | 0 | char filename[256]; |
2402 | 0 | snprintf(filename, sizeof(filename), "%s/motion_search_feature_sb%d", path, |
2403 | 0 | sb_counter); |
2404 | 0 | FILE *pfile = fopen(filename, "w"); |
2405 | 0 | fprintf(pfile, "%d,%d,%d,%d,%d\n", mi_row, mi_col, bsize, |
2406 | 0 | block_size_wide[fixed_block_size], num_blocks); |
2407 | 0 | for (int i = 0; i < num_blocks; ++i) { |
2408 | 0 | fprintf(pfile, "%d", block_sse[i]); |
2409 | 0 | if (i < num_blocks - 1) fprintf(pfile, ","); |
2410 | 0 | } |
2411 | 0 | fprintf(pfile, "\n"); |
2412 | 0 | for (int i = 0; i < num_blocks; ++i) { |
2413 | 0 | fprintf(pfile, "%d", block_var[i]); |
2414 | 0 | if (i < num_blocks - 1) fprintf(pfile, ","); |
2415 | 0 | } |
2416 | 0 | fprintf(pfile, "\n"); |
2417 | 0 | fclose(pfile); |
2418 | 0 | } |
2419 | | |
2420 | | void av1_collect_motion_search_features_sb(AV1_COMP *const cpi, ThreadData *td, |
2421 | | TileDataEnc *tile_data, |
2422 | | const int mi_row, const int mi_col, |
2423 | | const BLOCK_SIZE bsize, |
2424 | 0 | aom_partition_features_t *features) { |
2425 | 0 | const AV1_COMMON *const cm = &cpi->common; |
2426 | 0 | if (frame_is_intra_only(cm)) return; |
2427 | | |
2428 | 0 | MACROBLOCK *const x = &td->mb; |
2429 | 0 | const BLOCK_SIZE fixed_block_size = BLOCK_16X16; |
2430 | 0 | const int col_step = mi_size_wide[fixed_block_size]; |
2431 | 0 | const int row_step = mi_size_high[fixed_block_size]; |
2432 | 0 | SIMPLE_MOTION_DATA_TREE *sms_tree = NULL; |
2433 | 0 | const int stat_generation_stage = is_stat_generation_stage(cpi); |
2434 | 0 | const int is_sb_size_128 = cm->seq_params->sb_size == BLOCK_128X128; |
2435 | 0 | const int tree_nodes = |
2436 | 0 | av1_get_pc_tree_nodes(is_sb_size_128, stat_generation_stage); |
2437 | 0 | CHECK_MEM_ERROR(cm, sms_tree, aom_calloc(tree_nodes, sizeof(*sms_tree))); |
2438 | 0 | SIMPLE_MOTION_DATA_TREE *sms_root = setup_sms_tree(cpi, sms_tree); |
2439 | 0 | TileInfo *const tile_info = &tile_data->tile_info; |
2440 | 0 | av1_set_offsets_without_segment_id(cpi, tile_info, x, mi_row, mi_col, bsize); |
2441 | 0 | av1_init_simple_motion_search_mvs_for_sb(cpi, NULL, x, sms_root, mi_row, |
2442 | 0 | mi_col); |
2443 | 0 | av1_reset_simple_motion_tree_partition(sms_root, bsize); |
2444 | 0 | const int ref_list[] = { cpi->rc.is_src_frame_alt_ref ? ALTREF_FRAME |
2445 | 0 | : LAST_FRAME }; |
2446 | 0 | const int mi_width = |
2447 | 0 | AOMMIN(mi_size_wide[bsize], cm->mi_params.mi_cols - mi_col); |
2448 | 0 | const int mi_height = |
2449 | 0 | AOMMIN(mi_size_high[bsize], cm->mi_params.mi_rows - mi_row); |
2450 | 0 | const int col_steps = (mi_width / col_step) + ((mi_width % col_step) > 0); |
2451 | 0 | const int row_steps = (mi_height / row_step) + ((mi_height % row_step) > 0); |
2452 | 0 | const int num_blocks = col_steps * row_steps; |
2453 | 0 | unsigned int *block_sse = aom_calloc(num_blocks, sizeof(*block_sse)); |
2454 | 0 | unsigned int *block_var = aom_calloc(num_blocks, sizeof(*block_var)); |
2455 | 0 | if (!(block_sse && block_var)) { |
2456 | 0 | aom_free(sms_tree); |
2457 | 0 | aom_free(block_sse); |
2458 | 0 | aom_free(block_var); |
2459 | 0 | aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR, |
2460 | 0 | "Error allocating block_sse & block_var"); |
2461 | 0 | } |
2462 | 0 | int idx = 0; |
2463 | |
|
2464 | 0 | for (int row = mi_row; |
2465 | 0 | row < AOMMIN(mi_row + mi_size_high[bsize], cm->mi_params.mi_rows); |
2466 | 0 | row += row_step) { |
2467 | 0 | for (int col = mi_col; |
2468 | 0 | col < AOMMIN(mi_col + mi_size_wide[bsize], cm->mi_params.mi_cols); |
2469 | 0 | col += col_step) { |
2470 | 0 | simple_motion_search_get_best_ref( |
2471 | 0 | cpi, x, sms_root, row, col, fixed_block_size, ref_list, |
2472 | 0 | /*num_refs=*/1, /*use_subpixel=*/1, |
2473 | 0 | /*save_mv=*/1, &block_sse[idx], &block_var[idx]); |
2474 | 0 | ++idx; |
2475 | 0 | } |
2476 | 0 | } |
2477 | 0 | if (features == NULL) { |
2478 | 0 | write_motion_feature_to_file(cpi->oxcf.partition_info_path, cpi->sb_counter, |
2479 | 0 | block_sse, block_var, idx, bsize, |
2480 | 0 | fixed_block_size, mi_row, mi_col); |
2481 | 0 | } else { |
2482 | 0 | features->sb_features.motion_features.unit_length = |
2483 | 0 | block_size_wide[fixed_block_size]; |
2484 | 0 | features->sb_features.motion_features.num_units = idx; |
2485 | 0 | for (int i = 0; i < idx; ++i) { |
2486 | 0 | features->sb_features.motion_features.block_sse[i] = block_sse[i]; |
2487 | 0 | features->sb_features.motion_features.block_var[i] = block_var[i]; |
2488 | 0 | } |
2489 | 0 | } |
2490 | |
|
2491 | 0 | aom_free(block_sse); |
2492 | 0 | aom_free(block_var); |
2493 | 0 | aom_free(sms_tree); |
2494 | 0 | } |
2495 | | |
2496 | | #if CONFIG_PARTITION_SEARCH_ORDER |
2497 | | void av1_prepare_motion_search_features_block( |
2498 | | AV1_COMP *const cpi, ThreadData *td, TileDataEnc *tile_data, |
2499 | | const int mi_row, const int mi_col, const BLOCK_SIZE bsize, |
2500 | | const int valid_partition_types, unsigned int *block_sse, |
2501 | | unsigned int *block_var, unsigned int sub_block_sse[4], |
2502 | | unsigned int sub_block_var[4], unsigned int horz_block_sse[2], |
2503 | | unsigned int horz_block_var[2], unsigned int vert_block_sse[2], |
2504 | | unsigned int vert_block_var[2]) { |
2505 | | const AV1_COMMON *const cm = &cpi->common; |
2506 | | if (frame_is_intra_only(cm)) return; |
2507 | | MACROBLOCK *const x = &td->mb; |
2508 | | SIMPLE_MOTION_DATA_TREE *sms_tree = NULL; |
2509 | | const int stat_generation_stage = is_stat_generation_stage(cpi); |
2510 | | const int is_sb_size_128 = cm->seq_params->sb_size == BLOCK_128X128; |
2511 | | const int tree_nodes = |
2512 | | av1_get_pc_tree_nodes(is_sb_size_128, stat_generation_stage); |
2513 | | CHECK_MEM_ERROR(cm, sms_tree, aom_calloc(tree_nodes, sizeof(*sms_tree))); |
2514 | | SIMPLE_MOTION_DATA_TREE *sms_root = setup_sms_tree(cpi, sms_tree); |
2515 | | TileInfo *const tile_info = &tile_data->tile_info; |
2516 | | av1_set_offsets_without_segment_id(cpi, tile_info, x, mi_row, mi_col, bsize); |
2517 | | av1_reset_simple_motion_tree_partition(sms_root, bsize); |
2518 | | const int ref_list[] = { cpi->rc.is_src_frame_alt_ref ? ALTREF_FRAME |
2519 | | : LAST_FRAME }; |
2520 | | const int sub_mi_width = mi_size_wide[bsize] / 2; |
2521 | | const int sub_mi_height = sub_mi_width; |
2522 | | simple_motion_search_get_best_ref( |
2523 | | cpi, x, sms_root, mi_row, mi_col, bsize, ref_list, /*num_refs=*/1, |
2524 | | /*use_subpixel=*/1, /*save_mv=*/1, block_sse, block_var); |
2525 | | // Split to 4 sub blocks. |
2526 | | if (valid_partition_types & (1 << PARTITION_SPLIT)) { |
2527 | | const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT); |
2528 | | for (int i = 0; i < 4; ++i) { |
2529 | | const int row = mi_row + (i >> 1) * sub_mi_height; |
2530 | | const int col = mi_col + (i & 1) * sub_mi_width; |
2531 | | simple_motion_search_get_best_ref(cpi, x, sms_root, row, col, subsize, |
2532 | | ref_list, /*num_refs=*/1, |
2533 | | /*use_subpixel=*/1, /*save_mv=*/1, |
2534 | | &sub_block_sse[i], &sub_block_var[i]); |
2535 | | } |
2536 | | } |
2537 | | // Horizontal split |
2538 | | if (valid_partition_types & (1 << PARTITION_HORZ)) { |
2539 | | const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_HORZ); |
2540 | | for (int i = 0; i < 2; ++i) { |
2541 | | const int row = mi_row + (i & 1) * sub_mi_height; |
2542 | | const int col = mi_col; |
2543 | | simple_motion_search_get_best_ref(cpi, x, sms_root, row, col, subsize, |
2544 | | ref_list, /*num_refs=*/1, |
2545 | | /*use_subpixel=*/1, /*save_mv=*/1, |
2546 | | &horz_block_sse[i], &horz_block_var[i]); |
2547 | | } |
2548 | | } |
2549 | | // Vertical split |
2550 | | if (valid_partition_types & (1 << PARTITION_VERT)) { |
2551 | | const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_VERT); |
2552 | | for (int i = 0; i < 2; ++i) { |
2553 | | const int row = mi_row; |
2554 | | const int col = mi_col + (i & 1) * sub_mi_width; |
2555 | | simple_motion_search_get_best_ref(cpi, x, sms_root, row, col, subsize, |
2556 | | ref_list, /*num_refs=*/1, |
2557 | | /*use_subpixel=*/1, /*save_mv=*/1, |
2558 | | &vert_block_sse[i], &vert_block_var[i]); |
2559 | | } |
2560 | | } |
2561 | | |
2562 | | aom_free(sms_tree); |
2563 | | } |
2564 | | #endif // CONFIG_PARTITION_SEARCH_ORDER |
2565 | | #endif // !CONFIG_REALTIME_ONLY |
2566 | | |
2567 | | static inline void init_simple_motion_search_mvs( |
2568 | 14.2M | SIMPLE_MOTION_DATA_TREE *sms_tree, const FULLPEL_MV *start_mvs) { |
2569 | 14.2M | memcpy(sms_tree->start_mvs, start_mvs, sizeof(sms_tree->start_mvs)); |
2570 | 14.2M | av1_zero(sms_tree->sms_none_feat); |
2571 | 14.2M | av1_zero(sms_tree->sms_rect_feat); |
2572 | 14.2M | av1_zero(sms_tree->sms_none_valid); |
2573 | 14.2M | av1_zero(sms_tree->sms_rect_valid); |
2574 | | |
2575 | 14.2M | if (sms_tree->block_size >= BLOCK_8X8) { |
2576 | 3.57M | init_simple_motion_search_mvs(sms_tree->split[0], start_mvs); |
2577 | 3.57M | init_simple_motion_search_mvs(sms_tree->split[1], start_mvs); |
2578 | 3.57M | init_simple_motion_search_mvs(sms_tree->split[2], start_mvs); |
2579 | 3.57M | init_simple_motion_search_mvs(sms_tree->split[3], start_mvs); |
2580 | 3.57M | } |
2581 | 14.2M | } |
2582 | | |
2583 | | void av1_init_simple_motion_search_mvs_for_sb(const AV1_COMP *cpi, |
2584 | | const TileInfo *tile_info, |
2585 | | MACROBLOCK *x, |
2586 | | SIMPLE_MOTION_DATA_TREE *sms_root, |
2587 | 42.4k | int mi_row, int mi_col) { |
2588 | | // Use the NEARESTMV of the sb as the start mv |
2589 | 42.4k | const AV1_COMMON *cm = &cpi->common; |
2590 | 42.4k | MACROBLOCKD *const xd = &x->e_mbd; |
2591 | 42.4k | FULLPEL_MV ref_mvs[REF_FRAMES]; |
2592 | 42.4k | const BLOCK_SIZE sb_size = cm->seq_params->sb_size; |
2593 | 42.4k | av1_zero(ref_mvs); |
2594 | | // If tile_info is NULL, assume that the offsets have already been set. |
2595 | 42.4k | if (tile_info) { |
2596 | 42.4k | av1_set_offsets_without_segment_id(cpi, tile_info, x, mi_row, mi_col, |
2597 | 42.4k | sb_size); |
2598 | 42.4k | } |
2599 | | |
2600 | 42.4k | MB_MODE_INFO_EXT mbmi_ext; |
2601 | 42.4k | const int ref_frame = |
2602 | 42.4k | cpi->rc.is_src_frame_alt_ref ? ALTREF_FRAME : LAST_FRAME; |
2603 | 42.4k | av1_find_mv_refs(cm, xd, xd->mi[0], ref_frame, mbmi_ext.ref_mv_count, |
2604 | 42.4k | xd->ref_mv_stack, xd->weight, NULL, mbmi_ext.global_mvs, |
2605 | 42.4k | mbmi_ext.mode_context); |
2606 | 42.4k | if (mbmi_ext.ref_mv_count[ref_frame] > 0) { |
2607 | 11.8k | ref_mvs[ref_frame] = |
2608 | 11.8k | get_fullmv_from_mv(&xd->ref_mv_stack[ref_frame][0].this_mv.as_mv); |
2609 | 30.6k | } else { |
2610 | 30.6k | ref_mvs[ref_frame] = |
2611 | 30.6k | get_fullmv_from_mv(&mbmi_ext.global_mvs[ref_frame].as_mv); |
2612 | 30.6k | } |
2613 | | |
2614 | 42.4k | init_simple_motion_search_mvs(sms_root, ref_mvs); |
2615 | 42.4k | } |