/src/libvpx/vp9/encoder/vp9_encodeframe.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 | | * |
4 | | * Use of this source code is governed by a BSD-style license |
5 | | * that can be found in the LICENSE file in the root of the source |
6 | | * tree. An additional intellectual property rights grant can be found |
7 | | * in the file PATENTS. All contributing project authors may |
8 | | * be found in the AUTHORS file in the root of the source tree. |
9 | | */ |
10 | | |
11 | | #include <float.h> |
12 | | #include <limits.h> |
13 | | #include <math.h> |
14 | | #include <stdio.h> |
15 | | |
16 | | #include "./vp9_rtcd.h" |
17 | | #include "./vpx_dsp_rtcd.h" |
18 | | #include "./vpx_config.h" |
19 | | |
20 | | #include "vpx_dsp/vpx_dsp_common.h" |
21 | | #include "vpx_ports/mem.h" |
22 | | #include "vpx_ports/vpx_timer.h" |
23 | | #include "vpx_ports/system_state.h" |
24 | | #include "vpx_util/vpx_pthread.h" |
25 | | #if CONFIG_MISMATCH_DEBUG |
26 | | #include "vpx_util/vpx_debug_util.h" |
27 | | #endif // CONFIG_MISMATCH_DEBUG |
28 | | |
29 | | #include "vp9/common/vp9_common.h" |
30 | | #include "vp9/common/vp9_entropy.h" |
31 | | #include "vp9/common/vp9_entropymode.h" |
32 | | #include "vp9/common/vp9_idct.h" |
33 | | #include "vp9/common/vp9_mvref_common.h" |
34 | | #include "vp9/common/vp9_pred_common.h" |
35 | | #include "vp9/common/vp9_quant_common.h" |
36 | | #include "vp9/common/vp9_reconintra.h" |
37 | | #include "vp9/common/vp9_reconinter.h" |
38 | | #include "vp9/common/vp9_seg_common.h" |
39 | | #include "vp9/common/vp9_tile_common.h" |
40 | | #if !CONFIG_REALTIME_ONLY |
41 | | #include "vp9/encoder/vp9_aq_360.h" |
42 | | #include "vp9/encoder/vp9_aq_complexity.h" |
43 | | #endif |
44 | | #include "vp9/encoder/vp9_aq_cyclicrefresh.h" |
45 | | #if !CONFIG_REALTIME_ONLY |
46 | | #include "vp9/encoder/vp9_aq_variance.h" |
47 | | #endif |
48 | | #include "vp9/encoder/vp9_encodeframe.h" |
49 | | #include "vp9/encoder/vp9_encodemb.h" |
50 | | #include "vp9/encoder/vp9_encodemv.h" |
51 | | #include "vp9/encoder/vp9_encoder.h" |
52 | | #include "vp9/encoder/vp9_ethread.h" |
53 | | #include "vp9/encoder/vp9_extend.h" |
54 | | #include "vp9/encoder/vp9_multi_thread.h" |
55 | | #include "vp9/encoder/vp9_partition_models.h" |
56 | | #include "vp9/encoder/vp9_pickmode.h" |
57 | | #include "vp9/encoder/vp9_rd.h" |
58 | | #include "vp9/encoder/vp9_rdopt.h" |
59 | | #include "vp9/encoder/vp9_segmentation.h" |
60 | | #include "vp9/encoder/vp9_tokenize.h" |
61 | | |
62 | | static void encode_superblock(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t, |
63 | | int output_enabled, int mi_row, int mi_col, |
64 | | BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx); |
65 | | |
66 | | // This is used as a reference when computing the source variance for the |
67 | | // purpose of activity masking. |
68 | | // Eventually this should be replaced by custom no-reference routines, |
69 | | // which will be faster. |
70 | | static const uint8_t VP9_VAR_OFFS[64] = { |
71 | | 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, |
72 | | 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, |
73 | | 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, |
74 | | 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, |
75 | | 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 |
76 | | }; |
77 | | |
78 | | #if CONFIG_VP9_HIGHBITDEPTH |
79 | | static const uint16_t VP9_HIGH_VAR_OFFS_8[64] = { |
80 | | 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, |
81 | | 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, |
82 | | 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, |
83 | | 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, |
84 | | 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 |
85 | | }; |
86 | | |
87 | | static const uint16_t VP9_HIGH_VAR_OFFS_10[64] = { |
88 | | 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, |
89 | | 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, |
90 | | 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, |
91 | | 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, |
92 | | 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, |
93 | | 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, |
94 | | 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, |
95 | | 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4 |
96 | | }; |
97 | | |
98 | | static const uint16_t VP9_HIGH_VAR_OFFS_12[64] = { |
99 | | 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, |
100 | | 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, |
101 | | 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, |
102 | | 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, |
103 | | 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, |
104 | | 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, |
105 | | 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, |
106 | | 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, |
107 | | 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, |
108 | | 128 * 16 |
109 | | }; |
110 | | #endif // CONFIG_VP9_HIGHBITDEPTH |
111 | | |
112 | | unsigned int vp9_get_sby_variance(VP9_COMP *cpi, const struct buf_2d *ref, |
113 | 8.85M | BLOCK_SIZE bs) { |
114 | 8.85M | unsigned int sse; |
115 | 8.85M | const unsigned int var = |
116 | 8.85M | cpi->fn_ptr[bs].vf(ref->buf, ref->stride, VP9_VAR_OFFS, 0, &sse); |
117 | 8.85M | return var; |
118 | 8.85M | } |
119 | | |
120 | | #if CONFIG_VP9_HIGHBITDEPTH |
121 | | unsigned int vp9_high_get_sby_variance(VP9_COMP *cpi, const struct buf_2d *ref, |
122 | 0 | BLOCK_SIZE bs, int bd) { |
123 | 0 | unsigned int var, sse; |
124 | 0 | switch (bd) { |
125 | 0 | case 10: |
126 | 0 | var = |
127 | 0 | cpi->fn_ptr[bs].vf(ref->buf, ref->stride, |
128 | 0 | CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_10), 0, &sse); |
129 | 0 | break; |
130 | 0 | case 12: |
131 | 0 | var = |
132 | 0 | cpi->fn_ptr[bs].vf(ref->buf, ref->stride, |
133 | 0 | CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_12), 0, &sse); |
134 | 0 | break; |
135 | 0 | case 8: |
136 | 0 | default: |
137 | 0 | var = |
138 | 0 | cpi->fn_ptr[bs].vf(ref->buf, ref->stride, |
139 | 0 | CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_8), 0, &sse); |
140 | 0 | break; |
141 | 0 | } |
142 | 0 | return var; |
143 | 0 | } |
144 | | #endif // CONFIG_VP9_HIGHBITDEPTH |
145 | | |
146 | | unsigned int vp9_get_sby_perpixel_variance(VP9_COMP *cpi, |
147 | | const struct buf_2d *ref, |
148 | 8.85M | BLOCK_SIZE bs) { |
149 | 8.85M | return ROUND_POWER_OF_TWO(vp9_get_sby_variance(cpi, ref, bs), |
150 | 8.85M | num_pels_log2_lookup[bs]); |
151 | 8.85M | } |
152 | | |
153 | | #if CONFIG_VP9_HIGHBITDEPTH |
154 | | unsigned int vp9_high_get_sby_perpixel_variance(VP9_COMP *cpi, |
155 | | const struct buf_2d *ref, |
156 | 0 | BLOCK_SIZE bs, int bd) { |
157 | 0 | return (unsigned int)ROUND64_POWER_OF_TWO( |
158 | 0 | (int64_t)vp9_high_get_sby_variance(cpi, ref, bs, bd), |
159 | 0 | num_pels_log2_lookup[bs]); |
160 | 0 | } |
161 | | #endif // CONFIG_VP9_HIGHBITDEPTH |
162 | | |
163 | | static void set_segment_index(VP9_COMP *cpi, MACROBLOCK *const x, int mi_row, |
164 | 8.85M | int mi_col, BLOCK_SIZE bsize, int segment_index) { |
165 | 8.85M | VP9_COMMON *const cm = &cpi->common; |
166 | 8.85M | const struct segmentation *const seg = &cm->seg; |
167 | 8.85M | MACROBLOCKD *const xd = &x->e_mbd; |
168 | 8.85M | MODE_INFO *mi = xd->mi[0]; |
169 | | |
170 | 8.85M | const AQ_MODE aq_mode = cpi->oxcf.aq_mode; |
171 | 8.85M | const uint8_t *const map = |
172 | 8.85M | seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map; |
173 | | |
174 | | // Initialize the segmentation index as 0. |
175 | 8.85M | mi->segment_id = 0; |
176 | | |
177 | | // Skip the rest if AQ mode is disabled. |
178 | 8.85M | if (!seg->enabled) return; |
179 | | |
180 | 0 | switch (aq_mode) { |
181 | 0 | case CYCLIC_REFRESH_AQ: |
182 | 0 | mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); |
183 | 0 | break; |
184 | 0 | #if !CONFIG_REALTIME_ONLY |
185 | 0 | case VARIANCE_AQ: |
186 | 0 | if (cm->frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame || |
187 | 0 | cpi->force_update_segmentation || |
188 | 0 | (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) { |
189 | 0 | int min_energy; |
190 | 0 | int max_energy; |
191 | | // Get sub block energy range |
192 | 0 | if (bsize >= BLOCK_32X32) { |
193 | 0 | vp9_get_sub_block_energy(cpi, x, mi_row, mi_col, bsize, &min_energy, |
194 | 0 | &max_energy); |
195 | 0 | } else { |
196 | 0 | min_energy = bsize <= BLOCK_16X16 ? x->mb_energy |
197 | 0 | : vp9_block_energy(cpi, x, bsize); |
198 | 0 | } |
199 | 0 | mi->segment_id = vp9_vaq_segment_id(min_energy); |
200 | 0 | } else { |
201 | 0 | mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); |
202 | 0 | } |
203 | 0 | break; |
204 | 0 | case EQUATOR360_AQ: |
205 | 0 | if (cm->frame_type == KEY_FRAME || cpi->force_update_segmentation) |
206 | 0 | mi->segment_id = vp9_360aq_segment_id(mi_row, cm->mi_rows); |
207 | 0 | else |
208 | 0 | mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); |
209 | 0 | break; |
210 | 0 | #endif |
211 | 0 | case LOOKAHEAD_AQ: |
212 | 0 | mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); |
213 | 0 | break; |
214 | 0 | case PSNR_AQ: mi->segment_id = segment_index; break; |
215 | 0 | case PERCEPTUAL_AQ: mi->segment_id = x->segment_id; break; |
216 | 0 | default: |
217 | | // NO_AQ or PSNR_AQ |
218 | 0 | break; |
219 | 0 | } |
220 | | |
221 | | // Set segment index if ROI map or active_map is enabled. |
222 | 0 | if (cpi->roi.enabled || cpi->active_map.enabled) |
223 | 0 | mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); |
224 | |
|
225 | 0 | vp9_init_plane_quantizers(cpi, x); |
226 | 0 | } |
227 | | |
228 | | // Lighter version of set_offsets that only sets the mode info |
229 | | // pointers. |
230 | | static INLINE void set_mode_info_offsets(VP9_COMMON *const cm, |
231 | | MACROBLOCK *const x, |
232 | | MACROBLOCKD *const xd, int mi_row, |
233 | 19.0M | int mi_col) { |
234 | 19.0M | const int idx_str = xd->mi_stride * mi_row + mi_col; |
235 | 19.0M | xd->mi = cm->mi_grid_visible + idx_str; |
236 | 19.0M | xd->mi[0] = cm->mi + idx_str; |
237 | 19.0M | x->mbmi_ext = x->mbmi_ext_base + (mi_row * cm->mi_cols + mi_col); |
238 | 19.0M | } |
239 | | |
240 | | static void set_ssim_rdmult(VP9_COMP *const cpi, MACROBLOCK *const x, |
241 | | const BLOCK_SIZE bsize, const int mi_row, |
242 | 0 | const int mi_col, int *const rdmult) { |
243 | 0 | const VP9_COMMON *const cm = &cpi->common; |
244 | |
|
245 | 0 | const int bsize_base = BLOCK_16X16; |
246 | 0 | const int num_8x8_w = num_8x8_blocks_wide_lookup[bsize_base]; |
247 | 0 | const int num_8x8_h = num_8x8_blocks_high_lookup[bsize_base]; |
248 | 0 | const int num_cols = (cm->mi_cols + num_8x8_w - 1) / num_8x8_w; |
249 | 0 | const int num_rows = (cm->mi_rows + num_8x8_h - 1) / num_8x8_h; |
250 | 0 | const int num_bcols = |
251 | 0 | (num_8x8_blocks_wide_lookup[bsize] + num_8x8_w - 1) / num_8x8_w; |
252 | 0 | const int num_brows = |
253 | 0 | (num_8x8_blocks_high_lookup[bsize] + num_8x8_h - 1) / num_8x8_h; |
254 | 0 | int row, col; |
255 | 0 | double num_of_mi = 0.0; |
256 | 0 | double geom_mean_of_scale = 0.0; |
257 | |
|
258 | 0 | assert(cpi->oxcf.tuning == VP8_TUNE_SSIM); |
259 | |
|
260 | 0 | for (row = mi_row / num_8x8_w; |
261 | 0 | row < num_rows && row < mi_row / num_8x8_w + num_brows; ++row) { |
262 | 0 | for (col = mi_col / num_8x8_h; |
263 | 0 | col < num_cols && col < mi_col / num_8x8_h + num_bcols; ++col) { |
264 | 0 | const int index = row * num_cols + col; |
265 | 0 | geom_mean_of_scale += log(cpi->mi_ssim_rdmult_scaling_factors[index]); |
266 | 0 | num_of_mi += 1.0; |
267 | 0 | } |
268 | 0 | } |
269 | 0 | geom_mean_of_scale = exp(geom_mean_of_scale / num_of_mi); |
270 | |
|
271 | 0 | *rdmult = (int)((double)(*rdmult) * geom_mean_of_scale); |
272 | 0 | *rdmult = VPXMAX(*rdmult, 0); |
273 | 0 | set_error_per_bit(x, *rdmult); |
274 | 0 | vpx_clear_system_state(); |
275 | 0 | } |
276 | | |
277 | | static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile, |
278 | | MACROBLOCK *const x, int mi_row, int mi_col, |
279 | 19.0M | BLOCK_SIZE bsize) { |
280 | 19.0M | VP9_COMMON *const cm = &cpi->common; |
281 | 19.0M | const VP9EncoderConfig *const oxcf = &cpi->oxcf; |
282 | 19.0M | MACROBLOCKD *const xd = &x->e_mbd; |
283 | 19.0M | const int mi_width = num_8x8_blocks_wide_lookup[bsize]; |
284 | 19.0M | const int mi_height = num_8x8_blocks_high_lookup[bsize]; |
285 | 19.0M | MvLimits *const mv_limits = &x->mv_limits; |
286 | | |
287 | 19.0M | set_skip_context(xd, mi_row, mi_col); |
288 | | |
289 | 19.0M | set_mode_info_offsets(cm, x, xd, mi_row, mi_col); |
290 | | |
291 | | // Set up destination pointers. |
292 | 19.0M | vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col); |
293 | | |
294 | | // Set up limit values for MV components. |
295 | | // Mv beyond the range do not produce new/different prediction block. |
296 | 19.0M | mv_limits->row_min = -(((mi_row + mi_height) * MI_SIZE) + VP9_INTERP_EXTEND); |
297 | 19.0M | mv_limits->col_min = -(((mi_col + mi_width) * MI_SIZE) + VP9_INTERP_EXTEND); |
298 | 19.0M | mv_limits->row_max = (cm->mi_rows - mi_row) * MI_SIZE + VP9_INTERP_EXTEND; |
299 | 19.0M | mv_limits->col_max = (cm->mi_cols - mi_col) * MI_SIZE + VP9_INTERP_EXTEND; |
300 | | |
301 | | // Set up distance of MB to edge of frame in 1/8th pel units. |
302 | 19.0M | assert(!(mi_col & (mi_width - 1)) && !(mi_row & (mi_height - 1))); |
303 | 19.0M | set_mi_row_col(xd, tile, mi_row, mi_height, mi_col, mi_width, cm->mi_rows, |
304 | 19.0M | cm->mi_cols); |
305 | | |
306 | | // Set up source buffers. |
307 | 19.0M | vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col); |
308 | | |
309 | | // R/D setup. |
310 | 19.0M | x->rddiv = cpi->rd.RDDIV; |
311 | 19.0M | x->rdmult = cpi->rd.RDMULT; |
312 | 19.0M | if (oxcf->tuning == VP8_TUNE_SSIM) { |
313 | 0 | set_ssim_rdmult(cpi, x, bsize, mi_row, mi_col, &x->rdmult); |
314 | 0 | } |
315 | | |
316 | | // required by vp9_append_sub8x8_mvs_for_idx() and vp9_find_best_ref_mvs() |
317 | 19.0M | xd->tile = *tile; |
318 | 19.0M | } |
319 | | |
320 | | static void duplicate_mode_info_in_sb(VP9_COMMON *cm, MACROBLOCKD *xd, |
321 | | int mi_row, int mi_col, |
322 | 0 | BLOCK_SIZE bsize) { |
323 | 0 | const int block_width = |
324 | 0 | VPXMIN(num_8x8_blocks_wide_lookup[bsize], cm->mi_cols - mi_col); |
325 | 0 | const int block_height = |
326 | 0 | VPXMIN(num_8x8_blocks_high_lookup[bsize], cm->mi_rows - mi_row); |
327 | 0 | const int mi_stride = xd->mi_stride; |
328 | 0 | MODE_INFO *const src_mi = xd->mi[0]; |
329 | 0 | int i, j; |
330 | |
|
331 | 0 | for (j = 0; j < block_height; ++j) |
332 | 0 | for (i = 0; i < block_width; ++i) xd->mi[j * mi_stride + i] = src_mi; |
333 | 0 | } |
334 | | |
335 | | static void set_block_size(VP9_COMP *const cpi, MACROBLOCK *const x, |
336 | | MACROBLOCKD *const xd, int mi_row, int mi_col, |
337 | 0 | BLOCK_SIZE bsize) { |
338 | 0 | if (cpi->common.mi_cols > mi_col && cpi->common.mi_rows > mi_row) { |
339 | 0 | set_mode_info_offsets(&cpi->common, x, xd, mi_row, mi_col); |
340 | 0 | xd->mi[0]->sb_type = bsize; |
341 | 0 | } |
342 | 0 | } |
343 | | |
344 | | typedef struct { |
345 | | // This struct is used for computing variance in choose_partitioning(), where |
346 | | // the max number of samples within a superblock is 16x16 (with 4x4 avg). Even |
347 | | // in high bitdepth, uint32_t is enough for sum_square_error (2^12 * 2^12 * 16 |
348 | | // * 16 = 2^32). |
349 | | uint32_t sum_square_error; |
350 | | int32_t sum_error; |
351 | | int log2_count; |
352 | | int variance; |
353 | | } Var; |
354 | | |
355 | | typedef struct { |
356 | | Var none; |
357 | | Var horz[2]; |
358 | | Var vert[2]; |
359 | | } partition_variance; |
360 | | |
361 | | typedef struct { |
362 | | partition_variance part_variances; |
363 | | Var split[4]; |
364 | | } v4x4; |
365 | | |
366 | | typedef struct { |
367 | | partition_variance part_variances; |
368 | | v4x4 split[4]; |
369 | | } v8x8; |
370 | | |
371 | | typedef struct { |
372 | | partition_variance part_variances; |
373 | | v8x8 split[4]; |
374 | | } v16x16; |
375 | | |
376 | | typedef struct { |
377 | | partition_variance part_variances; |
378 | | v16x16 split[4]; |
379 | | } v32x32; |
380 | | |
381 | | typedef struct { |
382 | | partition_variance part_variances; |
383 | | v32x32 split[4]; |
384 | | } v64x64; |
385 | | |
386 | | typedef struct { |
387 | | partition_variance *part_variances; |
388 | | Var *split[4]; |
389 | | } variance_node; |
390 | | |
391 | | typedef enum { |
392 | | V16X16, |
393 | | V32X32, |
394 | | V64X64, |
395 | | } TREE_LEVEL; |
396 | | |
397 | 0 | static void tree_to_node(void *data, BLOCK_SIZE bsize, variance_node *node) { |
398 | 0 | int i; |
399 | 0 | node->part_variances = NULL; |
400 | 0 | switch (bsize) { |
401 | 0 | case BLOCK_64X64: { |
402 | 0 | v64x64 *vt = (v64x64 *)data; |
403 | 0 | node->part_variances = &vt->part_variances; |
404 | 0 | for (i = 0; i < 4; i++) |
405 | 0 | node->split[i] = &vt->split[i].part_variances.none; |
406 | 0 | break; |
407 | 0 | } |
408 | 0 | case BLOCK_32X32: { |
409 | 0 | v32x32 *vt = (v32x32 *)data; |
410 | 0 | node->part_variances = &vt->part_variances; |
411 | 0 | for (i = 0; i < 4; i++) |
412 | 0 | node->split[i] = &vt->split[i].part_variances.none; |
413 | 0 | break; |
414 | 0 | } |
415 | 0 | case BLOCK_16X16: { |
416 | 0 | v16x16 *vt = (v16x16 *)data; |
417 | 0 | node->part_variances = &vt->part_variances; |
418 | 0 | for (i = 0; i < 4; i++) |
419 | 0 | node->split[i] = &vt->split[i].part_variances.none; |
420 | 0 | break; |
421 | 0 | } |
422 | 0 | case BLOCK_8X8: { |
423 | 0 | v8x8 *vt = (v8x8 *)data; |
424 | 0 | node->part_variances = &vt->part_variances; |
425 | 0 | for (i = 0; i < 4; i++) |
426 | 0 | node->split[i] = &vt->split[i].part_variances.none; |
427 | 0 | break; |
428 | 0 | } |
429 | 0 | default: { |
430 | 0 | v4x4 *vt = (v4x4 *)data; |
431 | 0 | assert(bsize == BLOCK_4X4); |
432 | 0 | node->part_variances = &vt->part_variances; |
433 | 0 | for (i = 0; i < 4; i++) node->split[i] = &vt->split[i]; |
434 | 0 | break; |
435 | 0 | } |
436 | 0 | } |
437 | 0 | } |
438 | | |
439 | | // Set variance values given sum square error, sum error, count. |
440 | 0 | static void fill_variance(uint32_t s2, int32_t s, int c, Var *v) { |
441 | 0 | v->sum_square_error = s2; |
442 | 0 | v->sum_error = s; |
443 | 0 | v->log2_count = c; |
444 | 0 | } |
445 | | |
446 | 0 | static void get_variance(Var *v) { |
447 | 0 | v->variance = |
448 | 0 | (int)(256 * (v->sum_square_error - |
449 | 0 | (uint32_t)(((int64_t)v->sum_error * v->sum_error) >> |
450 | 0 | v->log2_count)) >> |
451 | 0 | v->log2_count); |
452 | 0 | } |
453 | | |
454 | 0 | static void sum_2_variances(const Var *a, const Var *b, Var *r) { |
455 | 0 | assert(a->log2_count == b->log2_count); |
456 | 0 | fill_variance(a->sum_square_error + b->sum_square_error, |
457 | 0 | a->sum_error + b->sum_error, a->log2_count + 1, r); |
458 | 0 | } |
459 | | |
460 | 0 | static void fill_variance_tree(void *data, BLOCK_SIZE bsize) { |
461 | 0 | variance_node node; |
462 | 0 | memset(&node, 0, sizeof(node)); |
463 | 0 | tree_to_node(data, bsize, &node); |
464 | 0 | sum_2_variances(node.split[0], node.split[1], &node.part_variances->horz[0]); |
465 | 0 | sum_2_variances(node.split[2], node.split[3], &node.part_variances->horz[1]); |
466 | 0 | sum_2_variances(node.split[0], node.split[2], &node.part_variances->vert[0]); |
467 | 0 | sum_2_variances(node.split[1], node.split[3], &node.part_variances->vert[1]); |
468 | 0 | sum_2_variances(&node.part_variances->vert[0], &node.part_variances->vert[1], |
469 | 0 | &node.part_variances->none); |
470 | 0 | } |
471 | | |
472 | | static int set_vt_partitioning(VP9_COMP *cpi, MACROBLOCK *const x, |
473 | | MACROBLOCKD *const xd, void *data, |
474 | | BLOCK_SIZE bsize, int mi_row, int mi_col, |
475 | | int64_t threshold, BLOCK_SIZE bsize_min, |
476 | 0 | int force_split) { |
477 | 0 | VP9_COMMON *const cm = &cpi->common; |
478 | 0 | variance_node vt; |
479 | 0 | const int block_width = num_8x8_blocks_wide_lookup[bsize]; |
480 | 0 | const int block_height = num_8x8_blocks_high_lookup[bsize]; |
481 | |
|
482 | 0 | assert(block_height == block_width); |
483 | 0 | tree_to_node(data, bsize, &vt); |
484 | |
|
485 | 0 | if (force_split == 1) return 0; |
486 | | |
487 | | // For bsize=bsize_min (16x16/8x8 for 8x8/4x4 downsampling), select if |
488 | | // variance is below threshold, otherwise split will be selected. |
489 | | // No check for vert/horiz split as too few samples for variance. |
490 | 0 | if (bsize == bsize_min) { |
491 | | // Variance already computed to set the force_split. |
492 | 0 | if (frame_is_intra_only(cm)) get_variance(&vt.part_variances->none); |
493 | 0 | if (mi_col + block_width / 2 < cm->mi_cols && |
494 | 0 | mi_row + block_height / 2 < cm->mi_rows && |
495 | 0 | vt.part_variances->none.variance < threshold) { |
496 | 0 | set_block_size(cpi, x, xd, mi_row, mi_col, bsize); |
497 | 0 | return 1; |
498 | 0 | } |
499 | 0 | return 0; |
500 | 0 | } else if (bsize > bsize_min) { |
501 | | // Variance already computed to set the force_split. |
502 | 0 | if (frame_is_intra_only(cm)) get_variance(&vt.part_variances->none); |
503 | | // For key frame: take split for bsize above 32X32 or very high variance. |
504 | 0 | if (frame_is_intra_only(cm) && |
505 | 0 | (bsize > BLOCK_32X32 || |
506 | 0 | vt.part_variances->none.variance > (threshold << 4))) { |
507 | 0 | return 0; |
508 | 0 | } |
509 | | // If variance is low, take the bsize (no split). |
510 | 0 | if (mi_col + block_width / 2 < cm->mi_cols && |
511 | 0 | mi_row + block_height / 2 < cm->mi_rows && |
512 | 0 | vt.part_variances->none.variance < threshold) { |
513 | 0 | set_block_size(cpi, x, xd, mi_row, mi_col, bsize); |
514 | 0 | return 1; |
515 | 0 | } |
516 | | |
517 | | // Check vertical split. |
518 | 0 | if (mi_row + block_height / 2 < cm->mi_rows) { |
519 | 0 | BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_VERT); |
520 | 0 | get_variance(&vt.part_variances->vert[0]); |
521 | 0 | get_variance(&vt.part_variances->vert[1]); |
522 | 0 | if (vt.part_variances->vert[0].variance < threshold && |
523 | 0 | vt.part_variances->vert[1].variance < threshold && |
524 | 0 | get_plane_block_size(subsize, &xd->plane[1]) < BLOCK_INVALID) { |
525 | 0 | set_block_size(cpi, x, xd, mi_row, mi_col, subsize); |
526 | 0 | set_block_size(cpi, x, xd, mi_row, mi_col + block_width / 2, subsize); |
527 | 0 | return 1; |
528 | 0 | } |
529 | 0 | } |
530 | | // Check horizontal split. |
531 | 0 | if (mi_col + block_width / 2 < cm->mi_cols) { |
532 | 0 | BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_HORZ); |
533 | 0 | get_variance(&vt.part_variances->horz[0]); |
534 | 0 | get_variance(&vt.part_variances->horz[1]); |
535 | 0 | if (vt.part_variances->horz[0].variance < threshold && |
536 | 0 | vt.part_variances->horz[1].variance < threshold && |
537 | 0 | get_plane_block_size(subsize, &xd->plane[1]) < BLOCK_INVALID) { |
538 | 0 | set_block_size(cpi, x, xd, mi_row, mi_col, subsize); |
539 | 0 | set_block_size(cpi, x, xd, mi_row + block_height / 2, mi_col, subsize); |
540 | 0 | return 1; |
541 | 0 | } |
542 | 0 | } |
543 | | |
544 | 0 | return 0; |
545 | 0 | } |
546 | 0 | return 0; |
547 | 0 | } |
548 | | |
549 | | static int64_t scale_part_thresh_sumdiff(int64_t threshold_base, int speed, |
550 | | int width, int height, |
551 | 0 | int content_state) { |
552 | 0 | if (speed >= 8) { |
553 | 0 | if (width <= 640 && height <= 480) |
554 | 0 | return (5 * threshold_base) >> 2; |
555 | 0 | else if ((content_state == kLowSadLowSumdiff) || |
556 | 0 | (content_state == kHighSadLowSumdiff) || |
557 | 0 | (content_state == kLowVarHighSumdiff)) |
558 | 0 | return (5 * threshold_base) >> 2; |
559 | 0 | } else if (speed == 7) { |
560 | 0 | if ((content_state == kLowSadLowSumdiff) || |
561 | 0 | (content_state == kHighSadLowSumdiff) || |
562 | 0 | (content_state == kLowVarHighSumdiff)) { |
563 | 0 | return (5 * threshold_base) >> 2; |
564 | 0 | } |
565 | 0 | } |
566 | 0 | return threshold_base; |
567 | 0 | } |
568 | | |
569 | | // Set the variance split thresholds for following the block sizes: |
570 | | // 0 - threshold_64x64, 1 - threshold_32x32, 2 - threshold_16x16, |
571 | | // 3 - vbp_threshold_8x8. vbp_threshold_8x8 (to split to 4x4 partition) is |
572 | | // currently only used on key frame. |
573 | | static void set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q, |
574 | 0 | int content_state) { |
575 | 0 | VP9_COMMON *const cm = &cpi->common; |
576 | 0 | const int is_key_frame = frame_is_intra_only(cm); |
577 | 0 | const int threshold_multiplier = |
578 | 0 | is_key_frame ? 20 : cpi->sf.variance_part_thresh_mult; |
579 | 0 | int64_t threshold_base = |
580 | 0 | (int64_t)(threshold_multiplier * cpi->y_dequant[q][1]); |
581 | |
|
582 | 0 | if (is_key_frame) { |
583 | 0 | thresholds[0] = threshold_base; |
584 | 0 | thresholds[1] = threshold_base >> 2; |
585 | 0 | thresholds[2] = threshold_base >> 2; |
586 | 0 | thresholds[3] = threshold_base << 2; |
587 | 0 | } else { |
588 | | // Increase base variance threshold based on estimated noise level. |
589 | 0 | if (cpi->noise_estimate.enabled && cm->width >= 640 && cm->height >= 480) { |
590 | 0 | NOISE_LEVEL noise_level = |
591 | 0 | vp9_noise_estimate_extract_level(&cpi->noise_estimate); |
592 | 0 | if (noise_level == kHigh) |
593 | 0 | threshold_base = 3 * threshold_base; |
594 | 0 | else if (noise_level == kMedium) |
595 | 0 | threshold_base = threshold_base << 1; |
596 | 0 | else if (noise_level < kLow) |
597 | 0 | threshold_base = (7 * threshold_base) >> 3; |
598 | 0 | } |
599 | | #if CONFIG_VP9_TEMPORAL_DENOISING |
600 | | if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) && |
601 | | cpi->oxcf.speed > 5 && cpi->denoiser.denoising_level >= kDenLow) |
602 | | threshold_base = |
603 | | vp9_scale_part_thresh(threshold_base, cpi->denoiser.denoising_level, |
604 | | content_state, cpi->svc.temporal_layer_id); |
605 | | else |
606 | | threshold_base = |
607 | | scale_part_thresh_sumdiff(threshold_base, cpi->oxcf.speed, cm->width, |
608 | | cm->height, content_state); |
609 | | #else |
610 | | // Increase base variance threshold based on content_state/sum_diff level. |
611 | 0 | threshold_base = scale_part_thresh_sumdiff( |
612 | 0 | threshold_base, cpi->oxcf.speed, cm->width, cm->height, content_state); |
613 | 0 | #endif |
614 | 0 | thresholds[0] = threshold_base; |
615 | 0 | thresholds[2] = threshold_base << cpi->oxcf.speed; |
616 | 0 | if (cm->width >= 1280 && cm->height >= 720 && cpi->oxcf.speed < 7) |
617 | 0 | thresholds[2] = thresholds[2] << 1; |
618 | 0 | if (cm->width <= 352 && cm->height <= 288) { |
619 | 0 | thresholds[0] = threshold_base >> 3; |
620 | 0 | thresholds[1] = threshold_base >> 1; |
621 | 0 | thresholds[2] = threshold_base << 3; |
622 | 0 | if (cpi->rc.avg_frame_qindex[INTER_FRAME] > 220) |
623 | 0 | thresholds[2] = thresholds[2] << 2; |
624 | 0 | else if (cpi->rc.avg_frame_qindex[INTER_FRAME] > 200) |
625 | 0 | thresholds[2] = thresholds[2] << 1; |
626 | 0 | } else if (cm->width < 1280 && cm->height < 720) { |
627 | 0 | thresholds[1] = (5 * threshold_base) >> 2; |
628 | 0 | } else if (cm->width < 1920 && cm->height < 1080) { |
629 | 0 | thresholds[1] = threshold_base << 1; |
630 | 0 | } else { |
631 | 0 | thresholds[1] = (5 * threshold_base) >> 1; |
632 | 0 | } |
633 | 0 | if (cpi->sf.disable_16x16part_nonkey) thresholds[2] = INT64_MAX; |
634 | 0 | } |
635 | 0 | } |
636 | | |
637 | | void vp9_set_variance_partition_thresholds(VP9_COMP *cpi, int q, |
638 | 42.2k | int content_state) { |
639 | 42.2k | VP9_COMMON *const cm = &cpi->common; |
640 | 42.2k | SPEED_FEATURES *const sf = &cpi->sf; |
641 | 42.2k | const int is_key_frame = frame_is_intra_only(cm); |
642 | 42.2k | if (sf->partition_search_type != VAR_BASED_PARTITION && |
643 | 42.2k | sf->partition_search_type != REFERENCE_PARTITION) { |
644 | 42.2k | return; |
645 | 42.2k | } else { |
646 | 0 | set_vbp_thresholds(cpi, cpi->vbp_thresholds, q, content_state); |
647 | | // The thresholds below are not changed locally. |
648 | 0 | if (is_key_frame) { |
649 | 0 | cpi->vbp_threshold_sad = 0; |
650 | 0 | cpi->vbp_threshold_copy = 0; |
651 | 0 | cpi->vbp_bsize_min = BLOCK_8X8; |
652 | 0 | } else { |
653 | 0 | if (cm->width <= 352 && cm->height <= 288) |
654 | 0 | cpi->vbp_threshold_sad = 10; |
655 | 0 | else |
656 | 0 | cpi->vbp_threshold_sad = (cpi->y_dequant[q][1] << 1) > 1000 |
657 | 0 | ? (cpi->y_dequant[q][1] << 1) |
658 | 0 | : 1000; |
659 | 0 | cpi->vbp_bsize_min = BLOCK_16X16; |
660 | 0 | if (cm->width <= 352 && cm->height <= 288) |
661 | 0 | cpi->vbp_threshold_copy = 4000; |
662 | 0 | else if (cm->width <= 640 && cm->height <= 360) |
663 | 0 | cpi->vbp_threshold_copy = 8000; |
664 | 0 | else |
665 | 0 | cpi->vbp_threshold_copy = (cpi->y_dequant[q][1] << 3) > 8000 |
666 | 0 | ? (cpi->y_dequant[q][1] << 3) |
667 | 0 | : 8000; |
668 | 0 | if (cpi->rc.high_source_sad || |
669 | 0 | (cpi->use_svc && cpi->svc.high_source_sad_superframe)) { |
670 | 0 | cpi->vbp_threshold_sad = 0; |
671 | 0 | cpi->vbp_threshold_copy = 0; |
672 | 0 | } |
673 | 0 | } |
674 | 0 | cpi->vbp_threshold_minmax = 15 + (q >> 3); |
675 | 0 | } |
676 | 42.2k | } |
677 | | |
678 | | // Compute the minmax over the 8x8 subblocks. |
679 | | static int compute_minmax_8x8(const uint8_t *s, int sp, const uint8_t *d, |
680 | | int dp, int x16_idx, int y16_idx, |
681 | | #if CONFIG_VP9_HIGHBITDEPTH |
682 | | int highbd_flag, |
683 | | #endif |
684 | 0 | int pixels_wide, int pixels_high) { |
685 | 0 | int k; |
686 | 0 | int minmax_max = 0; |
687 | 0 | int minmax_min = 255; |
688 | | // Loop over the 4 8x8 subblocks. |
689 | 0 | for (k = 0; k < 4; k++) { |
690 | 0 | int x8_idx = x16_idx + ((k & 1) << 3); |
691 | 0 | int y8_idx = y16_idx + ((k >> 1) << 3); |
692 | 0 | int min = 0; |
693 | 0 | int max = 0; |
694 | 0 | if (x8_idx < pixels_wide && y8_idx < pixels_high) { |
695 | 0 | #if CONFIG_VP9_HIGHBITDEPTH |
696 | 0 | if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) { |
697 | 0 | vpx_highbd_minmax_8x8(s + y8_idx * sp + x8_idx, sp, |
698 | 0 | d + y8_idx * dp + x8_idx, dp, &min, &max); |
699 | 0 | } else { |
700 | 0 | vpx_minmax_8x8(s + y8_idx * sp + x8_idx, sp, d + y8_idx * dp + x8_idx, |
701 | 0 | dp, &min, &max); |
702 | 0 | } |
703 | | #else |
704 | | vpx_minmax_8x8(s + y8_idx * sp + x8_idx, sp, d + y8_idx * dp + x8_idx, dp, |
705 | | &min, &max); |
706 | | #endif |
707 | 0 | if ((max - min) > minmax_max) minmax_max = (max - min); |
708 | 0 | if ((max - min) < minmax_min) minmax_min = (max - min); |
709 | 0 | } |
710 | 0 | } |
711 | 0 | return (minmax_max - minmax_min); |
712 | 0 | } |
713 | | |
714 | | static void fill_variance_4x4avg(const uint8_t *s, int sp, const uint8_t *d, |
715 | | int dp, int x8_idx, int y8_idx, v8x8 *vst, |
716 | | #if CONFIG_VP9_HIGHBITDEPTH |
717 | | int highbd_flag, |
718 | | #endif |
719 | | int pixels_wide, int pixels_high, |
720 | 0 | int is_key_frame) { |
721 | 0 | int k; |
722 | 0 | for (k = 0; k < 4; k++) { |
723 | 0 | int x4_idx = x8_idx + ((k & 1) << 2); |
724 | 0 | int y4_idx = y8_idx + ((k >> 1) << 2); |
725 | 0 | unsigned int sse = 0; |
726 | 0 | int sum = 0; |
727 | 0 | if (x4_idx < pixels_wide && y4_idx < pixels_high) { |
728 | 0 | int s_avg; |
729 | 0 | int d_avg = 128; |
730 | 0 | #if CONFIG_VP9_HIGHBITDEPTH |
731 | 0 | if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) { |
732 | 0 | s_avg = vpx_highbd_avg_4x4(s + y4_idx * sp + x4_idx, sp); |
733 | 0 | if (!is_key_frame) |
734 | 0 | d_avg = vpx_highbd_avg_4x4(d + y4_idx * dp + x4_idx, dp); |
735 | 0 | } else { |
736 | 0 | s_avg = vpx_avg_4x4(s + y4_idx * sp + x4_idx, sp); |
737 | 0 | if (!is_key_frame) d_avg = vpx_avg_4x4(d + y4_idx * dp + x4_idx, dp); |
738 | 0 | } |
739 | | #else |
740 | | s_avg = vpx_avg_4x4(s + y4_idx * sp + x4_idx, sp); |
741 | | if (!is_key_frame) d_avg = vpx_avg_4x4(d + y4_idx * dp + x4_idx, dp); |
742 | | #endif |
743 | 0 | sum = s_avg - d_avg; |
744 | 0 | sse = sum * sum; |
745 | 0 | } |
746 | 0 | fill_variance(sse, sum, 0, &vst->split[k].part_variances.none); |
747 | 0 | } |
748 | 0 | } |
749 | | |
750 | | static void fill_variance_8x8avg(const uint8_t *s, int sp, const uint8_t *d, |
751 | | int dp, int x16_idx, int y16_idx, v16x16 *vst, |
752 | | #if CONFIG_VP9_HIGHBITDEPTH |
753 | | int highbd_flag, |
754 | | #endif |
755 | | int pixels_wide, int pixels_high, |
756 | 0 | int is_key_frame) { |
757 | 0 | int k; |
758 | 0 | for (k = 0; k < 4; k++) { |
759 | 0 | int x8_idx = x16_idx + ((k & 1) << 3); |
760 | 0 | int y8_idx = y16_idx + ((k >> 1) << 3); |
761 | 0 | unsigned int sse = 0; |
762 | 0 | int sum = 0; |
763 | 0 | if (x8_idx < pixels_wide && y8_idx < pixels_high) { |
764 | 0 | int s_avg; |
765 | 0 | int d_avg = 128; |
766 | 0 | #if CONFIG_VP9_HIGHBITDEPTH |
767 | 0 | if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) { |
768 | 0 | s_avg = vpx_highbd_avg_8x8(s + y8_idx * sp + x8_idx, sp); |
769 | 0 | if (!is_key_frame) |
770 | 0 | d_avg = vpx_highbd_avg_8x8(d + y8_idx * dp + x8_idx, dp); |
771 | 0 | } else { |
772 | 0 | s_avg = vpx_avg_8x8(s + y8_idx * sp + x8_idx, sp); |
773 | 0 | if (!is_key_frame) d_avg = vpx_avg_8x8(d + y8_idx * dp + x8_idx, dp); |
774 | 0 | } |
775 | | #else |
776 | | s_avg = vpx_avg_8x8(s + y8_idx * sp + x8_idx, sp); |
777 | | if (!is_key_frame) d_avg = vpx_avg_8x8(d + y8_idx * dp + x8_idx, dp); |
778 | | #endif |
779 | 0 | sum = s_avg - d_avg; |
780 | 0 | sse = sum * sum; |
781 | 0 | } |
782 | 0 | fill_variance(sse, sum, 0, &vst->split[k].part_variances.none); |
783 | 0 | } |
784 | 0 | } |
785 | | |
786 | | // Check if most of the superblock is skin content, and if so, force split to |
787 | | // 32x32, and set x->sb_is_skin for use in mode selection. |
788 | | static int skin_sb_split(VP9_COMP *cpi, const int low_res, int mi_row, |
789 | 0 | int mi_col, int *force_split) { |
790 | 0 | VP9_COMMON *const cm = &cpi->common; |
791 | 0 | #if CONFIG_VP9_HIGHBITDEPTH |
792 | 0 | if (cm->use_highbitdepth) return 0; |
793 | 0 | #endif |
794 | | // Avoid checking superblocks on/near boundary and avoid low resolutions. |
795 | | // Note superblock may still pick 64X64 if y_sad is very small |
796 | | // (i.e., y_sad < cpi->vbp_threshold_sad) below. For now leave this as is. |
797 | 0 | if (!low_res && (mi_col >= 8 && mi_col + 8 < cm->mi_cols && mi_row >= 8 && |
798 | 0 | mi_row + 8 < cm->mi_rows)) { |
799 | 0 | int num_16x16_skin = 0; |
800 | 0 | int num_16x16_nonskin = 0; |
801 | 0 | const int block_index = mi_row * cm->mi_cols + mi_col; |
802 | 0 | const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64]; |
803 | 0 | const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64]; |
804 | 0 | const int xmis = VPXMIN(cm->mi_cols - mi_col, bw); |
805 | 0 | const int ymis = VPXMIN(cm->mi_rows - mi_row, bh); |
806 | | // Loop through the 16x16 sub-blocks. |
807 | 0 | int i, j; |
808 | 0 | for (i = 0; i < ymis; i += 2) { |
809 | 0 | for (j = 0; j < xmis; j += 2) { |
810 | 0 | int bl_index = block_index + i * cm->mi_cols + j; |
811 | 0 | int is_skin = cpi->skin_map[bl_index]; |
812 | 0 | num_16x16_skin += is_skin; |
813 | 0 | num_16x16_nonskin += (1 - is_skin); |
814 | 0 | if (num_16x16_nonskin > 3) { |
815 | | // Exit loop if at least 4 of the 16x16 blocks are not skin. |
816 | 0 | i = ymis; |
817 | 0 | break; |
818 | 0 | } |
819 | 0 | } |
820 | 0 | } |
821 | 0 | if (num_16x16_skin > 12) { |
822 | 0 | *force_split = 1; |
823 | 0 | return 1; |
824 | 0 | } |
825 | 0 | } |
826 | 0 | return 0; |
827 | 0 | } |
828 | | |
829 | | static void set_low_temp_var_flag(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, |
830 | | v64x64 *vt, int64_t thresholds[], |
831 | | MV_REFERENCE_FRAME ref_frame_partition, |
832 | 0 | int mi_col, int mi_row) { |
833 | 0 | int i, j; |
834 | 0 | VP9_COMMON *const cm = &cpi->common; |
835 | 0 | const int mv_thr = cm->width > 640 ? 8 : 4; |
836 | | // Check temporal variance for bsize >= 16x16, if LAST_FRAME was selected and |
837 | | // int_pro mv is small. If the temporal variance is small set the flag |
838 | | // variance_low for the block. The variance threshold can be adjusted, the |
839 | | // higher the more aggressive. |
840 | 0 | if (ref_frame_partition == LAST_FRAME && |
841 | 0 | (cpi->sf.short_circuit_low_temp_var == 1 || |
842 | 0 | (xd->mi[0]->mv[0].as_mv.col < mv_thr && |
843 | 0 | xd->mi[0]->mv[0].as_mv.col > -mv_thr && |
844 | 0 | xd->mi[0]->mv[0].as_mv.row < mv_thr && |
845 | 0 | xd->mi[0]->mv[0].as_mv.row > -mv_thr))) { |
846 | 0 | if (xd->mi[0]->sb_type == BLOCK_64X64) { |
847 | 0 | if ((vt->part_variances).none.variance < (thresholds[0] >> 1)) |
848 | 0 | x->variance_low[0] = 1; |
849 | 0 | } else if (xd->mi[0]->sb_type == BLOCK_64X32) { |
850 | 0 | for (i = 0; i < 2; i++) { |
851 | 0 | if (vt->part_variances.horz[i].variance < (thresholds[0] >> 2)) |
852 | 0 | x->variance_low[i + 1] = 1; |
853 | 0 | } |
854 | 0 | } else if (xd->mi[0]->sb_type == BLOCK_32X64) { |
855 | 0 | for (i = 0; i < 2; i++) { |
856 | 0 | if (vt->part_variances.vert[i].variance < (thresholds[0] >> 2)) |
857 | 0 | x->variance_low[i + 3] = 1; |
858 | 0 | } |
859 | 0 | } else { |
860 | 0 | for (i = 0; i < 4; i++) { |
861 | 0 | const int idx[4][2] = { { 0, 0 }, { 0, 4 }, { 4, 0 }, { 4, 4 } }; |
862 | 0 | const int idx_str = |
863 | 0 | cm->mi_stride * (mi_row + idx[i][0]) + mi_col + idx[i][1]; |
864 | 0 | MODE_INFO **this_mi = cm->mi_grid_visible + idx_str; |
865 | |
|
866 | 0 | if (cm->mi_cols <= mi_col + idx[i][1] || |
867 | 0 | cm->mi_rows <= mi_row + idx[i][0]) |
868 | 0 | continue; |
869 | | |
870 | 0 | if ((*this_mi)->sb_type == BLOCK_32X32) { |
871 | 0 | int64_t threshold_32x32 = (cpi->sf.short_circuit_low_temp_var == 1 || |
872 | 0 | cpi->sf.short_circuit_low_temp_var == 3) |
873 | 0 | ? ((5 * thresholds[1]) >> 3) |
874 | 0 | : (thresholds[1] >> 1); |
875 | 0 | if (vt->split[i].part_variances.none.variance < threshold_32x32) |
876 | 0 | x->variance_low[i + 5] = 1; |
877 | 0 | } else if (cpi->sf.short_circuit_low_temp_var >= 2) { |
878 | | // For 32x16 and 16x32 blocks, the flag is set on each 16x16 block |
879 | | // inside. |
880 | 0 | if ((*this_mi)->sb_type == BLOCK_16X16 || |
881 | 0 | (*this_mi)->sb_type == BLOCK_32X16 || |
882 | 0 | (*this_mi)->sb_type == BLOCK_16X32) { |
883 | 0 | for (j = 0; j < 4; j++) { |
884 | 0 | if (vt->split[i].split[j].part_variances.none.variance < |
885 | 0 | (thresholds[2] >> 8)) |
886 | 0 | x->variance_low[(i << 2) + j + 9] = 1; |
887 | 0 | } |
888 | 0 | } |
889 | 0 | } |
890 | 0 | } |
891 | 0 | } |
892 | 0 | } |
893 | 0 | } |
894 | | |
895 | | static void copy_partitioning_helper(VP9_COMP *cpi, MACROBLOCK *x, |
896 | | MACROBLOCKD *xd, BLOCK_SIZE bsize, |
897 | 0 | int mi_row, int mi_col) { |
898 | 0 | VP9_COMMON *const cm = &cpi->common; |
899 | 0 | BLOCK_SIZE *prev_part = cpi->prev_partition; |
900 | 0 | int start_pos = mi_row * cm->mi_stride + mi_col; |
901 | |
|
902 | 0 | const int bsl = b_width_log2_lookup[bsize]; |
903 | 0 | const int bs = (1 << bsl) >> 2; |
904 | 0 | BLOCK_SIZE subsize; |
905 | 0 | PARTITION_TYPE partition; |
906 | |
|
907 | 0 | if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; |
908 | | |
909 | 0 | partition = partition_lookup[bsl][prev_part[start_pos]]; |
910 | 0 | subsize = get_subsize(bsize, partition); |
911 | |
|
912 | 0 | if (subsize < BLOCK_8X8) { |
913 | 0 | set_block_size(cpi, x, xd, mi_row, mi_col, bsize); |
914 | 0 | } else { |
915 | 0 | switch (partition) { |
916 | 0 | case PARTITION_NONE: |
917 | 0 | set_block_size(cpi, x, xd, mi_row, mi_col, bsize); |
918 | 0 | break; |
919 | 0 | case PARTITION_HORZ: |
920 | 0 | set_block_size(cpi, x, xd, mi_row, mi_col, subsize); |
921 | 0 | set_block_size(cpi, x, xd, mi_row + bs, mi_col, subsize); |
922 | 0 | break; |
923 | 0 | case PARTITION_VERT: |
924 | 0 | set_block_size(cpi, x, xd, mi_row, mi_col, subsize); |
925 | 0 | set_block_size(cpi, x, xd, mi_row, mi_col + bs, subsize); |
926 | 0 | break; |
927 | 0 | default: |
928 | 0 | assert(partition == PARTITION_SPLIT); |
929 | 0 | copy_partitioning_helper(cpi, x, xd, subsize, mi_row, mi_col); |
930 | 0 | copy_partitioning_helper(cpi, x, xd, subsize, mi_row + bs, mi_col); |
931 | 0 | copy_partitioning_helper(cpi, x, xd, subsize, mi_row, mi_col + bs); |
932 | 0 | copy_partitioning_helper(cpi, x, xd, subsize, mi_row + bs, mi_col + bs); |
933 | 0 | break; |
934 | 0 | } |
935 | 0 | } |
936 | 0 | } |
937 | | |
938 | | static int copy_partitioning(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, |
939 | | int mi_row, int mi_col, int segment_id, |
940 | 0 | int sb_offset) { |
941 | 0 | int svc_copy_allowed = 1; |
942 | 0 | int frames_since_key_thresh = 1; |
943 | 0 | if (cpi->use_svc) { |
944 | | // For SVC, don't allow copy if base spatial layer is key frame, or if |
945 | | // frame is not a temporal enhancement layer frame. |
946 | 0 | int layer = LAYER_IDS_TO_IDX(0, cpi->svc.temporal_layer_id, |
947 | 0 | cpi->svc.number_temporal_layers); |
948 | 0 | const LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer]; |
949 | 0 | if (lc->is_key_frame || !cpi->svc.non_reference_frame) svc_copy_allowed = 0; |
950 | 0 | frames_since_key_thresh = cpi->svc.number_spatial_layers << 1; |
951 | 0 | } |
952 | 0 | if (cpi->rc.frames_since_key > frames_since_key_thresh && svc_copy_allowed && |
953 | 0 | !cpi->resize_pending && segment_id == CR_SEGMENT_ID_BASE && |
954 | 0 | cpi->prev_segment_id[sb_offset] == CR_SEGMENT_ID_BASE && |
955 | 0 | cpi->copied_frame_cnt[sb_offset] < cpi->max_copied_frame) { |
956 | 0 | if (cpi->prev_partition != NULL) { |
957 | 0 | copy_partitioning_helper(cpi, x, xd, BLOCK_64X64, mi_row, mi_col); |
958 | 0 | cpi->copied_frame_cnt[sb_offset] += 1; |
959 | 0 | memcpy(x->variance_low, &(cpi->prev_variance_low[sb_offset * 25]), |
960 | 0 | sizeof(x->variance_low)); |
961 | 0 | return 1; |
962 | 0 | } |
963 | 0 | } |
964 | | |
965 | 0 | return 0; |
966 | 0 | } |
967 | | |
968 | | static int scale_partitioning_svc(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, |
969 | | BLOCK_SIZE bsize, int mi_row, int mi_col, |
970 | 0 | int mi_row_high, int mi_col_high) { |
971 | 0 | VP9_COMMON *const cm = &cpi->common; |
972 | 0 | SVC *const svc = &cpi->svc; |
973 | 0 | BLOCK_SIZE *prev_part = svc->prev_partition_svc; |
974 | | // Variables with _high are for higher resolution. |
975 | 0 | int bsize_high = 0; |
976 | 0 | int subsize_high = 0; |
977 | 0 | const int bsl_high = b_width_log2_lookup[bsize]; |
978 | 0 | const int bs_high = (1 << bsl_high) >> 2; |
979 | 0 | const int has_rows = (mi_row_high + bs_high) < cm->mi_rows; |
980 | 0 | const int has_cols = (mi_col_high + bs_high) < cm->mi_cols; |
981 | |
|
982 | 0 | const int row_boundary_block_scale_factor[BLOCK_SIZES] = { 13, 13, 13, 1, 0, |
983 | 0 | 1, 1, 0, 1, 1, |
984 | 0 | 0, 1, 0 }; |
985 | 0 | const int col_boundary_block_scale_factor[BLOCK_SIZES] = { 13, 13, 13, 2, 2, |
986 | 0 | 0, 2, 2, 0, 2, |
987 | 0 | 2, 0, 0 }; |
988 | 0 | int start_pos; |
989 | 0 | BLOCK_SIZE bsize_low; |
990 | 0 | PARTITION_TYPE partition_high; |
991 | |
|
992 | 0 | if (mi_row_high >= cm->mi_rows || mi_col_high >= cm->mi_cols) return 0; |
993 | 0 | if (mi_row >= svc->mi_rows[svc->spatial_layer_id - 1] || |
994 | 0 | mi_col >= svc->mi_cols[svc->spatial_layer_id - 1]) |
995 | 0 | return 0; |
996 | | |
997 | | // Find corresponding (mi_col/mi_row) block down-scaled by 2x2. |
998 | 0 | start_pos = mi_row * (svc->mi_stride[svc->spatial_layer_id - 1]) + mi_col; |
999 | 0 | bsize_low = prev_part[start_pos]; |
1000 | | // The block size is too big for boundaries. Do variance based partitioning. |
1001 | 0 | if ((!has_rows || !has_cols) && bsize_low > BLOCK_16X16) return 1; |
1002 | | |
1003 | | // For reference frames: return 1 (do variance-based partitioning) if the |
1004 | | // superblock is not low source sad and lower-resoln bsize is below 32x32. |
1005 | 0 | if (!cpi->svc.non_reference_frame && !x->skip_low_source_sad && |
1006 | 0 | bsize_low < BLOCK_32X32) |
1007 | 0 | return 1; |
1008 | | |
1009 | | // Scale up block size by 2x2. Force 64x64 for size larger than 32x32. |
1010 | 0 | if (bsize_low < BLOCK_32X32) { |
1011 | 0 | bsize_high = bsize_low + 3; |
1012 | 0 | } else if (bsize_low >= BLOCK_32X32) { |
1013 | 0 | bsize_high = BLOCK_64X64; |
1014 | 0 | } |
1015 | | // Scale up blocks on boundary. |
1016 | 0 | if (!has_cols && has_rows) { |
1017 | 0 | bsize_high = bsize_low + row_boundary_block_scale_factor[bsize_low]; |
1018 | 0 | } else if (has_cols && !has_rows) { |
1019 | 0 | bsize_high = bsize_low + col_boundary_block_scale_factor[bsize_low]; |
1020 | 0 | } else if (!has_cols && !has_rows) { |
1021 | 0 | bsize_high = bsize_low; |
1022 | 0 | } |
1023 | |
|
1024 | 0 | partition_high = partition_lookup[bsl_high][bsize_high]; |
1025 | 0 | subsize_high = get_subsize(bsize, partition_high); |
1026 | |
|
1027 | 0 | if (subsize_high < BLOCK_8X8) { |
1028 | 0 | set_block_size(cpi, x, xd, mi_row_high, mi_col_high, bsize_high); |
1029 | 0 | } else { |
1030 | 0 | const int bsl = b_width_log2_lookup[bsize]; |
1031 | 0 | const int bs = (1 << bsl) >> 2; |
1032 | 0 | switch (partition_high) { |
1033 | 0 | case PARTITION_NONE: |
1034 | 0 | set_block_size(cpi, x, xd, mi_row_high, mi_col_high, bsize_high); |
1035 | 0 | break; |
1036 | 0 | case PARTITION_HORZ: |
1037 | 0 | set_block_size(cpi, x, xd, mi_row_high, mi_col_high, subsize_high); |
1038 | 0 | if (subsize_high < BLOCK_64X64) |
1039 | 0 | set_block_size(cpi, x, xd, mi_row_high + bs_high, mi_col_high, |
1040 | 0 | subsize_high); |
1041 | 0 | break; |
1042 | 0 | case PARTITION_VERT: |
1043 | 0 | set_block_size(cpi, x, xd, mi_row_high, mi_col_high, subsize_high); |
1044 | 0 | if (subsize_high < BLOCK_64X64) |
1045 | 0 | set_block_size(cpi, x, xd, mi_row_high, mi_col_high + bs_high, |
1046 | 0 | subsize_high); |
1047 | 0 | break; |
1048 | 0 | default: |
1049 | 0 | assert(partition_high == PARTITION_SPLIT); |
1050 | 0 | if (scale_partitioning_svc(cpi, x, xd, subsize_high, mi_row, mi_col, |
1051 | 0 | mi_row_high, mi_col_high)) |
1052 | 0 | return 1; |
1053 | 0 | if (scale_partitioning_svc(cpi, x, xd, subsize_high, mi_row + (bs >> 1), |
1054 | 0 | mi_col, mi_row_high + bs_high, mi_col_high)) |
1055 | 0 | return 1; |
1056 | 0 | if (scale_partitioning_svc(cpi, x, xd, subsize_high, mi_row, |
1057 | 0 | mi_col + (bs >> 1), mi_row_high, |
1058 | 0 | mi_col_high + bs_high)) |
1059 | 0 | return 1; |
1060 | 0 | if (scale_partitioning_svc(cpi, x, xd, subsize_high, mi_row + (bs >> 1), |
1061 | 0 | mi_col + (bs >> 1), mi_row_high + bs_high, |
1062 | 0 | mi_col_high + bs_high)) |
1063 | 0 | return 1; |
1064 | 0 | break; |
1065 | 0 | } |
1066 | 0 | } |
1067 | | |
1068 | 0 | return 0; |
1069 | 0 | } |
1070 | | |
1071 | | static void update_partition_svc(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row, |
1072 | 0 | int mi_col) { |
1073 | 0 | VP9_COMMON *const cm = &cpi->common; |
1074 | 0 | BLOCK_SIZE *prev_part = cpi->svc.prev_partition_svc; |
1075 | 0 | int start_pos = mi_row * cm->mi_stride + mi_col; |
1076 | 0 | const int bsl = b_width_log2_lookup[bsize]; |
1077 | 0 | const int bs = (1 << bsl) >> 2; |
1078 | 0 | BLOCK_SIZE subsize; |
1079 | 0 | PARTITION_TYPE partition; |
1080 | 0 | const MODE_INFO *mi = NULL; |
1081 | 0 | int xx, yy; |
1082 | |
|
1083 | 0 | if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; |
1084 | | |
1085 | 0 | mi = cm->mi_grid_visible[start_pos]; |
1086 | 0 | partition = partition_lookup[bsl][mi->sb_type]; |
1087 | 0 | subsize = get_subsize(bsize, partition); |
1088 | 0 | if (subsize < BLOCK_8X8) { |
1089 | 0 | prev_part[start_pos] = bsize; |
1090 | 0 | } else { |
1091 | 0 | switch (partition) { |
1092 | 0 | case PARTITION_NONE: |
1093 | 0 | prev_part[start_pos] = bsize; |
1094 | 0 | if (bsize == BLOCK_64X64) { |
1095 | 0 | for (xx = 0; xx < 8; xx += 4) |
1096 | 0 | for (yy = 0; yy < 8; yy += 4) { |
1097 | 0 | if ((mi_row + xx < cm->mi_rows) && (mi_col + yy < cm->mi_cols)) |
1098 | 0 | prev_part[start_pos + xx * cm->mi_stride + yy] = bsize; |
1099 | 0 | } |
1100 | 0 | } |
1101 | 0 | break; |
1102 | 0 | case PARTITION_HORZ: |
1103 | 0 | prev_part[start_pos] = subsize; |
1104 | 0 | if (mi_row + bs < cm->mi_rows) |
1105 | 0 | prev_part[start_pos + bs * cm->mi_stride] = subsize; |
1106 | 0 | break; |
1107 | 0 | case PARTITION_VERT: |
1108 | 0 | prev_part[start_pos] = subsize; |
1109 | 0 | if (mi_col + bs < cm->mi_cols) prev_part[start_pos + bs] = subsize; |
1110 | 0 | break; |
1111 | 0 | default: |
1112 | 0 | assert(partition == PARTITION_SPLIT); |
1113 | 0 | update_partition_svc(cpi, subsize, mi_row, mi_col); |
1114 | 0 | update_partition_svc(cpi, subsize, mi_row + bs, mi_col); |
1115 | 0 | update_partition_svc(cpi, subsize, mi_row, mi_col + bs); |
1116 | 0 | update_partition_svc(cpi, subsize, mi_row + bs, mi_col + bs); |
1117 | 0 | break; |
1118 | 0 | } |
1119 | 0 | } |
1120 | 0 | } |
1121 | | |
1122 | | static void update_prev_partition_helper(VP9_COMP *cpi, BLOCK_SIZE bsize, |
1123 | 0 | int mi_row, int mi_col) { |
1124 | 0 | VP9_COMMON *const cm = &cpi->common; |
1125 | 0 | BLOCK_SIZE *prev_part = cpi->prev_partition; |
1126 | 0 | int start_pos = mi_row * cm->mi_stride + mi_col; |
1127 | 0 | const int bsl = b_width_log2_lookup[bsize]; |
1128 | 0 | const int bs = (1 << bsl) >> 2; |
1129 | 0 | BLOCK_SIZE subsize; |
1130 | 0 | PARTITION_TYPE partition; |
1131 | 0 | const MODE_INFO *mi = NULL; |
1132 | |
|
1133 | 0 | if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; |
1134 | | |
1135 | 0 | mi = cm->mi_grid_visible[start_pos]; |
1136 | 0 | partition = partition_lookup[bsl][mi->sb_type]; |
1137 | 0 | subsize = get_subsize(bsize, partition); |
1138 | 0 | if (subsize < BLOCK_8X8) { |
1139 | 0 | prev_part[start_pos] = bsize; |
1140 | 0 | } else { |
1141 | 0 | switch (partition) { |
1142 | 0 | case PARTITION_NONE: prev_part[start_pos] = bsize; break; |
1143 | 0 | case PARTITION_HORZ: |
1144 | 0 | prev_part[start_pos] = subsize; |
1145 | 0 | if (mi_row + bs < cm->mi_rows) |
1146 | 0 | prev_part[start_pos + bs * cm->mi_stride] = subsize; |
1147 | 0 | break; |
1148 | 0 | case PARTITION_VERT: |
1149 | 0 | prev_part[start_pos] = subsize; |
1150 | 0 | if (mi_col + bs < cm->mi_cols) prev_part[start_pos + bs] = subsize; |
1151 | 0 | break; |
1152 | 0 | default: |
1153 | 0 | assert(partition == PARTITION_SPLIT); |
1154 | 0 | update_prev_partition_helper(cpi, subsize, mi_row, mi_col); |
1155 | 0 | update_prev_partition_helper(cpi, subsize, mi_row + bs, mi_col); |
1156 | 0 | update_prev_partition_helper(cpi, subsize, mi_row, mi_col + bs); |
1157 | 0 | update_prev_partition_helper(cpi, subsize, mi_row + bs, mi_col + bs); |
1158 | 0 | break; |
1159 | 0 | } |
1160 | 0 | } |
1161 | 0 | } |
1162 | | |
1163 | | static void update_prev_partition(VP9_COMP *cpi, MACROBLOCK *x, int segment_id, |
1164 | 0 | int mi_row, int mi_col, int sb_offset) { |
1165 | 0 | update_prev_partition_helper(cpi, BLOCK_64X64, mi_row, mi_col); |
1166 | 0 | cpi->prev_segment_id[sb_offset] = segment_id; |
1167 | 0 | memcpy(&(cpi->prev_variance_low[sb_offset * 25]), x->variance_low, |
1168 | 0 | sizeof(x->variance_low)); |
1169 | | // Reset the counter for copy partitioning |
1170 | 0 | cpi->copied_frame_cnt[sb_offset] = 0; |
1171 | 0 | } |
1172 | | |
1173 | | static void chroma_check(VP9_COMP *cpi, MACROBLOCK *x, int bsize, |
1174 | | unsigned int y_sad, int is_key_frame, |
1175 | 0 | int scene_change_detected) { |
1176 | 0 | int i; |
1177 | 0 | MACROBLOCKD *xd = &x->e_mbd; |
1178 | 0 | int shift = 2; |
1179 | |
|
1180 | 0 | if (is_key_frame) return; |
1181 | | |
1182 | | // For speed > 8, avoid the chroma check if y_sad is above threshold. |
1183 | 0 | if (cpi->oxcf.speed > 8) { |
1184 | 0 | if (y_sad > cpi->vbp_thresholds[1] && |
1185 | 0 | (!cpi->noise_estimate.enabled || |
1186 | 0 | vp9_noise_estimate_extract_level(&cpi->noise_estimate) < kMedium)) |
1187 | 0 | return; |
1188 | 0 | } |
1189 | | |
1190 | 0 | if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && scene_change_detected) |
1191 | 0 | shift = 5; |
1192 | |
|
1193 | 0 | for (i = 1; i <= 2; ++i) { |
1194 | 0 | unsigned int uv_sad = UINT_MAX; |
1195 | 0 | struct macroblock_plane *p = &x->plane[i]; |
1196 | 0 | struct macroblockd_plane *pd = &xd->plane[i]; |
1197 | 0 | const BLOCK_SIZE bs = get_plane_block_size(bsize, pd); |
1198 | |
|
1199 | 0 | if (bs != BLOCK_INVALID) |
1200 | 0 | uv_sad = cpi->fn_ptr[bs].sdf(p->src.buf, p->src.stride, pd->dst.buf, |
1201 | 0 | pd->dst.stride); |
1202 | | |
1203 | | // TODO(marpan): Investigate if we should lower this threshold if |
1204 | | // superblock is detected as skin. |
1205 | 0 | x->color_sensitivity[i - 1] = uv_sad > (y_sad >> shift); |
1206 | 0 | } |
1207 | 0 | } |
1208 | | |
1209 | | static uint64_t avg_source_sad(VP9_COMP *cpi, MACROBLOCK *x, int shift, |
1210 | 0 | int sb_offset) { |
1211 | 0 | unsigned int tmp_sse; |
1212 | 0 | uint64_t tmp_sad; |
1213 | 0 | unsigned int tmp_variance; |
1214 | 0 | const BLOCK_SIZE bsize = BLOCK_64X64; |
1215 | 0 | uint8_t *src_y = cpi->Source->y_buffer; |
1216 | 0 | int src_ystride = cpi->Source->y_stride; |
1217 | 0 | uint8_t *last_src_y = cpi->Last_Source->y_buffer; |
1218 | 0 | int last_src_ystride = cpi->Last_Source->y_stride; |
1219 | 0 | uint64_t avg_source_sad_threshold = 10000; |
1220 | 0 | uint64_t avg_source_sad_threshold2 = 12000; |
1221 | 0 | #if CONFIG_VP9_HIGHBITDEPTH |
1222 | 0 | if (cpi->common.use_highbitdepth) return 0; |
1223 | 0 | #endif |
1224 | 0 | src_y += shift; |
1225 | 0 | last_src_y += shift; |
1226 | 0 | tmp_sad = |
1227 | 0 | cpi->fn_ptr[bsize].sdf(src_y, src_ystride, last_src_y, last_src_ystride); |
1228 | 0 | tmp_variance = vpx_variance64x64(src_y, src_ystride, last_src_y, |
1229 | 0 | last_src_ystride, &tmp_sse); |
1230 | | // Note: tmp_sse - tmp_variance = ((sum * sum) >> 12) |
1231 | 0 | if (tmp_sad < avg_source_sad_threshold) |
1232 | 0 | x->content_state_sb = ((tmp_sse - tmp_variance) < 25) ? kLowSadLowSumdiff |
1233 | 0 | : kLowSadHighSumdiff; |
1234 | 0 | else |
1235 | 0 | x->content_state_sb = ((tmp_sse - tmp_variance) < 25) ? kHighSadLowSumdiff |
1236 | 0 | : kHighSadHighSumdiff; |
1237 | | |
1238 | | // Detect large lighting change. |
1239 | 0 | if (cpi->oxcf.content != VP9E_CONTENT_SCREEN && |
1240 | 0 | cpi->oxcf.rc_mode == VPX_CBR && tmp_variance < (tmp_sse >> 3) && |
1241 | 0 | (tmp_sse - tmp_variance) > 10000) |
1242 | 0 | x->content_state_sb = kLowVarHighSumdiff; |
1243 | 0 | else if (tmp_sad > (avg_source_sad_threshold << 1)) |
1244 | 0 | x->content_state_sb = kVeryHighSad; |
1245 | |
|
1246 | 0 | if (cpi->content_state_sb_fd != NULL) { |
1247 | 0 | if (tmp_sad < avg_source_sad_threshold2) { |
1248 | | // Cap the increment to 255. |
1249 | 0 | if (cpi->content_state_sb_fd[sb_offset] < 255) |
1250 | 0 | cpi->content_state_sb_fd[sb_offset]++; |
1251 | 0 | } else { |
1252 | 0 | cpi->content_state_sb_fd[sb_offset] = 0; |
1253 | 0 | } |
1254 | 0 | } |
1255 | 0 | if (tmp_sad == 0) x->zero_temp_sad_source = 1; |
1256 | 0 | return tmp_sad; |
1257 | 0 | } |
1258 | | |
1259 | | // This function chooses partitioning based on the variance between source and |
1260 | | // reconstructed last, where variance is computed for down-sampled inputs. |
1261 | | static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile, |
1262 | 0 | MACROBLOCK *x, int mi_row, int mi_col) { |
1263 | 0 | VP9_COMMON *const cm = &cpi->common; |
1264 | 0 | MACROBLOCKD *xd = &x->e_mbd; |
1265 | 0 | int i, j, k, m; |
1266 | 0 | v64x64 vt; |
1267 | 0 | v16x16 *vt2 = NULL; |
1268 | 0 | int force_split[21]; |
1269 | 0 | int avg_32x32; |
1270 | 0 | int max_var_32x32 = 0; |
1271 | 0 | int min_var_32x32 = INT_MAX; |
1272 | 0 | int var_32x32; |
1273 | 0 | int avg_16x16[4]; |
1274 | 0 | int maxvar_16x16[4]; |
1275 | 0 | int minvar_16x16[4]; |
1276 | 0 | int64_t threshold_4x4avg; |
1277 | 0 | NOISE_LEVEL noise_level = kLow; |
1278 | 0 | int content_state = 0; |
1279 | 0 | uint8_t *s; |
1280 | 0 | const uint8_t *d; |
1281 | 0 | int sp; |
1282 | 0 | int dp; |
1283 | 0 | int compute_minmax_variance = 1; |
1284 | 0 | unsigned int y_sad = UINT_MAX; |
1285 | 0 | BLOCK_SIZE bsize = BLOCK_64X64; |
1286 | | // Ref frame used in partitioning. |
1287 | 0 | MV_REFERENCE_FRAME ref_frame_partition = LAST_FRAME; |
1288 | 0 | int pixels_wide = 64, pixels_high = 64; |
1289 | 0 | int64_t thresholds[4] = { cpi->vbp_thresholds[0], cpi->vbp_thresholds[1], |
1290 | 0 | cpi->vbp_thresholds[2], cpi->vbp_thresholds[3] }; |
1291 | 0 | int scene_change_detected = |
1292 | 0 | cpi->rc.high_source_sad || |
1293 | 0 | (cpi->use_svc && cpi->svc.high_source_sad_superframe); |
1294 | 0 | int force_64_split = scene_change_detected || |
1295 | 0 | (cpi->oxcf.content == VP9E_CONTENT_SCREEN && |
1296 | 0 | cpi->compute_source_sad_onepass && |
1297 | 0 | cpi->sf.use_source_sad && !x->zero_temp_sad_source); |
1298 | | |
1299 | | // For the variance computation under SVC mode, we treat the frame as key if |
1300 | | // the reference (base layer frame) is key frame (i.e., is_key_frame == 1). |
1301 | 0 | int is_key_frame = |
1302 | 0 | (frame_is_intra_only(cm) || |
1303 | 0 | (is_one_pass_svc(cpi) && |
1304 | 0 | cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)); |
1305 | |
|
1306 | 0 | if (!is_key_frame) { |
1307 | 0 | if (cm->frame_refs[LAST_FRAME - 1].sf.x_scale_fp == REF_INVALID_SCALE || |
1308 | 0 | cm->frame_refs[LAST_FRAME - 1].sf.y_scale_fp == REF_INVALID_SCALE) |
1309 | 0 | is_key_frame = 1; |
1310 | 0 | } |
1311 | | |
1312 | | // Always use 4x4 partition for key frame. |
1313 | 0 | const int use_4x4_partition = frame_is_intra_only(cm); |
1314 | 0 | const int low_res = (cm->width <= 352 && cm->height <= 288); |
1315 | 0 | int variance4x4downsample[16]; |
1316 | 0 | int segment_id; |
1317 | 0 | int sb_offset = (cm->mi_stride >> 3) * (mi_row >> 3) + (mi_col >> 3); |
1318 | | |
1319 | | // For SVC: check if LAST frame is NULL or if the resolution of LAST is |
1320 | | // different than the current frame resolution, and if so, treat this frame |
1321 | | // as a key frame, for the purpose of the superblock partitioning. |
1322 | | // LAST == NULL can happen in some cases where enhancement spatial layers are |
1323 | | // enabled dyanmically in the stream and the only reference is the spatial |
1324 | | // reference (GOLDEN). |
1325 | 0 | if (cpi->use_svc) { |
1326 | 0 | const YV12_BUFFER_CONFIG *const ref = get_ref_frame_buffer(cpi, LAST_FRAME); |
1327 | 0 | if (ref == NULL || ref->y_crop_height != cm->height || |
1328 | 0 | ref->y_crop_width != cm->width) |
1329 | 0 | is_key_frame = 1; |
1330 | 0 | } |
1331 | |
|
1332 | 0 | set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64); |
1333 | 0 | set_segment_index(cpi, x, mi_row, mi_col, BLOCK_64X64, 0); |
1334 | 0 | segment_id = xd->mi[0]->segment_id; |
1335 | |
|
1336 | 0 | if (cpi->oxcf.speed >= 8 || (cpi->use_svc && cpi->svc.non_reference_frame)) |
1337 | 0 | compute_minmax_variance = 0; |
1338 | |
|
1339 | 0 | memset(x->variance_low, 0, sizeof(x->variance_low)); |
1340 | |
|
1341 | 0 | if (cpi->sf.use_source_sad && !is_key_frame) { |
1342 | 0 | int sb_offset2 = ((cm->mi_cols + 7) >> 3) * (mi_row >> 3) + (mi_col >> 3); |
1343 | 0 | content_state = x->content_state_sb; |
1344 | 0 | x->skip_low_source_sad = (content_state == kLowSadLowSumdiff || |
1345 | 0 | content_state == kLowSadHighSumdiff) |
1346 | 0 | ? 1 |
1347 | 0 | : 0; |
1348 | 0 | x->lowvar_highsumdiff = (content_state == kLowVarHighSumdiff) ? 1 : 0; |
1349 | 0 | if (cpi->content_state_sb_fd != NULL) |
1350 | 0 | x->last_sb_high_content = cpi->content_state_sb_fd[sb_offset2]; |
1351 | | |
1352 | | // For SVC on top spatial layer: use/scale the partition from |
1353 | | // the lower spatial resolution if svc_use_lowres_part is enabled. |
1354 | 0 | if (cpi->sf.svc_use_lowres_part && |
1355 | 0 | cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1 && |
1356 | 0 | cpi->svc.prev_partition_svc != NULL && content_state != kVeryHighSad) { |
1357 | 0 | if (!scale_partitioning_svc(cpi, x, xd, BLOCK_64X64, mi_row >> 1, |
1358 | 0 | mi_col >> 1, mi_row, mi_col)) { |
1359 | 0 | if (cpi->sf.copy_partition_flag) { |
1360 | 0 | update_prev_partition(cpi, x, segment_id, mi_row, mi_col, sb_offset); |
1361 | 0 | } |
1362 | 0 | return 0; |
1363 | 0 | } |
1364 | 0 | } |
1365 | | // If source_sad is low copy the partition without computing the y_sad. |
1366 | 0 | if (x->skip_low_source_sad && cpi->sf.copy_partition_flag && |
1367 | 0 | !force_64_split && |
1368 | 0 | copy_partitioning(cpi, x, xd, mi_row, mi_col, segment_id, sb_offset)) { |
1369 | 0 | x->sb_use_mv_part = 1; |
1370 | 0 | if (cpi->sf.svc_use_lowres_part && |
1371 | 0 | cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 2) |
1372 | 0 | update_partition_svc(cpi, BLOCK_64X64, mi_row, mi_col); |
1373 | 0 | return 0; |
1374 | 0 | } |
1375 | 0 | } |
1376 | | |
1377 | 0 | if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled && |
1378 | 0 | cyclic_refresh_segment_id_boosted(segment_id)) { |
1379 | 0 | int q = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex); |
1380 | 0 | set_vbp_thresholds(cpi, thresholds, q, content_state); |
1381 | 0 | } else { |
1382 | 0 | set_vbp_thresholds(cpi, thresholds, cm->base_qindex, content_state); |
1383 | 0 | } |
1384 | | // Decrease 32x32 split threshold for screen on base layer, for scene |
1385 | | // change/high motion frames. |
1386 | 0 | if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && |
1387 | 0 | cpi->svc.spatial_layer_id == 0 && force_64_split) |
1388 | 0 | thresholds[1] = 3 * thresholds[1] >> 2; |
1389 | | |
1390 | | // For non keyframes, disable 4x4 average for low resolution when speed = 8 |
1391 | 0 | threshold_4x4avg = (cpi->oxcf.speed < 8) ? thresholds[1] << 1 : INT64_MAX; |
1392 | |
|
1393 | 0 | if (xd->mb_to_right_edge < 0) pixels_wide += (xd->mb_to_right_edge >> 3); |
1394 | 0 | if (xd->mb_to_bottom_edge < 0) pixels_high += (xd->mb_to_bottom_edge >> 3); |
1395 | |
|
1396 | 0 | s = x->plane[0].src.buf; |
1397 | 0 | sp = x->plane[0].src.stride; |
1398 | | |
1399 | | // Index for force_split: 0 for 64x64, 1-4 for 32x32 blocks, |
1400 | | // 5-20 for the 16x16 blocks. |
1401 | 0 | force_split[0] = force_64_split; |
1402 | |
|
1403 | 0 | if (!is_key_frame) { |
1404 | | // In the case of spatial/temporal scalable coding, the assumption here is |
1405 | | // that the temporal reference frame will always be of type LAST_FRAME. |
1406 | | // TODO(marpan): If that assumption is broken, we need to revisit this code. |
1407 | 0 | MODE_INFO *mi = xd->mi[0]; |
1408 | 0 | YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME); |
1409 | |
|
1410 | 0 | const YV12_BUFFER_CONFIG *yv12_g = NULL; |
1411 | 0 | unsigned int y_sad_g, y_sad_thr, y_sad_last; |
1412 | 0 | bsize = BLOCK_32X32 + (mi_col + 4 < cm->mi_cols) * 2 + |
1413 | 0 | (mi_row + 4 < cm->mi_rows); |
1414 | |
|
1415 | 0 | assert(yv12 != NULL); |
1416 | |
|
1417 | 0 | if (!(is_one_pass_svc(cpi) && cpi->svc.spatial_layer_id) || |
1418 | 0 | cpi->svc.use_gf_temporal_ref_current_layer) { |
1419 | | // For now, GOLDEN will not be used for non-zero spatial layers, since |
1420 | | // it may not be a temporal reference. |
1421 | 0 | yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME); |
1422 | 0 | } |
1423 | | |
1424 | | // Only compute y_sad_g (sad for golden reference) for speed < 8. |
1425 | 0 | if (cpi->oxcf.speed < 8 && yv12_g && yv12_g != yv12 && |
1426 | 0 | (cpi->ref_frame_flags & VP9_GOLD_FLAG)) { |
1427 | 0 | vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col, |
1428 | 0 | &cm->frame_refs[GOLDEN_FRAME - 1].sf); |
1429 | 0 | y_sad_g = cpi->fn_ptr[bsize].sdf( |
1430 | 0 | x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf, |
1431 | 0 | xd->plane[0].pre[0].stride); |
1432 | 0 | } else { |
1433 | 0 | y_sad_g = UINT_MAX; |
1434 | 0 | } |
1435 | |
|
1436 | 0 | if (cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR && |
1437 | 0 | cpi->rc.is_src_frame_alt_ref) { |
1438 | 0 | yv12 = get_ref_frame_buffer(cpi, ALTREF_FRAME); |
1439 | 0 | vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col, |
1440 | 0 | &cm->frame_refs[ALTREF_FRAME - 1].sf); |
1441 | 0 | mi->ref_frame[0] = ALTREF_FRAME; |
1442 | 0 | y_sad_g = UINT_MAX; |
1443 | 0 | } else { |
1444 | 0 | vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col, |
1445 | 0 | &cm->frame_refs[LAST_FRAME - 1].sf); |
1446 | 0 | mi->ref_frame[0] = LAST_FRAME; |
1447 | 0 | } |
1448 | 0 | mi->ref_frame[1] = NO_REF_FRAME; |
1449 | 0 | mi->sb_type = BLOCK_64X64; |
1450 | 0 | mi->mv[0].as_int = 0; |
1451 | 0 | mi->interp_filter = BILINEAR; |
1452 | |
|
1453 | 0 | if (cpi->oxcf.speed >= 8 && !low_res && |
1454 | 0 | x->content_state_sb != kVeryHighSad) { |
1455 | 0 | y_sad = cpi->fn_ptr[bsize].sdf( |
1456 | 0 | x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf, |
1457 | 0 | xd->plane[0].pre[0].stride); |
1458 | 0 | } else { |
1459 | 0 | const MV dummy_mv = { 0, 0 }; |
1460 | 0 | y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col, |
1461 | 0 | &dummy_mv); |
1462 | 0 | x->sb_use_mv_part = 1; |
1463 | 0 | x->sb_mvcol_part = mi->mv[0].as_mv.col; |
1464 | 0 | x->sb_mvrow_part = mi->mv[0].as_mv.row; |
1465 | 0 | if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && |
1466 | 0 | cpi->svc.spatial_layer_id == cpi->svc.first_spatial_layer_to_encode && |
1467 | 0 | cpi->svc.high_num_blocks_with_motion && !x->zero_temp_sad_source && |
1468 | 0 | cm->width > 640 && cm->height > 480) { |
1469 | | // Disable split below 16x16 block size when scroll motion (horz or |
1470 | | // vert) is detected. |
1471 | | // TODO(marpan/jianj): Improve this condition: issue is that search |
1472 | | // range is hard-coded/limited in vp9_int_pro_motion_estimation() so |
1473 | | // scroll motion may not be detected here. |
1474 | 0 | if (((abs(x->sb_mvrow_part) >= 48 && abs(x->sb_mvcol_part) <= 8) || |
1475 | 0 | (abs(x->sb_mvcol_part) >= 48 && abs(x->sb_mvrow_part) <= 8)) && |
1476 | 0 | y_sad < 100000) { |
1477 | 0 | compute_minmax_variance = 0; |
1478 | 0 | thresholds[2] = INT64_MAX; |
1479 | 0 | } |
1480 | 0 | } |
1481 | 0 | } |
1482 | |
|
1483 | 0 | y_sad_last = y_sad; |
1484 | | // Pick ref frame for partitioning, bias last frame when y_sad_g and y_sad |
1485 | | // are close if short_circuit_low_temp_var is on. |
1486 | 0 | y_sad_thr = cpi->sf.short_circuit_low_temp_var ? (y_sad * 7) >> 3 : y_sad; |
1487 | 0 | if (y_sad_g < y_sad_thr) { |
1488 | 0 | vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col, |
1489 | 0 | &cm->frame_refs[GOLDEN_FRAME - 1].sf); |
1490 | 0 | mi->ref_frame[0] = GOLDEN_FRAME; |
1491 | 0 | mi->mv[0].as_int = 0; |
1492 | 0 | y_sad = y_sad_g; |
1493 | 0 | ref_frame_partition = GOLDEN_FRAME; |
1494 | 0 | } else { |
1495 | 0 | x->pred_mv[LAST_FRAME] = mi->mv[0].as_mv; |
1496 | 0 | ref_frame_partition = LAST_FRAME; |
1497 | 0 | } |
1498 | |
|
1499 | 0 | set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]); |
1500 | 0 | vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_64X64); |
1501 | |
|
1502 | 0 | if (cpi->use_skin_detection) |
1503 | 0 | x->sb_is_skin = skin_sb_split(cpi, low_res, mi_row, mi_col, force_split); |
1504 | |
|
1505 | 0 | d = xd->plane[0].dst.buf; |
1506 | 0 | dp = xd->plane[0].dst.stride; |
1507 | | |
1508 | | // If the y_sad is very small, take 64x64 as partition and exit. |
1509 | | // Don't check on boosted segment for now, as 64x64 is suppressed there. |
1510 | 0 | if (segment_id == CR_SEGMENT_ID_BASE && y_sad < cpi->vbp_threshold_sad) { |
1511 | 0 | const int block_width = num_8x8_blocks_wide_lookup[BLOCK_64X64]; |
1512 | 0 | const int block_height = num_8x8_blocks_high_lookup[BLOCK_64X64]; |
1513 | 0 | if (mi_col + block_width / 2 < cm->mi_cols && |
1514 | 0 | mi_row + block_height / 2 < cm->mi_rows) { |
1515 | 0 | set_block_size(cpi, x, xd, mi_row, mi_col, BLOCK_64X64); |
1516 | 0 | x->variance_low[0] = 1; |
1517 | 0 | chroma_check(cpi, x, bsize, y_sad, is_key_frame, scene_change_detected); |
1518 | 0 | if (cpi->sf.svc_use_lowres_part && |
1519 | 0 | cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 2) |
1520 | 0 | update_partition_svc(cpi, BLOCK_64X64, mi_row, mi_col); |
1521 | 0 | if (cpi->sf.copy_partition_flag) { |
1522 | 0 | update_prev_partition(cpi, x, segment_id, mi_row, mi_col, sb_offset); |
1523 | 0 | } |
1524 | 0 | return 0; |
1525 | 0 | } |
1526 | 0 | } |
1527 | | |
1528 | | // If the y_sad is small enough, copy the partition of the superblock in the |
1529 | | // last frame to current frame only if the last frame is not a keyframe. |
1530 | | // Stop the copy every cpi->max_copied_frame to refresh the partition. |
1531 | | // TODO(jianj) : tune the threshold. |
1532 | 0 | if (cpi->sf.copy_partition_flag && y_sad_last < cpi->vbp_threshold_copy && |
1533 | 0 | copy_partitioning(cpi, x, xd, mi_row, mi_col, segment_id, sb_offset)) { |
1534 | 0 | chroma_check(cpi, x, bsize, y_sad, is_key_frame, scene_change_detected); |
1535 | 0 | if (cpi->sf.svc_use_lowres_part && |
1536 | 0 | cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 2) |
1537 | 0 | update_partition_svc(cpi, BLOCK_64X64, mi_row, mi_col); |
1538 | 0 | return 0; |
1539 | 0 | } |
1540 | 0 | } else { |
1541 | 0 | d = VP9_VAR_OFFS; |
1542 | 0 | dp = 0; |
1543 | 0 | #if CONFIG_VP9_HIGHBITDEPTH |
1544 | 0 | if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { |
1545 | 0 | switch (xd->bd) { |
1546 | 0 | case 10: d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_10); break; |
1547 | 0 | case 12: d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_12); break; |
1548 | 0 | case 8: |
1549 | 0 | default: d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_8); break; |
1550 | 0 | } |
1551 | 0 | } |
1552 | 0 | #endif // CONFIG_VP9_HIGHBITDEPTH |
1553 | 0 | } |
1554 | | |
1555 | 0 | if (low_res && threshold_4x4avg < INT64_MAX) |
1556 | 0 | CHECK_MEM_ERROR(&cm->error, vt2, vpx_calloc(16, sizeof(*vt2))); |
1557 | | // Fill in the entire tree of 8x8 (or 4x4 under some conditions) variances |
1558 | | // for splits. |
1559 | 0 | for (i = 0; i < 4; i++) { |
1560 | 0 | const int x32_idx = ((i & 1) << 5); |
1561 | 0 | const int y32_idx = ((i >> 1) << 5); |
1562 | 0 | const int i2 = i << 2; |
1563 | 0 | force_split[i + 1] = 0; |
1564 | 0 | avg_16x16[i] = 0; |
1565 | 0 | maxvar_16x16[i] = 0; |
1566 | 0 | minvar_16x16[i] = INT_MAX; |
1567 | 0 | for (j = 0; j < 4; j++) { |
1568 | 0 | const int x16_idx = x32_idx + ((j & 1) << 4); |
1569 | 0 | const int y16_idx = y32_idx + ((j >> 1) << 4); |
1570 | 0 | const int split_index = 5 + i2 + j; |
1571 | 0 | v16x16 *vst = &vt.split[i].split[j]; |
1572 | 0 | force_split[split_index] = 0; |
1573 | 0 | variance4x4downsample[i2 + j] = 0; |
1574 | 0 | if (!is_key_frame) { |
1575 | 0 | fill_variance_8x8avg(s, sp, d, dp, x16_idx, y16_idx, vst, |
1576 | 0 | #if CONFIG_VP9_HIGHBITDEPTH |
1577 | 0 | xd->cur_buf->flags, |
1578 | 0 | #endif |
1579 | 0 | pixels_wide, pixels_high, is_key_frame); |
1580 | 0 | fill_variance_tree(&vt.split[i].split[j], BLOCK_16X16); |
1581 | 0 | get_variance(&vt.split[i].split[j].part_variances.none); |
1582 | 0 | avg_16x16[i] += vt.split[i].split[j].part_variances.none.variance; |
1583 | 0 | if (vt.split[i].split[j].part_variances.none.variance < minvar_16x16[i]) |
1584 | 0 | minvar_16x16[i] = vt.split[i].split[j].part_variances.none.variance; |
1585 | 0 | if (vt.split[i].split[j].part_variances.none.variance > maxvar_16x16[i]) |
1586 | 0 | maxvar_16x16[i] = vt.split[i].split[j].part_variances.none.variance; |
1587 | 0 | if (vt.split[i].split[j].part_variances.none.variance > thresholds[2]) { |
1588 | | // 16X16 variance is above threshold for split, so force split to 8x8 |
1589 | | // for this 16x16 block (this also forces splits for upper levels). |
1590 | 0 | force_split[split_index] = 1; |
1591 | 0 | force_split[i + 1] = 1; |
1592 | 0 | force_split[0] = 1; |
1593 | 0 | } else if (compute_minmax_variance && |
1594 | 0 | vt.split[i].split[j].part_variances.none.variance > |
1595 | 0 | thresholds[1] && |
1596 | 0 | !cyclic_refresh_segment_id_boosted(segment_id)) { |
1597 | | // We have some nominal amount of 16x16 variance (based on average), |
1598 | | // compute the minmax over the 8x8 sub-blocks, and if above threshold, |
1599 | | // force split to 8x8 block for this 16x16 block. |
1600 | 0 | int minmax = compute_minmax_8x8(s, sp, d, dp, x16_idx, y16_idx, |
1601 | 0 | #if CONFIG_VP9_HIGHBITDEPTH |
1602 | 0 | xd->cur_buf->flags, |
1603 | 0 | #endif |
1604 | 0 | pixels_wide, pixels_high); |
1605 | 0 | int thresh_minmax = (int)cpi->vbp_threshold_minmax; |
1606 | 0 | if (x->content_state_sb == kVeryHighSad) |
1607 | 0 | thresh_minmax = thresh_minmax << 1; |
1608 | 0 | if (minmax > thresh_minmax) { |
1609 | 0 | force_split[split_index] = 1; |
1610 | 0 | force_split[i + 1] = 1; |
1611 | 0 | force_split[0] = 1; |
1612 | 0 | } |
1613 | 0 | } |
1614 | 0 | } |
1615 | 0 | if (is_key_frame || |
1616 | 0 | (low_res && vt.split[i].split[j].part_variances.none.variance > |
1617 | 0 | threshold_4x4avg)) { |
1618 | 0 | force_split[split_index] = 0; |
1619 | | // Go down to 4x4 down-sampling for variance. |
1620 | 0 | variance4x4downsample[i2 + j] = 1; |
1621 | 0 | for (k = 0; k < 4; k++) { |
1622 | 0 | int x8_idx = x16_idx + ((k & 1) << 3); |
1623 | 0 | int y8_idx = y16_idx + ((k >> 1) << 3); |
1624 | 0 | v8x8 *vst2 = is_key_frame ? &vst->split[k] : &vt2[i2 + j].split[k]; |
1625 | 0 | fill_variance_4x4avg(s, sp, d, dp, x8_idx, y8_idx, vst2, |
1626 | 0 | #if CONFIG_VP9_HIGHBITDEPTH |
1627 | 0 | xd->cur_buf->flags, |
1628 | 0 | #endif |
1629 | 0 | pixels_wide, pixels_high, is_key_frame); |
1630 | 0 | } |
1631 | 0 | } |
1632 | 0 | } |
1633 | 0 | } |
1634 | 0 | if (cpi->noise_estimate.enabled) |
1635 | 0 | noise_level = vp9_noise_estimate_extract_level(&cpi->noise_estimate); |
1636 | | // Fill the rest of the variance tree by summing split partition values. |
1637 | 0 | avg_32x32 = 0; |
1638 | 0 | for (i = 0; i < 4; i++) { |
1639 | 0 | const int i2 = i << 2; |
1640 | 0 | for (j = 0; j < 4; j++) { |
1641 | 0 | if (variance4x4downsample[i2 + j] == 1) { |
1642 | 0 | v16x16 *vtemp = (!is_key_frame) ? &vt2[i2 + j] : &vt.split[i].split[j]; |
1643 | 0 | for (m = 0; m < 4; m++) fill_variance_tree(&vtemp->split[m], BLOCK_8X8); |
1644 | 0 | fill_variance_tree(vtemp, BLOCK_16X16); |
1645 | | // If variance of this 16x16 block is above the threshold, force block |
1646 | | // to split. This also forces a split on the upper levels. |
1647 | 0 | get_variance(&vtemp->part_variances.none); |
1648 | 0 | if (vtemp->part_variances.none.variance > thresholds[2]) { |
1649 | 0 | force_split[5 + i2 + j] = 1; |
1650 | 0 | force_split[i + 1] = 1; |
1651 | 0 | force_split[0] = 1; |
1652 | 0 | } |
1653 | 0 | } |
1654 | 0 | } |
1655 | 0 | fill_variance_tree(&vt.split[i], BLOCK_32X32); |
1656 | | // If variance of this 32x32 block is above the threshold, or if its above |
1657 | | // (some threshold of) the average variance over the sub-16x16 blocks, then |
1658 | | // force this block to split. This also forces a split on the upper |
1659 | | // (64x64) level. |
1660 | 0 | if (!force_split[i + 1]) { |
1661 | 0 | get_variance(&vt.split[i].part_variances.none); |
1662 | 0 | var_32x32 = vt.split[i].part_variances.none.variance; |
1663 | 0 | max_var_32x32 = VPXMAX(var_32x32, max_var_32x32); |
1664 | 0 | min_var_32x32 = VPXMIN(var_32x32, min_var_32x32); |
1665 | 0 | if (vt.split[i].part_variances.none.variance > thresholds[1] || |
1666 | 0 | (!is_key_frame && |
1667 | 0 | vt.split[i].part_variances.none.variance > (thresholds[1] >> 1) && |
1668 | 0 | vt.split[i].part_variances.none.variance > (avg_16x16[i] >> 1))) { |
1669 | 0 | force_split[i + 1] = 1; |
1670 | 0 | force_split[0] = 1; |
1671 | 0 | } else if (!is_key_frame && noise_level < kLow && cm->height <= 360 && |
1672 | 0 | (maxvar_16x16[i] - minvar_16x16[i]) > (thresholds[1] >> 1) && |
1673 | 0 | maxvar_16x16[i] > thresholds[1]) { |
1674 | 0 | force_split[i + 1] = 1; |
1675 | 0 | force_split[0] = 1; |
1676 | 0 | } |
1677 | 0 | avg_32x32 += var_32x32; |
1678 | 0 | } |
1679 | 0 | } |
1680 | 0 | if (!force_split[0]) { |
1681 | 0 | fill_variance_tree(&vt, BLOCK_64X64); |
1682 | 0 | get_variance(&vt.part_variances.none); |
1683 | | // If variance of this 64x64 block is above (some threshold of) the average |
1684 | | // variance over the sub-32x32 blocks, then force this block to split. |
1685 | | // Only checking this for noise level >= medium for now. |
1686 | 0 | if (!is_key_frame && noise_level >= kMedium && |
1687 | 0 | vt.part_variances.none.variance > (9 * avg_32x32) >> 5) |
1688 | 0 | force_split[0] = 1; |
1689 | | // Else if the maximum 32x32 variance minus the miniumum 32x32 variance in |
1690 | | // a 64x64 block is greater than threshold and the maximum 32x32 variance is |
1691 | | // above a miniumum threshold, then force the split of a 64x64 block |
1692 | | // Only check this for low noise. |
1693 | 0 | else if (!is_key_frame && noise_level < kMedium && |
1694 | 0 | (max_var_32x32 - min_var_32x32) > 3 * (thresholds[0] >> 3) && |
1695 | 0 | max_var_32x32 > thresholds[0] >> 1) |
1696 | 0 | force_split[0] = 1; |
1697 | 0 | } |
1698 | | |
1699 | | // Now go through the entire structure, splitting every block size until |
1700 | | // we get to one that's got a variance lower than our threshold. |
1701 | 0 | if (mi_col + 8 > cm->mi_cols || mi_row + 8 > cm->mi_rows || |
1702 | 0 | !set_vt_partitioning(cpi, x, xd, &vt, BLOCK_64X64, mi_row, mi_col, |
1703 | 0 | thresholds[0], BLOCK_16X16, force_split[0])) { |
1704 | 0 | for (i = 0; i < 4; ++i) { |
1705 | 0 | const int x32_idx = ((i & 1) << 2); |
1706 | 0 | const int y32_idx = ((i >> 1) << 2); |
1707 | 0 | const int i2 = i << 2; |
1708 | 0 | if (!set_vt_partitioning(cpi, x, xd, &vt.split[i], BLOCK_32X32, |
1709 | 0 | (mi_row + y32_idx), (mi_col + x32_idx), |
1710 | 0 | thresholds[1], BLOCK_16X16, |
1711 | 0 | force_split[i + 1])) { |
1712 | 0 | for (j = 0; j < 4; ++j) { |
1713 | 0 | const int x16_idx = ((j & 1) << 1); |
1714 | 0 | const int y16_idx = ((j >> 1) << 1); |
1715 | | // For inter frames: if variance4x4downsample[] == 1 for this 16x16 |
1716 | | // block, then the variance is based on 4x4 down-sampling, so use vt2 |
1717 | | // in set_vt_partitioning(), otherwise use vt. |
1718 | 0 | v16x16 *vtemp = (!is_key_frame && variance4x4downsample[i2 + j] == 1) |
1719 | 0 | ? &vt2[i2 + j] |
1720 | 0 | : &vt.split[i].split[j]; |
1721 | 0 | if (!set_vt_partitioning( |
1722 | 0 | cpi, x, xd, vtemp, BLOCK_16X16, mi_row + y32_idx + y16_idx, |
1723 | 0 | mi_col + x32_idx + x16_idx, thresholds[2], cpi->vbp_bsize_min, |
1724 | 0 | force_split[5 + i2 + j])) { |
1725 | 0 | for (k = 0; k < 4; ++k) { |
1726 | 0 | const int x8_idx = (k & 1); |
1727 | 0 | const int y8_idx = (k >> 1); |
1728 | 0 | if (use_4x4_partition) { |
1729 | 0 | if (!set_vt_partitioning(cpi, x, xd, &vtemp->split[k], |
1730 | 0 | BLOCK_8X8, |
1731 | 0 | mi_row + y32_idx + y16_idx + y8_idx, |
1732 | 0 | mi_col + x32_idx + x16_idx + x8_idx, |
1733 | 0 | thresholds[3], BLOCK_8X8, 0)) { |
1734 | 0 | set_block_size( |
1735 | 0 | cpi, x, xd, (mi_row + y32_idx + y16_idx + y8_idx), |
1736 | 0 | (mi_col + x32_idx + x16_idx + x8_idx), BLOCK_4X4); |
1737 | 0 | } |
1738 | 0 | } else { |
1739 | 0 | set_block_size( |
1740 | 0 | cpi, x, xd, (mi_row + y32_idx + y16_idx + y8_idx), |
1741 | 0 | (mi_col + x32_idx + x16_idx + x8_idx), BLOCK_8X8); |
1742 | 0 | } |
1743 | 0 | } |
1744 | 0 | } |
1745 | 0 | } |
1746 | 0 | } |
1747 | 0 | } |
1748 | 0 | } |
1749 | |
|
1750 | 0 | if (!frame_is_intra_only(cm) && cpi->sf.copy_partition_flag) { |
1751 | 0 | update_prev_partition(cpi, x, segment_id, mi_row, mi_col, sb_offset); |
1752 | 0 | } |
1753 | |
|
1754 | 0 | if (!frame_is_intra_only(cm) && cpi->sf.svc_use_lowres_part && |
1755 | 0 | cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 2) |
1756 | 0 | update_partition_svc(cpi, BLOCK_64X64, mi_row, mi_col); |
1757 | |
|
1758 | 0 | if (cpi->sf.short_circuit_low_temp_var) { |
1759 | 0 | set_low_temp_var_flag(cpi, x, xd, &vt, thresholds, ref_frame_partition, |
1760 | 0 | mi_col, mi_row); |
1761 | 0 | } |
1762 | |
|
1763 | 0 | chroma_check(cpi, x, bsize, y_sad, is_key_frame, scene_change_detected); |
1764 | 0 | if (vt2) vpx_free(vt2); |
1765 | 0 | return 0; |
1766 | 0 | } |
1767 | | |
1768 | | #if !CONFIG_REALTIME_ONLY |
1769 | | static void update_state(VP9_COMP *cpi, ThreadData *td, PICK_MODE_CONTEXT *ctx, |
1770 | | int mi_row, int mi_col, BLOCK_SIZE bsize, |
1771 | 7.54M | int output_enabled) { |
1772 | 7.54M | int i, x_idx, y; |
1773 | 7.54M | VP9_COMMON *const cm = &cpi->common; |
1774 | 7.54M | RD_COUNTS *const rdc = &td->rd_counts; |
1775 | 7.54M | MACROBLOCK *const x = &td->mb; |
1776 | 7.54M | MACROBLOCKD *const xd = &x->e_mbd; |
1777 | 7.54M | struct macroblock_plane *const p = x->plane; |
1778 | 7.54M | struct macroblockd_plane *const pd = xd->plane; |
1779 | 7.54M | MODE_INFO *mi = &ctx->mic; |
1780 | 7.54M | MODE_INFO *const xdmi = xd->mi[0]; |
1781 | 7.54M | MODE_INFO *mi_addr = xd->mi[0]; |
1782 | 7.54M | const struct segmentation *const seg = &cm->seg; |
1783 | 7.54M | const int bw = num_8x8_blocks_wide_lookup[mi->sb_type]; |
1784 | 7.54M | const int bh = num_8x8_blocks_high_lookup[mi->sb_type]; |
1785 | 7.54M | const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col); |
1786 | 7.54M | const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row); |
1787 | 7.54M | MV_REF *const frame_mvs = cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col; |
1788 | 7.54M | int w, h; |
1789 | | |
1790 | 7.54M | const int mis = cm->mi_stride; |
1791 | 7.54M | const int mi_width = num_8x8_blocks_wide_lookup[bsize]; |
1792 | 7.54M | const int mi_height = num_8x8_blocks_high_lookup[bsize]; |
1793 | 7.54M | int max_plane; |
1794 | | |
1795 | 7.54M | assert(mi->sb_type == bsize); |
1796 | | |
1797 | 7.54M | *mi_addr = *mi; |
1798 | 7.54M | *x->mbmi_ext = ctx->mbmi_ext; |
1799 | | |
1800 | | // If segmentation in use |
1801 | 7.54M | if (seg->enabled) { |
1802 | | // For in frame complexity AQ copy the segment id from the segment map. |
1803 | 0 | if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) { |
1804 | 0 | const uint8_t *const map = |
1805 | 0 | seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map; |
1806 | 0 | mi_addr->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); |
1807 | 0 | } |
1808 | | // Else for cyclic refresh mode update the segment map, set the segment id |
1809 | | // and then update the quantizer. |
1810 | 0 | if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && |
1811 | 0 | cpi->cyclic_refresh->content_mode) { |
1812 | 0 | vp9_cyclic_refresh_update_segment(cpi, xd->mi[0], mi_row, mi_col, bsize, |
1813 | 0 | ctx->rate, ctx->dist, x->skip, p); |
1814 | 0 | } |
1815 | 0 | } |
1816 | | |
1817 | 7.54M | max_plane = is_inter_block(xdmi) ? MAX_MB_PLANE : 1; |
1818 | 19.6M | for (i = 0; i < max_plane; ++i) { |
1819 | 12.1M | p[i].coeff = ctx->coeff_pbuf[i][1]; |
1820 | 12.1M | p[i].qcoeff = ctx->qcoeff_pbuf[i][1]; |
1821 | 12.1M | pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1]; |
1822 | 12.1M | p[i].eobs = ctx->eobs_pbuf[i][1]; |
1823 | 12.1M | } |
1824 | | |
1825 | 18.0M | for (i = max_plane; i < MAX_MB_PLANE; ++i) { |
1826 | 10.4M | p[i].coeff = ctx->coeff_pbuf[i][2]; |
1827 | 10.4M | p[i].qcoeff = ctx->qcoeff_pbuf[i][2]; |
1828 | 10.4M | pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2]; |
1829 | 10.4M | p[i].eobs = ctx->eobs_pbuf[i][2]; |
1830 | 10.4M | } |
1831 | | |
1832 | | // Restore the coding context of the MB to that that was in place |
1833 | | // when the mode was picked for it |
1834 | 16.4M | for (y = 0; y < mi_height; y++) |
1835 | 21.5M | for (x_idx = 0; x_idx < mi_width; x_idx++) |
1836 | 12.7M | if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width > x_idx && |
1837 | 12.7M | (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height > y) { |
1838 | 12.4M | xd->mi[x_idx + y * mis] = mi_addr; |
1839 | 12.4M | } |
1840 | | |
1841 | 7.54M | if (cpi->oxcf.aq_mode != NO_AQ) vp9_init_plane_quantizers(cpi, x); |
1842 | | |
1843 | 7.54M | if (is_inter_block(xdmi) && xdmi->sb_type < BLOCK_8X8) { |
1844 | 945k | xdmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int; |
1845 | 945k | xdmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int; |
1846 | 945k | } |
1847 | | |
1848 | 7.54M | x->skip = ctx->skip; |
1849 | 7.54M | memcpy(x->zcoeff_blk[xdmi->tx_size], ctx->zcoeff_blk, |
1850 | 7.54M | sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk); |
1851 | | |
1852 | 7.54M | if (!output_enabled) return; |
1853 | | |
1854 | | #if CONFIG_INTERNAL_STATS |
1855 | | if (frame_is_intra_only(cm)) { |
1856 | | static const int kf_mode_index[] = { |
1857 | | THR_DC /*DC_PRED*/, THR_V_PRED /*V_PRED*/, |
1858 | | THR_H_PRED /*H_PRED*/, THR_D45_PRED /*D45_PRED*/, |
1859 | | THR_D135_PRED /*D135_PRED*/, THR_D117_PRED /*D117_PRED*/, |
1860 | | THR_D153_PRED /*D153_PRED*/, THR_D207_PRED /*D207_PRED*/, |
1861 | | THR_D63_PRED /*D63_PRED*/, THR_TM /*TM_PRED*/, |
1862 | | }; |
1863 | | ++cpi->mode_chosen_counts[kf_mode_index[xdmi->mode]]; |
1864 | | } else { |
1865 | | // Note how often each mode chosen as best |
1866 | | ++cpi->mode_chosen_counts[ctx->best_mode_index]; |
1867 | | } |
1868 | | #endif |
1869 | 1.89M | if (!frame_is_intra_only(cm)) { |
1870 | 1.39M | if (is_inter_block(xdmi)) { |
1871 | 582k | vp9_update_mv_count(td); |
1872 | | |
1873 | 582k | if (cm->interp_filter == SWITCHABLE) { |
1874 | 328k | const int ctx_interp = get_pred_context_switchable_interp(xd); |
1875 | 328k | ++td->counts->switchable_interp[ctx_interp][xdmi->interp_filter]; |
1876 | 328k | } |
1877 | 582k | } |
1878 | | |
1879 | 1.39M | rdc->comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff; |
1880 | 1.39M | rdc->comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff; |
1881 | 1.39M | rdc->comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff; |
1882 | | |
1883 | 6.97M | for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) |
1884 | 5.58M | rdc->filter_diff[i] += ctx->best_filter_diff[i]; |
1885 | 1.39M | } |
1886 | | |
1887 | 4.07M | for (h = 0; h < y_mis; ++h) { |
1888 | 2.17M | MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols; |
1889 | 5.42M | for (w = 0; w < x_mis; ++w) { |
1890 | 3.24M | MV_REF *const mv = frame_mv + w; |
1891 | 3.24M | mv->ref_frame[0] = mi->ref_frame[0]; |
1892 | 3.24M | mv->ref_frame[1] = mi->ref_frame[1]; |
1893 | 3.24M | mv->mv[0].as_int = mi->mv[0].as_int; |
1894 | 3.24M | mv->mv[1].as_int = mi->mv[1].as_int; |
1895 | 3.24M | } |
1896 | 2.17M | } |
1897 | 1.89M | } |
1898 | | #endif // !CONFIG_REALTIME_ONLY |
1899 | | |
1900 | | void vp9_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src, |
1901 | 29.1M | int mi_row, int mi_col) { |
1902 | 29.1M | uint8_t *const buffers[3] = { src->y_buffer, src->u_buffer, src->v_buffer }; |
1903 | 29.1M | const int strides[3] = { src->y_stride, src->uv_stride, src->uv_stride }; |
1904 | 29.1M | int i; |
1905 | | |
1906 | | // Set current frame pointer. |
1907 | 29.1M | x->e_mbd.cur_buf = src; |
1908 | | |
1909 | 116M | for (i = 0; i < MAX_MB_PLANE; i++) |
1910 | 87.3M | setup_pred_plane(&x->plane[i].src, buffers[i], strides[i], mi_row, mi_col, |
1911 | 87.3M | NULL, x->e_mbd.plane[i].subsampling_x, |
1912 | 87.3M | x->e_mbd.plane[i].subsampling_y); |
1913 | 29.1M | } |
1914 | | |
1915 | | static void set_mode_info_seg_skip(MACROBLOCK *x, TX_MODE tx_mode, |
1916 | | INTERP_FILTER interp_filter, |
1917 | 0 | RD_COST *rd_cost, BLOCK_SIZE bsize) { |
1918 | 0 | MACROBLOCKD *const xd = &x->e_mbd; |
1919 | 0 | MODE_INFO *const mi = xd->mi[0]; |
1920 | 0 | INTERP_FILTER filter_ref; |
1921 | |
|
1922 | 0 | filter_ref = get_pred_context_switchable_interp(xd); |
1923 | 0 | if (interp_filter == BILINEAR) |
1924 | 0 | filter_ref = BILINEAR; |
1925 | 0 | else if (filter_ref == SWITCHABLE_FILTERS) |
1926 | 0 | filter_ref = EIGHTTAP; |
1927 | |
|
1928 | 0 | mi->sb_type = bsize; |
1929 | 0 | mi->mode = ZEROMV; |
1930 | 0 | mi->tx_size = |
1931 | 0 | VPXMIN(max_txsize_lookup[bsize], tx_mode_to_biggest_tx_size[tx_mode]); |
1932 | 0 | mi->skip = 1; |
1933 | 0 | mi->uv_mode = DC_PRED; |
1934 | 0 | mi->ref_frame[0] = LAST_FRAME; |
1935 | 0 | mi->ref_frame[1] = NO_REF_FRAME; |
1936 | 0 | mi->mv[0].as_int = 0; |
1937 | 0 | mi->interp_filter = filter_ref; |
1938 | |
|
1939 | 0 | xd->mi[0]->bmi[0].as_mv[0].as_int = 0; |
1940 | 0 | x->skip = 1; |
1941 | |
|
1942 | 0 | vp9_rd_cost_init(rd_cost); |
1943 | 0 | } |
1944 | | |
1945 | | #if !CONFIG_REALTIME_ONLY |
1946 | | static void set_segment_rdmult(VP9_COMP *const cpi, MACROBLOCK *const x, |
1947 | | int mi_row, int mi_col, BLOCK_SIZE bsize, |
1948 | 8.85M | AQ_MODE aq_mode) { |
1949 | 8.85M | VP9_COMMON *const cm = &cpi->common; |
1950 | 8.85M | const VP9EncoderConfig *const oxcf = &cpi->oxcf; |
1951 | 8.85M | const uint8_t *const map = |
1952 | 8.85M | cm->seg.update_map ? cpi->segmentation_map : cm->last_frame_seg_map; |
1953 | | |
1954 | 8.85M | vp9_init_plane_quantizers(cpi, x); |
1955 | 8.85M | vpx_clear_system_state(); |
1956 | | |
1957 | 8.85M | if (aq_mode == NO_AQ || aq_mode == PSNR_AQ) { |
1958 | 8.85M | if (cpi->sf.enable_tpl_model) x->rdmult = x->cb_rdmult; |
1959 | 8.85M | } else if (aq_mode == PERCEPTUAL_AQ) { |
1960 | 0 | x->rdmult = x->cb_rdmult; |
1961 | 0 | } else if (aq_mode == CYCLIC_REFRESH_AQ) { |
1962 | | // If segment is boosted, use rdmult for that segment. |
1963 | 0 | if (cyclic_refresh_segment_id_boosted( |
1964 | 0 | get_segment_id(cm, map, bsize, mi_row, mi_col))) |
1965 | 0 | x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh); |
1966 | 0 | } else { |
1967 | 0 | x->rdmult = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q); |
1968 | 0 | } |
1969 | | |
1970 | 8.85M | if (oxcf->tuning == VP8_TUNE_SSIM) { |
1971 | 0 | set_ssim_rdmult(cpi, x, bsize, mi_row, mi_col, &x->rdmult); |
1972 | 0 | } |
1973 | 8.85M | } |
1974 | | |
1975 | | static void rd_pick_sb_modes(VP9_COMP *cpi, TileDataEnc *tile_data, |
1976 | | MACROBLOCK *const x, int mi_row, int mi_col, |
1977 | | RD_COST *rd_cost, BLOCK_SIZE bsize, |
1978 | | PICK_MODE_CONTEXT *ctx, int rate_in_best_rd, |
1979 | 8.85M | int64_t dist_in_best_rd) { |
1980 | 8.85M | VP9_COMMON *const cm = &cpi->common; |
1981 | 8.85M | TileInfo *const tile_info = &tile_data->tile_info; |
1982 | 8.85M | MACROBLOCKD *const xd = &x->e_mbd; |
1983 | 8.85M | MODE_INFO *mi; |
1984 | 8.85M | struct macroblock_plane *const p = x->plane; |
1985 | 8.85M | struct macroblockd_plane *const pd = xd->plane; |
1986 | 8.85M | const AQ_MODE aq_mode = cpi->oxcf.aq_mode; |
1987 | 8.85M | int i, orig_rdmult; |
1988 | 8.85M | int64_t best_rd = INT64_MAX; |
1989 | | |
1990 | 8.85M | vpx_clear_system_state(); |
1991 | | #if CONFIG_COLLECT_COMPONENT_TIMING |
1992 | | start_timing(cpi, rd_pick_sb_modes_time); |
1993 | | #endif |
1994 | | |
1995 | | // Use the lower precision, but faster, 32x32 fdct for mode selection. |
1996 | 8.85M | x->use_lp32x32fdct = 1; |
1997 | | |
1998 | 8.85M | set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); |
1999 | 8.85M | mi = xd->mi[0]; |
2000 | 8.85M | mi->sb_type = bsize; |
2001 | | |
2002 | 35.4M | for (i = 0; i < MAX_MB_PLANE; ++i) { |
2003 | 26.5M | p[i].coeff = ctx->coeff_pbuf[i][0]; |
2004 | 26.5M | p[i].qcoeff = ctx->qcoeff_pbuf[i][0]; |
2005 | 26.5M | pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][0]; |
2006 | 26.5M | p[i].eobs = ctx->eobs_pbuf[i][0]; |
2007 | 26.5M | } |
2008 | 8.85M | ctx->is_coded = 0; |
2009 | 8.85M | ctx->skippable = 0; |
2010 | 8.85M | ctx->pred_pixel_ready = 0; |
2011 | 8.85M | x->skip_recode = 0; |
2012 | | |
2013 | | // Set to zero to make sure we do not use the previous encoded frame stats |
2014 | 8.85M | mi->skip = 0; |
2015 | | |
2016 | 8.85M | #if CONFIG_VP9_HIGHBITDEPTH |
2017 | 8.85M | if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { |
2018 | 0 | x->source_variance = vp9_high_get_sby_perpixel_variance( |
2019 | 0 | cpi, &x->plane[0].src, bsize, xd->bd); |
2020 | 8.85M | } else { |
2021 | 8.85M | x->source_variance = |
2022 | 8.85M | vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize); |
2023 | 8.85M | } |
2024 | | #else |
2025 | | x->source_variance = |
2026 | | vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize); |
2027 | | #endif // CONFIG_VP9_HIGHBITDEPTH |
2028 | | |
2029 | | // Save rdmult before it might be changed, so it can be restored later. |
2030 | 8.85M | orig_rdmult = x->rdmult; |
2031 | | |
2032 | 8.85M | if ((cpi->sf.tx_domain_thresh > 0.0) || |
2033 | 8.85M | (cpi->sf.trellis_opt_tx_rd.thresh > 0.0)) { |
2034 | 2.61M | double logvar = vp9_log_block_var(cpi, x, bsize); |
2035 | | // Check block complexity as part of decision on using pixel or transform |
2036 | | // domain distortion in rd tests. |
2037 | 2.61M | x->block_tx_domain = cpi->sf.allow_txfm_domain_distortion && |
2038 | 2.61M | (logvar >= cpi->sf.tx_domain_thresh); |
2039 | | |
2040 | | // Store block complexity to decide on using quantized coefficient |
2041 | | // optimization inside the rd loop. |
2042 | 2.61M | x->log_block_src_var = logvar; |
2043 | 6.23M | } else { |
2044 | 6.23M | x->block_tx_domain = cpi->sf.allow_txfm_domain_distortion; |
2045 | 6.23M | x->log_block_src_var = 0.0; |
2046 | 6.23M | } |
2047 | | |
2048 | 8.85M | set_segment_index(cpi, x, mi_row, mi_col, bsize, 0); |
2049 | 8.85M | set_segment_rdmult(cpi, x, mi_row, mi_col, bsize, aq_mode); |
2050 | 8.85M | if (rate_in_best_rd < INT_MAX && dist_in_best_rd < INT64_MAX) { |
2051 | 8.41M | best_rd = vp9_calculate_rd_cost(x->rdmult, x->rddiv, rate_in_best_rd, |
2052 | 8.41M | dist_in_best_rd); |
2053 | 8.41M | } |
2054 | | |
2055 | | // Find best coding mode & reconstruct the MB so it is available |
2056 | | // as a predictor for MBs that follow in the SB |
2057 | 8.85M | if (frame_is_intra_only(cm)) { |
2058 | 2.85M | vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, best_rd); |
2059 | 5.99M | } else { |
2060 | 5.99M | if (bsize >= BLOCK_8X8) { |
2061 | | #if CONFIG_COLLECT_COMPONENT_TIMING |
2062 | | start_timing(cpi, vp9_rd_pick_inter_mode_sb_time); |
2063 | | #endif |
2064 | 3.25M | if (segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP)) |
2065 | 0 | vp9_rd_pick_inter_mode_sb_seg_skip(cpi, tile_data, x, rd_cost, bsize, |
2066 | 0 | ctx, best_rd); |
2067 | 3.25M | else |
2068 | 3.25M | vp9_rd_pick_inter_mode_sb(cpi, tile_data, x, mi_row, mi_col, rd_cost, |
2069 | 3.25M | bsize, ctx, best_rd); |
2070 | | #if CONFIG_COLLECT_COMPONENT_TIMING |
2071 | | end_timing(cpi, vp9_rd_pick_inter_mode_sb_time); |
2072 | | #endif |
2073 | 3.25M | } else { |
2074 | | #if CONFIG_COLLECT_COMPONENT_TIMING |
2075 | | start_timing(cpi, vp9_rd_pick_inter_mode_sub8x8_time); |
2076 | | #endif |
2077 | 2.74M | vp9_rd_pick_inter_mode_sub8x8(cpi, tile_data, x, mi_row, mi_col, rd_cost, |
2078 | 2.74M | bsize, ctx, best_rd); |
2079 | | #if CONFIG_COLLECT_COMPONENT_TIMING |
2080 | | end_timing(cpi, vp9_rd_pick_inter_mode_sub8x8_time); |
2081 | | #endif |
2082 | 2.74M | } |
2083 | 5.99M | } |
2084 | | |
2085 | | // Examine the resulting rate and for AQ mode 2 make a segment choice. |
2086 | 8.85M | if ((rd_cost->rate != INT_MAX) && (aq_mode == COMPLEXITY_AQ) && |
2087 | 8.85M | (bsize >= BLOCK_16X16) && |
2088 | 8.85M | (cm->frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame || |
2089 | 0 | (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref))) { |
2090 | 0 | vp9_caq_select_segment(cpi, x, bsize, mi_row, mi_col, rd_cost->rate); |
2091 | 0 | } |
2092 | | |
2093 | | // TODO(jingning) The rate-distortion optimization flow needs to be |
2094 | | // refactored to provide proper exit/return handle. |
2095 | 8.85M | if (rd_cost->rate == INT_MAX || rd_cost->dist == INT64_MAX) |
2096 | 2.75M | rd_cost->rdcost = INT64_MAX; |
2097 | 6.09M | else |
2098 | 6.09M | rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist); |
2099 | | |
2100 | 8.85M | x->rdmult = orig_rdmult; |
2101 | | |
2102 | 8.85M | ctx->rate = rd_cost->rate; |
2103 | 8.85M | ctx->dist = rd_cost->dist; |
2104 | | #if CONFIG_COLLECT_COMPONENT_TIMING |
2105 | | end_timing(cpi, rd_pick_sb_modes_time); |
2106 | | #endif |
2107 | 8.85M | } |
2108 | | #endif // !CONFIG_REALTIME_ONLY |
2109 | | |
2110 | 1.89M | static void update_stats(VP9_COMMON *cm, ThreadData *td) { |
2111 | 1.89M | const MACROBLOCK *x = &td->mb; |
2112 | 1.89M | const MACROBLOCKD *const xd = &x->e_mbd; |
2113 | 1.89M | const MODE_INFO *const mi = xd->mi[0]; |
2114 | 1.89M | const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; |
2115 | 1.89M | const BLOCK_SIZE bsize = mi->sb_type; |
2116 | | |
2117 | 1.89M | if (!frame_is_intra_only(cm)) { |
2118 | 1.39M | FRAME_COUNTS *const counts = td->counts; |
2119 | 1.39M | const int inter_block = is_inter_block(mi); |
2120 | 1.39M | const int seg_ref_active = |
2121 | 1.39M | segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_REF_FRAME); |
2122 | 1.39M | if (!seg_ref_active) { |
2123 | 1.39M | counts->intra_inter[get_intra_inter_context(xd)][inter_block]++; |
2124 | | // If the segment reference feature is enabled we have only a single |
2125 | | // reference frame allowed for the segment so exclude it from |
2126 | | // the reference frame counts used to work out probabilities. |
2127 | 1.39M | if (inter_block) { |
2128 | 582k | const MV_REFERENCE_FRAME ref0 = mi->ref_frame[0]; |
2129 | 582k | if (cm->reference_mode == REFERENCE_MODE_SELECT) |
2130 | 0 | counts->comp_inter[vp9_get_reference_mode_context(cm, xd)] |
2131 | 0 | [has_second_ref(mi)]++; |
2132 | | |
2133 | 582k | if (has_second_ref(mi)) { |
2134 | 0 | const int idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref]; |
2135 | 0 | const int ctx = vp9_get_pred_context_comp_ref_p(cm, xd); |
2136 | 0 | const int bit = mi->ref_frame[!idx] == cm->comp_var_ref[1]; |
2137 | 0 | counts->comp_ref[ctx][bit]++; |
2138 | 582k | } else { |
2139 | 582k | counts->single_ref[vp9_get_pred_context_single_ref_p1(xd)][0] |
2140 | 582k | [ref0 != LAST_FRAME]++; |
2141 | 582k | if (ref0 != LAST_FRAME) |
2142 | 212k | counts->single_ref[vp9_get_pred_context_single_ref_p2(xd)][1] |
2143 | 212k | [ref0 != GOLDEN_FRAME]++; |
2144 | 582k | } |
2145 | 582k | } |
2146 | 1.39M | } |
2147 | 1.39M | if (inter_block && |
2148 | 1.39M | !segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP)) { |
2149 | 582k | const int mode_ctx = mbmi_ext->mode_context[mi->ref_frame[0]]; |
2150 | 582k | if (bsize >= BLOCK_8X8) { |
2151 | 302k | const PREDICTION_MODE mode = mi->mode; |
2152 | 302k | ++counts->inter_mode[mode_ctx][INTER_OFFSET(mode)]; |
2153 | 302k | } else { |
2154 | 280k | const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; |
2155 | 280k | const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; |
2156 | 280k | int idx, idy; |
2157 | 828k | for (idy = 0; idy < 2; idy += num_4x4_h) { |
2158 | 1.56M | for (idx = 0; idx < 2; idx += num_4x4_w) { |
2159 | 1.01M | const int j = idy * 2 + idx; |
2160 | 1.01M | const PREDICTION_MODE b_mode = mi->bmi[j].as_mode; |
2161 | 1.01M | ++counts->inter_mode[mode_ctx][INTER_OFFSET(b_mode)]; |
2162 | 1.01M | } |
2163 | 548k | } |
2164 | 280k | } |
2165 | 582k | } |
2166 | 1.39M | } |
2167 | 1.89M | } |
2168 | | |
2169 | | #if !CONFIG_REALTIME_ONLY |
2170 | | static void restore_context(MACROBLOCK *const x, int mi_row, int mi_col, |
2171 | | ENTROPY_CONTEXT a[16 * MAX_MB_PLANE], |
2172 | | ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], |
2173 | | PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8], |
2174 | 9.08M | BLOCK_SIZE bsize) { |
2175 | 9.08M | MACROBLOCKD *const xd = &x->e_mbd; |
2176 | 9.08M | int p; |
2177 | 9.08M | const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; |
2178 | 9.08M | const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; |
2179 | 9.08M | int mi_width = num_8x8_blocks_wide_lookup[bsize]; |
2180 | 9.08M | int mi_height = num_8x8_blocks_high_lookup[bsize]; |
2181 | 36.3M | for (p = 0; p < MAX_MB_PLANE; p++) { |
2182 | 27.2M | memcpy(xd->above_context[p] + ((mi_col * 2) >> xd->plane[p].subsampling_x), |
2183 | 27.2M | a + num_4x4_blocks_wide * p, |
2184 | 27.2M | (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >> |
2185 | 27.2M | xd->plane[p].subsampling_x); |
2186 | 27.2M | memcpy(xd->left_context[p] + |
2187 | 27.2M | ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y), |
2188 | 27.2M | l + num_4x4_blocks_high * p, |
2189 | 27.2M | (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >> |
2190 | 27.2M | xd->plane[p].subsampling_y); |
2191 | 27.2M | } |
2192 | 9.08M | memcpy(xd->above_seg_context + mi_col, sa, |
2193 | 9.08M | sizeof(*xd->above_seg_context) * mi_width); |
2194 | 9.08M | memcpy(xd->left_seg_context + (mi_row & MI_MASK), sl, |
2195 | 9.08M | sizeof(xd->left_seg_context[0]) * mi_height); |
2196 | 9.08M | } |
2197 | | |
2198 | | static void save_context(MACROBLOCK *const x, int mi_row, int mi_col, |
2199 | | ENTROPY_CONTEXT a[16 * MAX_MB_PLANE], |
2200 | | ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], |
2201 | | PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8], |
2202 | 3.41M | BLOCK_SIZE bsize) { |
2203 | 3.41M | const MACROBLOCKD *const xd = &x->e_mbd; |
2204 | 3.41M | int p; |
2205 | 3.41M | const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; |
2206 | 3.41M | const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; |
2207 | 3.41M | int mi_width = num_8x8_blocks_wide_lookup[bsize]; |
2208 | 3.41M | int mi_height = num_8x8_blocks_high_lookup[bsize]; |
2209 | | |
2210 | | // buffer the above/left context information of the block in search. |
2211 | 13.6M | for (p = 0; p < MAX_MB_PLANE; ++p) { |
2212 | 10.2M | memcpy(a + num_4x4_blocks_wide * p, |
2213 | 10.2M | xd->above_context[p] + (mi_col * 2 >> xd->plane[p].subsampling_x), |
2214 | 10.2M | (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >> |
2215 | 10.2M | xd->plane[p].subsampling_x); |
2216 | 10.2M | memcpy(l + num_4x4_blocks_high * p, |
2217 | 10.2M | xd->left_context[p] + |
2218 | 10.2M | ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y), |
2219 | 10.2M | (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >> |
2220 | 10.2M | xd->plane[p].subsampling_y); |
2221 | 10.2M | } |
2222 | 3.41M | memcpy(sa, xd->above_seg_context + mi_col, |
2223 | 3.41M | sizeof(*xd->above_seg_context) * mi_width); |
2224 | 3.41M | memcpy(sl, xd->left_seg_context + (mi_row & MI_MASK), |
2225 | 3.41M | sizeof(xd->left_seg_context[0]) * mi_height); |
2226 | 3.41M | } |
2227 | | |
2228 | | static void encode_b(VP9_COMP *cpi, const TileInfo *const tile, ThreadData *td, |
2229 | | TOKENEXTRA **tp, int mi_row, int mi_col, |
2230 | | int output_enabled, BLOCK_SIZE bsize, |
2231 | 6.80M | PICK_MODE_CONTEXT *ctx) { |
2232 | 6.80M | MACROBLOCK *const x = &td->mb; |
2233 | 6.80M | set_offsets(cpi, tile, x, mi_row, mi_col, bsize); |
2234 | | |
2235 | 6.80M | if (cpi->sf.enable_tpl_model && |
2236 | 6.80M | (cpi->oxcf.aq_mode == NO_AQ || cpi->oxcf.aq_mode == PERCEPTUAL_AQ)) { |
2237 | 2.97M | const VP9EncoderConfig *const oxcf = &cpi->oxcf; |
2238 | 2.97M | x->rdmult = x->cb_rdmult; |
2239 | 2.97M | if (oxcf->tuning == VP8_TUNE_SSIM) { |
2240 | 0 | set_ssim_rdmult(cpi, x, bsize, mi_row, mi_col, &x->rdmult); |
2241 | 0 | } |
2242 | 2.97M | } |
2243 | | |
2244 | 6.80M | update_state(cpi, td, ctx, mi_row, mi_col, bsize, output_enabled); |
2245 | 6.80M | encode_superblock(cpi, td, tp, output_enabled, mi_row, mi_col, bsize, ctx); |
2246 | | |
2247 | 6.80M | if (output_enabled) { |
2248 | 1.89M | update_stats(&cpi->common, td); |
2249 | | |
2250 | 1.89M | (*tp)->token = EOSB_TOKEN; |
2251 | 1.89M | (*tp)++; |
2252 | 1.89M | } |
2253 | 6.80M | } |
2254 | | |
2255 | | static void encode_sb(VP9_COMP *cpi, ThreadData *td, const TileInfo *const tile, |
2256 | | TOKENEXTRA **tp, int mi_row, int mi_col, |
2257 | 8.74M | int output_enabled, BLOCK_SIZE bsize, PC_TREE *pc_tree) { |
2258 | 8.74M | VP9_COMMON *const cm = &cpi->common; |
2259 | 8.74M | MACROBLOCK *const x = &td->mb; |
2260 | 8.74M | MACROBLOCKD *const xd = &x->e_mbd; |
2261 | | |
2262 | 8.74M | const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4; |
2263 | 8.74M | int ctx; |
2264 | 8.74M | PARTITION_TYPE partition; |
2265 | 8.74M | BLOCK_SIZE subsize = bsize; |
2266 | | |
2267 | 8.74M | if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; |
2268 | | |
2269 | 8.23M | if (bsize >= BLOCK_8X8) { |
2270 | 8.23M | ctx = partition_plane_context(xd, mi_row, mi_col, bsize); |
2271 | 8.23M | subsize = get_subsize(bsize, pc_tree->partitioning); |
2272 | 8.23M | } else { |
2273 | 0 | ctx = 0; |
2274 | 0 | subsize = BLOCK_4X4; |
2275 | 0 | } |
2276 | | |
2277 | 8.23M | partition = partition_lookup[bsl][subsize]; |
2278 | 8.23M | if (output_enabled && bsize != BLOCK_4X4) |
2279 | 2.52M | td->counts->partition[ctx][partition]++; |
2280 | | |
2281 | 8.23M | switch (partition) { |
2282 | 2.54M | case PARTITION_NONE: |
2283 | 2.54M | encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize, |
2284 | 2.54M | &pc_tree->none); |
2285 | 2.54M | break; |
2286 | 401k | case PARTITION_VERT: |
2287 | 401k | encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize, |
2288 | 401k | &pc_tree->vertical[0]); |
2289 | 401k | if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) { |
2290 | 40.3k | encode_b(cpi, tile, td, tp, mi_row, mi_col + hbs, output_enabled, |
2291 | 40.3k | subsize, &pc_tree->vertical[1]); |
2292 | 40.3k | } |
2293 | 401k | break; |
2294 | 692k | case PARTITION_HORZ: |
2295 | 692k | encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize, |
2296 | 692k | &pc_tree->horizontal[0]); |
2297 | 692k | if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) { |
2298 | 67.1k | encode_b(cpi, tile, td, tp, mi_row + hbs, mi_col, output_enabled, |
2299 | 67.1k | subsize, &pc_tree->horizontal[1]); |
2300 | 67.1k | } |
2301 | 692k | break; |
2302 | 4.59M | default: |
2303 | 4.59M | assert(partition == PARTITION_SPLIT); |
2304 | 4.59M | if (bsize == BLOCK_8X8) { |
2305 | 3.06M | encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize, |
2306 | 3.06M | pc_tree->u.leaf_split[0]); |
2307 | 3.06M | } else { |
2308 | 1.53M | encode_sb(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize, |
2309 | 1.53M | pc_tree->u.split[0]); |
2310 | 1.53M | encode_sb(cpi, td, tile, tp, mi_row, mi_col + hbs, output_enabled, |
2311 | 1.53M | subsize, pc_tree->u.split[1]); |
2312 | 1.53M | encode_sb(cpi, td, tile, tp, mi_row + hbs, mi_col, output_enabled, |
2313 | 1.53M | subsize, pc_tree->u.split[2]); |
2314 | 1.53M | encode_sb(cpi, td, tile, tp, mi_row + hbs, mi_col + hbs, output_enabled, |
2315 | 1.53M | subsize, pc_tree->u.split[3]); |
2316 | 1.53M | } |
2317 | 4.59M | break; |
2318 | 8.23M | } |
2319 | | |
2320 | 8.23M | if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8) |
2321 | 6.70M | update_partition_context(xd, mi_row, mi_col, subsize, bsize); |
2322 | 8.23M | } |
2323 | | #endif // !CONFIG_REALTIME_ONLY |
2324 | | |
2325 | | // Check to see if the given partition size is allowed for a specified number |
2326 | | // of 8x8 block rows and columns remaining in the image. |
2327 | | // If not then return the largest allowed partition size |
2328 | | static BLOCK_SIZE find_partition_size(BLOCK_SIZE bsize, int rows_left, |
2329 | 0 | int cols_left, int *bh, int *bw) { |
2330 | 0 | if (rows_left <= 0 || cols_left <= 0) { |
2331 | 0 | return VPXMIN(bsize, BLOCK_8X8); |
2332 | 0 | } else { |
2333 | 0 | for (; bsize > 0; bsize -= 3) { |
2334 | 0 | *bh = num_8x8_blocks_high_lookup[bsize]; |
2335 | 0 | *bw = num_8x8_blocks_wide_lookup[bsize]; |
2336 | 0 | if ((*bh <= rows_left) && (*bw <= cols_left)) { |
2337 | 0 | break; |
2338 | 0 | } |
2339 | 0 | } |
2340 | 0 | } |
2341 | 0 | return bsize; |
2342 | 0 | } |
2343 | | |
2344 | | static void set_partial_b64x64_partition(MODE_INFO *mi, int mis, int bh_in, |
2345 | | int bw_in, int row8x8_remaining, |
2346 | | int col8x8_remaining, BLOCK_SIZE bsize, |
2347 | 0 | MODE_INFO **mi_8x8) { |
2348 | 0 | int bh = bh_in; |
2349 | 0 | int r, c; |
2350 | 0 | for (r = 0; r < MI_BLOCK_SIZE; r += bh) { |
2351 | 0 | int bw = bw_in; |
2352 | 0 | for (c = 0; c < MI_BLOCK_SIZE; c += bw) { |
2353 | 0 | const int index = r * mis + c; |
2354 | 0 | mi_8x8[index] = mi + index; |
2355 | 0 | mi_8x8[index]->sb_type = find_partition_size( |
2356 | 0 | bsize, row8x8_remaining - r, col8x8_remaining - c, &bh, &bw); |
2357 | 0 | } |
2358 | 0 | } |
2359 | 0 | } |
2360 | | |
2361 | | // This function attempts to set all mode info entries in a given SB64 |
2362 | | // to the same block partition size. |
2363 | | // However, at the bottom and right borders of the image the requested size |
2364 | | // may not be allowed in which case this code attempts to choose the largest |
2365 | | // allowable partition. |
2366 | | static void set_fixed_partitioning(VP9_COMP *cpi, const TileInfo *const tile, |
2367 | | MODE_INFO **mi_8x8, int mi_row, int mi_col, |
2368 | 0 | BLOCK_SIZE bsize) { |
2369 | 0 | VP9_COMMON *const cm = &cpi->common; |
2370 | 0 | const int mis = cm->mi_stride; |
2371 | 0 | const int row8x8_remaining = tile->mi_row_end - mi_row; |
2372 | 0 | const int col8x8_remaining = tile->mi_col_end - mi_col; |
2373 | 0 | int block_row, block_col; |
2374 | 0 | MODE_INFO *mi_upper_left = cm->mi + mi_row * mis + mi_col; |
2375 | 0 | int bh = num_8x8_blocks_high_lookup[bsize]; |
2376 | 0 | int bw = num_8x8_blocks_wide_lookup[bsize]; |
2377 | |
|
2378 | 0 | assert((row8x8_remaining > 0) && (col8x8_remaining > 0)); |
2379 | | |
2380 | | // Apply the requested partition size to the SB64 if it is all "in image" |
2381 | 0 | if ((col8x8_remaining >= MI_BLOCK_SIZE) && |
2382 | 0 | (row8x8_remaining >= MI_BLOCK_SIZE)) { |
2383 | 0 | for (block_row = 0; block_row < MI_BLOCK_SIZE; block_row += bh) { |
2384 | 0 | for (block_col = 0; block_col < MI_BLOCK_SIZE; block_col += bw) { |
2385 | 0 | int index = block_row * mis + block_col; |
2386 | 0 | mi_8x8[index] = mi_upper_left + index; |
2387 | 0 | mi_8x8[index]->sb_type = bsize; |
2388 | 0 | } |
2389 | 0 | } |
2390 | 0 | } else { |
2391 | | // Else this is a partial SB64. |
2392 | 0 | set_partial_b64x64_partition(mi_upper_left, mis, bh, bw, row8x8_remaining, |
2393 | 0 | col8x8_remaining, bsize, mi_8x8); |
2394 | 0 | } |
2395 | 0 | } |
2396 | | |
2397 | | static const struct { |
2398 | | int row; |
2399 | | int col; |
2400 | | } coord_lookup[16] = { |
2401 | | // 32x32 index = 0 |
2402 | | { 0, 0 }, |
2403 | | { 0, 2 }, |
2404 | | { 2, 0 }, |
2405 | | { 2, 2 }, |
2406 | | // 32x32 index = 1 |
2407 | | { 0, 4 }, |
2408 | | { 0, 6 }, |
2409 | | { 2, 4 }, |
2410 | | { 2, 6 }, |
2411 | | // 32x32 index = 2 |
2412 | | { 4, 0 }, |
2413 | | { 4, 2 }, |
2414 | | { 6, 0 }, |
2415 | | { 6, 2 }, |
2416 | | // 32x32 index = 3 |
2417 | | { 4, 4 }, |
2418 | | { 4, 6 }, |
2419 | | { 6, 4 }, |
2420 | | { 6, 6 }, |
2421 | | }; |
2422 | | |
2423 | | static void set_source_var_based_partition(VP9_COMP *cpi, |
2424 | | const TileInfo *const tile, |
2425 | | MACROBLOCK *const x, |
2426 | | MODE_INFO **mi_8x8, int mi_row, |
2427 | 0 | int mi_col) { |
2428 | 0 | VP9_COMMON *const cm = &cpi->common; |
2429 | 0 | const int mis = cm->mi_stride; |
2430 | 0 | const int row8x8_remaining = tile->mi_row_end - mi_row; |
2431 | 0 | const int col8x8_remaining = tile->mi_col_end - mi_col; |
2432 | 0 | MODE_INFO *mi_upper_left = cm->mi + mi_row * mis + mi_col; |
2433 | |
|
2434 | 0 | vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col); |
2435 | |
|
2436 | 0 | assert((row8x8_remaining > 0) && (col8x8_remaining > 0)); |
2437 | | |
2438 | | // In-image SB64 |
2439 | 0 | if ((col8x8_remaining >= MI_BLOCK_SIZE) && |
2440 | 0 | (row8x8_remaining >= MI_BLOCK_SIZE)) { |
2441 | 0 | int i, j; |
2442 | 0 | int index; |
2443 | 0 | Diff d32[4]; |
2444 | 0 | const int offset = (mi_row >> 1) * cm->mb_cols + (mi_col >> 1); |
2445 | 0 | int is_larger_better = 0; |
2446 | 0 | int use32x32 = 0; |
2447 | 0 | unsigned int thr = cpi->source_var_thresh; |
2448 | |
|
2449 | 0 | memset(d32, 0, sizeof(d32)); |
2450 | |
|
2451 | 0 | for (i = 0; i < 4; i++) { |
2452 | 0 | Diff *d16[4]; |
2453 | |
|
2454 | 0 | for (j = 0; j < 4; j++) { |
2455 | 0 | int b_mi_row = coord_lookup[i * 4 + j].row; |
2456 | 0 | int b_mi_col = coord_lookup[i * 4 + j].col; |
2457 | 0 | int boffset = b_mi_row / 2 * cm->mb_cols + b_mi_col / 2; |
2458 | |
|
2459 | 0 | d16[j] = cpi->source_diff_var + offset + boffset; |
2460 | |
|
2461 | 0 | index = b_mi_row * mis + b_mi_col; |
2462 | 0 | mi_8x8[index] = mi_upper_left + index; |
2463 | 0 | mi_8x8[index]->sb_type = BLOCK_16X16; |
2464 | | |
2465 | | // TODO(yunqingwang): If d16[j].var is very large, use 8x8 partition |
2466 | | // size to further improve quality. |
2467 | 0 | } |
2468 | |
|
2469 | 0 | is_larger_better = (d16[0]->var < thr) && (d16[1]->var < thr) && |
2470 | 0 | (d16[2]->var < thr) && (d16[3]->var < thr); |
2471 | | |
2472 | | // Use 32x32 partition |
2473 | 0 | if (is_larger_better) { |
2474 | 0 | use32x32 += 1; |
2475 | |
|
2476 | 0 | for (j = 0; j < 4; j++) { |
2477 | 0 | d32[i].sse += d16[j]->sse; |
2478 | 0 | d32[i].sum += d16[j]->sum; |
2479 | 0 | } |
2480 | |
|
2481 | 0 | d32[i].var = |
2482 | 0 | (unsigned int)(d32[i].sse - |
2483 | 0 | (unsigned int)(((int64_t)d32[i].sum * d32[i].sum) >> |
2484 | 0 | 10)); |
2485 | |
|
2486 | 0 | index = coord_lookup[i * 4].row * mis + coord_lookup[i * 4].col; |
2487 | 0 | mi_8x8[index] = mi_upper_left + index; |
2488 | 0 | mi_8x8[index]->sb_type = BLOCK_32X32; |
2489 | 0 | } |
2490 | 0 | } |
2491 | |
|
2492 | 0 | if (use32x32 == 4) { |
2493 | 0 | thr <<= 1; |
2494 | 0 | is_larger_better = (d32[0].var < thr) && (d32[1].var < thr) && |
2495 | 0 | (d32[2].var < thr) && (d32[3].var < thr); |
2496 | | |
2497 | | // Use 64x64 partition |
2498 | 0 | if (is_larger_better) { |
2499 | 0 | mi_8x8[0] = mi_upper_left; |
2500 | 0 | mi_8x8[0]->sb_type = BLOCK_64X64; |
2501 | 0 | } |
2502 | 0 | } |
2503 | 0 | } else { // partial in-image SB64 |
2504 | 0 | int bh = num_8x8_blocks_high_lookup[BLOCK_16X16]; |
2505 | 0 | int bw = num_8x8_blocks_wide_lookup[BLOCK_16X16]; |
2506 | 0 | set_partial_b64x64_partition(mi_upper_left, mis, bh, bw, row8x8_remaining, |
2507 | 0 | col8x8_remaining, BLOCK_16X16, mi_8x8); |
2508 | 0 | } |
2509 | 0 | } |
2510 | | |
2511 | | static void update_state_rt(VP9_COMP *cpi, ThreadData *td, |
2512 | | PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col, |
2513 | 0 | int bsize) { |
2514 | 0 | VP9_COMMON *const cm = &cpi->common; |
2515 | 0 | MACROBLOCK *const x = &td->mb; |
2516 | 0 | MACROBLOCKD *const xd = &x->e_mbd; |
2517 | 0 | MODE_INFO *const mi = xd->mi[0]; |
2518 | 0 | struct macroblock_plane *const p = x->plane; |
2519 | 0 | const struct segmentation *const seg = &cm->seg; |
2520 | 0 | const int bw = num_8x8_blocks_wide_lookup[mi->sb_type]; |
2521 | 0 | const int bh = num_8x8_blocks_high_lookup[mi->sb_type]; |
2522 | 0 | const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col); |
2523 | 0 | const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row); |
2524 | |
|
2525 | 0 | *(xd->mi[0]) = ctx->mic; |
2526 | 0 | *(x->mbmi_ext) = ctx->mbmi_ext; |
2527 | |
|
2528 | 0 | if (seg->enabled && (cpi->oxcf.aq_mode != NO_AQ || cpi->roi.enabled || |
2529 | 0 | cpi->active_map.enabled)) { |
2530 | | // Setting segmentation map for cyclic_refresh. |
2531 | 0 | if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && |
2532 | 0 | cpi->cyclic_refresh->content_mode) { |
2533 | 0 | vp9_cyclic_refresh_update_segment(cpi, mi, mi_row, mi_col, bsize, |
2534 | 0 | ctx->rate, ctx->dist, x->skip, p); |
2535 | 0 | } else { |
2536 | 0 | const uint8_t *const map = |
2537 | 0 | seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map; |
2538 | 0 | mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); |
2539 | 0 | } |
2540 | 0 | vp9_init_plane_quantizers(cpi, x); |
2541 | 0 | } |
2542 | |
|
2543 | 0 | if (is_inter_block(mi)) { |
2544 | 0 | vp9_update_mv_count(td); |
2545 | 0 | if (cm->interp_filter == SWITCHABLE) { |
2546 | 0 | const int pred_ctx = get_pred_context_switchable_interp(xd); |
2547 | 0 | ++td->counts->switchable_interp[pred_ctx][mi->interp_filter]; |
2548 | 0 | } |
2549 | |
|
2550 | 0 | if (mi->sb_type < BLOCK_8X8) { |
2551 | 0 | mi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int; |
2552 | 0 | mi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int; |
2553 | 0 | } |
2554 | 0 | } |
2555 | |
|
2556 | 0 | if (cm->use_prev_frame_mvs || !cm->error_resilient_mode || |
2557 | 0 | (cpi->svc.use_base_mv && cpi->svc.number_spatial_layers > 1 && |
2558 | 0 | cpi->svc.spatial_layer_id != cpi->svc.number_spatial_layers - 1)) { |
2559 | 0 | MV_REF *const frame_mvs = |
2560 | 0 | cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col; |
2561 | 0 | int w, h; |
2562 | |
|
2563 | 0 | for (h = 0; h < y_mis; ++h) { |
2564 | 0 | MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols; |
2565 | 0 | for (w = 0; w < x_mis; ++w) { |
2566 | 0 | MV_REF *const mv = frame_mv + w; |
2567 | 0 | mv->ref_frame[0] = mi->ref_frame[0]; |
2568 | 0 | mv->ref_frame[1] = mi->ref_frame[1]; |
2569 | 0 | mv->mv[0].as_int = mi->mv[0].as_int; |
2570 | 0 | mv->mv[1].as_int = mi->mv[1].as_int; |
2571 | 0 | } |
2572 | 0 | } |
2573 | 0 | } |
2574 | |
|
2575 | 0 | x->skip = ctx->skip; |
2576 | 0 | x->skip_txfm[0] = (mi->segment_id || xd->lossless) ? 0 : ctx->skip_txfm[0]; |
2577 | 0 | } |
2578 | | |
2579 | | static void encode_b_rt(VP9_COMP *cpi, ThreadData *td, |
2580 | | const TileInfo *const tile, TOKENEXTRA **tp, int mi_row, |
2581 | | int mi_col, int output_enabled, BLOCK_SIZE bsize, |
2582 | 0 | PICK_MODE_CONTEXT *ctx) { |
2583 | 0 | MACROBLOCK *const x = &td->mb; |
2584 | 0 | set_offsets(cpi, tile, x, mi_row, mi_col, bsize); |
2585 | 0 | update_state_rt(cpi, td, ctx, mi_row, mi_col, bsize); |
2586 | |
|
2587 | 0 | encode_superblock(cpi, td, tp, output_enabled, mi_row, mi_col, bsize, ctx); |
2588 | 0 | update_stats(&cpi->common, td); |
2589 | |
|
2590 | 0 | (*tp)->token = EOSB_TOKEN; |
2591 | 0 | (*tp)++; |
2592 | 0 | } |
2593 | | |
2594 | | static void encode_sb_rt(VP9_COMP *cpi, ThreadData *td, |
2595 | | const TileInfo *const tile, TOKENEXTRA **tp, |
2596 | | int mi_row, int mi_col, int output_enabled, |
2597 | 0 | BLOCK_SIZE bsize, PC_TREE *pc_tree) { |
2598 | 0 | VP9_COMMON *const cm = &cpi->common; |
2599 | 0 | MACROBLOCK *const x = &td->mb; |
2600 | 0 | MACROBLOCKD *const xd = &x->e_mbd; |
2601 | |
|
2602 | 0 | const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4; |
2603 | 0 | int ctx; |
2604 | 0 | PARTITION_TYPE partition; |
2605 | 0 | BLOCK_SIZE subsize; |
2606 | |
|
2607 | 0 | if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; |
2608 | | |
2609 | 0 | if (bsize >= BLOCK_8X8) { |
2610 | 0 | const int idx_str = xd->mi_stride * mi_row + mi_col; |
2611 | 0 | MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str; |
2612 | 0 | ctx = partition_plane_context(xd, mi_row, mi_col, bsize); |
2613 | 0 | subsize = mi_8x8[0]->sb_type; |
2614 | 0 | } else { |
2615 | 0 | ctx = 0; |
2616 | 0 | subsize = BLOCK_4X4; |
2617 | 0 | } |
2618 | |
|
2619 | 0 | partition = partition_lookup[bsl][subsize]; |
2620 | 0 | if (output_enabled && bsize != BLOCK_4X4) |
2621 | 0 | td->counts->partition[ctx][partition]++; |
2622 | |
|
2623 | 0 | switch (partition) { |
2624 | 0 | case PARTITION_NONE: |
2625 | 0 | encode_b_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize, |
2626 | 0 | &pc_tree->none); |
2627 | 0 | break; |
2628 | 0 | case PARTITION_VERT: |
2629 | 0 | encode_b_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize, |
2630 | 0 | &pc_tree->vertical[0]); |
2631 | 0 | if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) { |
2632 | 0 | encode_b_rt(cpi, td, tile, tp, mi_row, mi_col + hbs, output_enabled, |
2633 | 0 | subsize, &pc_tree->vertical[1]); |
2634 | 0 | } |
2635 | 0 | break; |
2636 | 0 | case PARTITION_HORZ: |
2637 | 0 | encode_b_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize, |
2638 | 0 | &pc_tree->horizontal[0]); |
2639 | 0 | if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) { |
2640 | 0 | encode_b_rt(cpi, td, tile, tp, mi_row + hbs, mi_col, output_enabled, |
2641 | 0 | subsize, &pc_tree->horizontal[1]); |
2642 | 0 | } |
2643 | 0 | break; |
2644 | 0 | default: |
2645 | 0 | assert(partition == PARTITION_SPLIT); |
2646 | 0 | subsize = get_subsize(bsize, PARTITION_SPLIT); |
2647 | 0 | encode_sb_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize, |
2648 | 0 | pc_tree->u.split[0]); |
2649 | 0 | encode_sb_rt(cpi, td, tile, tp, mi_row, mi_col + hbs, output_enabled, |
2650 | 0 | subsize, pc_tree->u.split[1]); |
2651 | 0 | encode_sb_rt(cpi, td, tile, tp, mi_row + hbs, mi_col, output_enabled, |
2652 | 0 | subsize, pc_tree->u.split[2]); |
2653 | 0 | encode_sb_rt(cpi, td, tile, tp, mi_row + hbs, mi_col + hbs, |
2654 | 0 | output_enabled, subsize, pc_tree->u.split[3]); |
2655 | 0 | break; |
2656 | 0 | } |
2657 | | |
2658 | 0 | if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8) |
2659 | 0 | update_partition_context(xd, mi_row, mi_col, subsize, bsize); |
2660 | 0 | } |
2661 | | |
2662 | | #if !CONFIG_REALTIME_ONLY |
2663 | | static void rd_use_partition(VP9_COMP *cpi, ThreadData *td, |
2664 | | TileDataEnc *tile_data, MODE_INFO **mi_8x8, |
2665 | | TOKENEXTRA **tp, int mi_row, int mi_col, |
2666 | | BLOCK_SIZE bsize, int *rate, int64_t *dist, |
2667 | 0 | int do_recon, PC_TREE *pc_tree) { |
2668 | 0 | VP9_COMMON *const cm = &cpi->common; |
2669 | 0 | TileInfo *const tile_info = &tile_data->tile_info; |
2670 | 0 | MACROBLOCK *const x = &td->mb; |
2671 | 0 | MACROBLOCKD *const xd = &x->e_mbd; |
2672 | 0 | const int mis = cm->mi_stride; |
2673 | 0 | const int bsl = b_width_log2_lookup[bsize]; |
2674 | 0 | const int mi_step = num_4x4_blocks_wide_lookup[bsize] / 2; |
2675 | 0 | const int bss = (1 << bsl) / 4; |
2676 | 0 | int i, pl; |
2677 | 0 | PARTITION_TYPE partition = PARTITION_NONE; |
2678 | 0 | BLOCK_SIZE subsize; |
2679 | 0 | ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; |
2680 | 0 | PARTITION_CONTEXT sl[8], sa[8]; |
2681 | 0 | RD_COST last_part_rdc, none_rdc, chosen_rdc; |
2682 | 0 | BLOCK_SIZE sub_subsize = BLOCK_4X4; |
2683 | 0 | int splits_below = 0; |
2684 | 0 | BLOCK_SIZE bs_type = mi_8x8[0]->sb_type; |
2685 | 0 | int do_partition_search = 1; |
2686 | 0 | PICK_MODE_CONTEXT *ctx = &pc_tree->none; |
2687 | |
|
2688 | 0 | if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; |
2689 | | |
2690 | 0 | assert(num_4x4_blocks_wide_lookup[bsize] == |
2691 | 0 | num_4x4_blocks_high_lookup[bsize]); |
2692 | |
|
2693 | 0 | vp9_rd_cost_reset(&last_part_rdc); |
2694 | 0 | vp9_rd_cost_reset(&none_rdc); |
2695 | 0 | vp9_rd_cost_reset(&chosen_rdc); |
2696 | |
|
2697 | 0 | partition = partition_lookup[bsl][bs_type]; |
2698 | 0 | subsize = get_subsize(bsize, partition); |
2699 | |
|
2700 | 0 | pc_tree->partitioning = partition; |
2701 | 0 | save_context(x, mi_row, mi_col, a, l, sa, sl, bsize); |
2702 | |
|
2703 | 0 | if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode != NO_AQ) { |
2704 | 0 | set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); |
2705 | 0 | x->mb_energy = vp9_block_energy(cpi, x, bsize); |
2706 | 0 | } |
2707 | |
|
2708 | 0 | if (do_partition_search && |
2709 | 0 | cpi->sf.partition_search_type == SEARCH_PARTITION && |
2710 | 0 | cpi->sf.adjust_partitioning_from_last_frame) { |
2711 | | // Check if any of the sub blocks are further split. |
2712 | 0 | if (partition == PARTITION_SPLIT && subsize > BLOCK_8X8) { |
2713 | 0 | sub_subsize = get_subsize(subsize, PARTITION_SPLIT); |
2714 | 0 | splits_below = 1; |
2715 | 0 | for (i = 0; i < 4; i++) { |
2716 | 0 | int jj = i >> 1, ii = i & 0x01; |
2717 | 0 | MODE_INFO *this_mi = mi_8x8[jj * bss * mis + ii * bss]; |
2718 | 0 | if (this_mi && this_mi->sb_type >= sub_subsize) { |
2719 | 0 | splits_below = 0; |
2720 | 0 | } |
2721 | 0 | } |
2722 | 0 | } |
2723 | | |
2724 | | // If partition is not none try none unless each of the 4 splits are split |
2725 | | // even further.. |
2726 | 0 | if (partition != PARTITION_NONE && !splits_below && |
2727 | 0 | mi_row + (mi_step >> 1) < cm->mi_rows && |
2728 | 0 | mi_col + (mi_step >> 1) < cm->mi_cols) { |
2729 | 0 | pc_tree->partitioning = PARTITION_NONE; |
2730 | 0 | rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &none_rdc, bsize, ctx, |
2731 | 0 | INT_MAX, INT64_MAX); |
2732 | |
|
2733 | 0 | pl = partition_plane_context(xd, mi_row, mi_col, bsize); |
2734 | |
|
2735 | 0 | if (none_rdc.rate < INT_MAX) { |
2736 | 0 | none_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE]; |
2737 | 0 | none_rdc.rdcost = |
2738 | 0 | RDCOST(x->rdmult, x->rddiv, none_rdc.rate, none_rdc.dist); |
2739 | 0 | } |
2740 | |
|
2741 | 0 | restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); |
2742 | 0 | mi_8x8[0]->sb_type = bs_type; |
2743 | 0 | pc_tree->partitioning = partition; |
2744 | 0 | } |
2745 | 0 | } |
2746 | |
|
2747 | 0 | switch (partition) { |
2748 | 0 | case PARTITION_NONE: |
2749 | 0 | rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, bsize, |
2750 | 0 | ctx, INT_MAX, INT64_MAX); |
2751 | 0 | break; |
2752 | 0 | case PARTITION_HORZ: |
2753 | 0 | pc_tree->horizontal[0].skip_ref_frame_mask = 0; |
2754 | 0 | rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, |
2755 | 0 | subsize, &pc_tree->horizontal[0], INT_MAX, INT64_MAX); |
2756 | 0 | if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 && |
2757 | 0 | mi_row + (mi_step >> 1) < cm->mi_rows) { |
2758 | 0 | RD_COST tmp_rdc; |
2759 | 0 | PICK_MODE_CONTEXT *hctx = &pc_tree->horizontal[0]; |
2760 | 0 | vp9_rd_cost_init(&tmp_rdc); |
2761 | 0 | update_state(cpi, td, hctx, mi_row, mi_col, subsize, 0); |
2762 | 0 | encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, hctx); |
2763 | 0 | pc_tree->horizontal[1].skip_ref_frame_mask = 0; |
2764 | 0 | rd_pick_sb_modes(cpi, tile_data, x, mi_row + (mi_step >> 1), mi_col, |
2765 | 0 | &tmp_rdc, subsize, &pc_tree->horizontal[1], INT_MAX, |
2766 | 0 | INT64_MAX); |
2767 | 0 | if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { |
2768 | 0 | vp9_rd_cost_reset(&last_part_rdc); |
2769 | 0 | break; |
2770 | 0 | } |
2771 | 0 | last_part_rdc.rate += tmp_rdc.rate; |
2772 | 0 | last_part_rdc.dist += tmp_rdc.dist; |
2773 | 0 | last_part_rdc.rdcost += tmp_rdc.rdcost; |
2774 | 0 | } |
2775 | 0 | break; |
2776 | 0 | case PARTITION_VERT: |
2777 | 0 | pc_tree->vertical[0].skip_ref_frame_mask = 0; |
2778 | 0 | rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, |
2779 | 0 | subsize, &pc_tree->vertical[0], INT_MAX, INT64_MAX); |
2780 | 0 | if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 && |
2781 | 0 | mi_col + (mi_step >> 1) < cm->mi_cols) { |
2782 | 0 | RD_COST tmp_rdc; |
2783 | 0 | PICK_MODE_CONTEXT *vctx = &pc_tree->vertical[0]; |
2784 | 0 | vp9_rd_cost_init(&tmp_rdc); |
2785 | 0 | update_state(cpi, td, vctx, mi_row, mi_col, subsize, 0); |
2786 | 0 | encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, vctx); |
2787 | 0 | pc_tree->vertical[bsize > BLOCK_8X8].skip_ref_frame_mask = 0; |
2788 | 0 | rd_pick_sb_modes( |
2789 | 0 | cpi, tile_data, x, mi_row, mi_col + (mi_step >> 1), &tmp_rdc, |
2790 | 0 | subsize, &pc_tree->vertical[bsize > BLOCK_8X8], INT_MAX, INT64_MAX); |
2791 | 0 | if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { |
2792 | 0 | vp9_rd_cost_reset(&last_part_rdc); |
2793 | 0 | break; |
2794 | 0 | } |
2795 | 0 | last_part_rdc.rate += tmp_rdc.rate; |
2796 | 0 | last_part_rdc.dist += tmp_rdc.dist; |
2797 | 0 | last_part_rdc.rdcost += tmp_rdc.rdcost; |
2798 | 0 | } |
2799 | 0 | break; |
2800 | 0 | default: |
2801 | 0 | assert(partition == PARTITION_SPLIT); |
2802 | 0 | if (bsize == BLOCK_8X8) { |
2803 | 0 | rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, |
2804 | 0 | subsize, pc_tree->u.leaf_split[0], INT_MAX, INT64_MAX); |
2805 | 0 | break; |
2806 | 0 | } |
2807 | 0 | last_part_rdc.rate = 0; |
2808 | 0 | last_part_rdc.dist = 0; |
2809 | 0 | last_part_rdc.rdcost = 0; |
2810 | 0 | for (i = 0; i < 4; i++) { |
2811 | 0 | int x_idx = (i & 1) * (mi_step >> 1); |
2812 | 0 | int y_idx = (i >> 1) * (mi_step >> 1); |
2813 | 0 | int jj = i >> 1, ii = i & 0x01; |
2814 | 0 | RD_COST tmp_rdc; |
2815 | 0 | if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols)) |
2816 | 0 | continue; |
2817 | | |
2818 | 0 | vp9_rd_cost_init(&tmp_rdc); |
2819 | 0 | rd_use_partition(cpi, td, tile_data, mi_8x8 + jj * bss * mis + ii * bss, |
2820 | 0 | tp, mi_row + y_idx, mi_col + x_idx, subsize, |
2821 | 0 | &tmp_rdc.rate, &tmp_rdc.dist, i != 3, |
2822 | 0 | pc_tree->u.split[i]); |
2823 | 0 | if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { |
2824 | 0 | vp9_rd_cost_reset(&last_part_rdc); |
2825 | 0 | break; |
2826 | 0 | } |
2827 | 0 | last_part_rdc.rate += tmp_rdc.rate; |
2828 | 0 | last_part_rdc.dist += tmp_rdc.dist; |
2829 | 0 | } |
2830 | 0 | break; |
2831 | 0 | } |
2832 | | |
2833 | 0 | pl = partition_plane_context(xd, mi_row, mi_col, bsize); |
2834 | 0 | if (last_part_rdc.rate < INT_MAX) { |
2835 | 0 | last_part_rdc.rate += cpi->partition_cost[pl][partition]; |
2836 | 0 | last_part_rdc.rdcost = |
2837 | 0 | RDCOST(x->rdmult, x->rddiv, last_part_rdc.rate, last_part_rdc.dist); |
2838 | 0 | } |
2839 | |
|
2840 | 0 | if (do_partition_search && cpi->sf.adjust_partitioning_from_last_frame && |
2841 | 0 | cpi->sf.partition_search_type == SEARCH_PARTITION && |
2842 | 0 | partition != PARTITION_SPLIT && bsize > BLOCK_8X8 && |
2843 | 0 | (mi_row + mi_step < cm->mi_rows || |
2844 | 0 | mi_row + (mi_step >> 1) == cm->mi_rows) && |
2845 | 0 | (mi_col + mi_step < cm->mi_cols || |
2846 | 0 | mi_col + (mi_step >> 1) == cm->mi_cols)) { |
2847 | 0 | BLOCK_SIZE split_subsize = get_subsize(bsize, PARTITION_SPLIT); |
2848 | 0 | chosen_rdc.rate = 0; |
2849 | 0 | chosen_rdc.dist = 0; |
2850 | 0 | restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); |
2851 | 0 | pc_tree->partitioning = PARTITION_SPLIT; |
2852 | | |
2853 | | // Split partition. |
2854 | 0 | for (i = 0; i < 4; i++) { |
2855 | 0 | int x_idx = (i & 1) * (mi_step >> 1); |
2856 | 0 | int y_idx = (i >> 1) * (mi_step >> 1); |
2857 | 0 | RD_COST tmp_rdc; |
2858 | |
|
2859 | 0 | if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols)) |
2860 | 0 | continue; |
2861 | | |
2862 | 0 | save_context(x, mi_row, mi_col, a, l, sa, sl, bsize); |
2863 | 0 | pc_tree->u.split[i]->partitioning = PARTITION_NONE; |
2864 | 0 | rd_pick_sb_modes(cpi, tile_data, x, mi_row + y_idx, mi_col + x_idx, |
2865 | 0 | &tmp_rdc, split_subsize, &pc_tree->u.split[i]->none, |
2866 | 0 | INT_MAX, INT64_MAX); |
2867 | |
|
2868 | 0 | restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); |
2869 | |
|
2870 | 0 | if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { |
2871 | 0 | vp9_rd_cost_reset(&chosen_rdc); |
2872 | 0 | break; |
2873 | 0 | } |
2874 | | |
2875 | 0 | chosen_rdc.rate += tmp_rdc.rate; |
2876 | 0 | chosen_rdc.dist += tmp_rdc.dist; |
2877 | |
|
2878 | 0 | if (i != 3) |
2879 | 0 | encode_sb(cpi, td, tile_info, tp, mi_row + y_idx, mi_col + x_idx, 0, |
2880 | 0 | split_subsize, pc_tree->u.split[i]); |
2881 | |
|
2882 | 0 | pl = partition_plane_context(xd, mi_row + y_idx, mi_col + x_idx, |
2883 | 0 | split_subsize); |
2884 | 0 | chosen_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE]; |
2885 | 0 | } |
2886 | 0 | pl = partition_plane_context(xd, mi_row, mi_col, bsize); |
2887 | 0 | if (chosen_rdc.rate < INT_MAX) { |
2888 | 0 | chosen_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT]; |
2889 | 0 | chosen_rdc.rdcost = |
2890 | 0 | RDCOST(x->rdmult, x->rddiv, chosen_rdc.rate, chosen_rdc.dist); |
2891 | 0 | } |
2892 | 0 | } |
2893 | | |
2894 | | // If last_part is better set the partitioning to that. |
2895 | 0 | if (last_part_rdc.rdcost < chosen_rdc.rdcost) { |
2896 | 0 | mi_8x8[0]->sb_type = bsize; |
2897 | 0 | if (bsize >= BLOCK_8X8) pc_tree->partitioning = partition; |
2898 | 0 | chosen_rdc = last_part_rdc; |
2899 | 0 | } |
2900 | | // If none was better set the partitioning to that. |
2901 | 0 | if (none_rdc.rdcost < chosen_rdc.rdcost) { |
2902 | 0 | if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE; |
2903 | 0 | chosen_rdc = none_rdc; |
2904 | 0 | } |
2905 | |
|
2906 | 0 | restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); |
2907 | | |
2908 | | // We must have chosen a partitioning and encoding or we'll fail later on. |
2909 | | // No other opportunities for success. |
2910 | 0 | if (bsize == BLOCK_64X64) |
2911 | 0 | assert(chosen_rdc.rate < INT_MAX && chosen_rdc.dist < INT64_MAX); |
2912 | |
|
2913 | 0 | if (do_recon) { |
2914 | 0 | int output_enabled = (bsize == BLOCK_64X64); |
2915 | 0 | encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize, |
2916 | 0 | pc_tree); |
2917 | 0 | } |
2918 | |
|
2919 | 0 | *rate = chosen_rdc.rate; |
2920 | 0 | *dist = chosen_rdc.dist; |
2921 | 0 | } |
2922 | | |
2923 | | static const BLOCK_SIZE min_partition_size[BLOCK_SIZES] = { |
2924 | | BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, |
2925 | | BLOCK_4X4, BLOCK_8X8, BLOCK_8X8, BLOCK_8X8, BLOCK_16X16, |
2926 | | BLOCK_16X16, BLOCK_16X16, BLOCK_16X16 |
2927 | | }; |
2928 | | |
2929 | | static const BLOCK_SIZE max_partition_size[BLOCK_SIZES] = { |
2930 | | BLOCK_8X8, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, BLOCK_32X32, |
2931 | | BLOCK_32X32, BLOCK_32X32, BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, |
2932 | | BLOCK_64X64, BLOCK_64X64, BLOCK_64X64 |
2933 | | }; |
2934 | | |
2935 | | // Look at all the mode_info entries for blocks that are part of this |
2936 | | // partition and find the min and max values for sb_type. |
2937 | | // At the moment this is designed to work on a 64x64 SB but could be |
2938 | | // adjusted to use a size parameter. |
2939 | | // |
2940 | | // The min and max are assumed to have been initialized prior to calling this |
2941 | | // function so repeat calls can accumulate a min and max of more than one sb64. |
2942 | | static void get_sb_partition_size_range(MACROBLOCKD *xd, MODE_INFO **mi_8x8, |
2943 | | BLOCK_SIZE *min_block_size, |
2944 | | BLOCK_SIZE *max_block_size, |
2945 | 0 | int bs_hist[BLOCK_SIZES]) { |
2946 | 0 | int sb_width_in_blocks = MI_BLOCK_SIZE; |
2947 | 0 | int sb_height_in_blocks = MI_BLOCK_SIZE; |
2948 | 0 | int i, j; |
2949 | 0 | int index = 0; |
2950 | | |
2951 | | // Check the sb_type for each block that belongs to this region. |
2952 | 0 | for (i = 0; i < sb_height_in_blocks; ++i) { |
2953 | 0 | for (j = 0; j < sb_width_in_blocks; ++j) { |
2954 | 0 | MODE_INFO *mi = mi_8x8[index + j]; |
2955 | 0 | BLOCK_SIZE sb_type = mi ? mi->sb_type : 0; |
2956 | 0 | bs_hist[sb_type]++; |
2957 | 0 | *min_block_size = VPXMIN(*min_block_size, sb_type); |
2958 | 0 | *max_block_size = VPXMAX(*max_block_size, sb_type); |
2959 | 0 | } |
2960 | 0 | index += xd->mi_stride; |
2961 | 0 | } |
2962 | 0 | } |
2963 | | |
2964 | | // Next square block size less or equal than current block size. |
2965 | | static const BLOCK_SIZE next_square_size[BLOCK_SIZES] = { |
2966 | | BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, BLOCK_8X8, BLOCK_8X8, |
2967 | | BLOCK_8X8, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, BLOCK_32X32, |
2968 | | BLOCK_32X32, BLOCK_32X32, BLOCK_64X64 |
2969 | | }; |
2970 | | |
2971 | | // Look at neighboring blocks and set a min and max partition size based on |
2972 | | // what they chose. |
2973 | | static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile, |
2974 | | MACROBLOCKD *const xd, int mi_row, |
2975 | | int mi_col, BLOCK_SIZE *min_block_size, |
2976 | 0 | BLOCK_SIZE *max_block_size) { |
2977 | 0 | VP9_COMMON *const cm = &cpi->common; |
2978 | 0 | MODE_INFO **mi = xd->mi; |
2979 | 0 | const int left_in_image = !!xd->left_mi; |
2980 | 0 | const int above_in_image = !!xd->above_mi; |
2981 | 0 | const int row8x8_remaining = tile->mi_row_end - mi_row; |
2982 | 0 | const int col8x8_remaining = tile->mi_col_end - mi_col; |
2983 | 0 | int bh, bw; |
2984 | 0 | BLOCK_SIZE min_size = BLOCK_4X4; |
2985 | 0 | BLOCK_SIZE max_size = BLOCK_64X64; |
2986 | 0 | int bs_hist[BLOCK_SIZES] = { 0 }; |
2987 | | |
2988 | | // Trap case where we do not have a prediction. |
2989 | 0 | if (left_in_image || above_in_image || cm->frame_type != KEY_FRAME) { |
2990 | | // Default "min to max" and "max to min" |
2991 | 0 | min_size = BLOCK_64X64; |
2992 | 0 | max_size = BLOCK_4X4; |
2993 | | |
2994 | | // NOTE: each call to get_sb_partition_size_range() uses the previous |
2995 | | // passed in values for min and max as a starting point. |
2996 | | // Find the min and max partition used in previous frame at this location |
2997 | 0 | if (cm->frame_type != KEY_FRAME) { |
2998 | 0 | MODE_INFO **prev_mi = |
2999 | 0 | &cm->prev_mi_grid_visible[mi_row * xd->mi_stride + mi_col]; |
3000 | 0 | get_sb_partition_size_range(xd, prev_mi, &min_size, &max_size, bs_hist); |
3001 | 0 | } |
3002 | | // Find the min and max partition sizes used in the left SB64 |
3003 | 0 | if (left_in_image) { |
3004 | 0 | MODE_INFO **left_sb64_mi = &mi[-MI_BLOCK_SIZE]; |
3005 | 0 | get_sb_partition_size_range(xd, left_sb64_mi, &min_size, &max_size, |
3006 | 0 | bs_hist); |
3007 | 0 | } |
3008 | | // Find the min and max partition sizes used in the above SB64. |
3009 | 0 | if (above_in_image) { |
3010 | 0 | MODE_INFO **above_sb64_mi = &mi[-xd->mi_stride * MI_BLOCK_SIZE]; |
3011 | 0 | get_sb_partition_size_range(xd, above_sb64_mi, &min_size, &max_size, |
3012 | 0 | bs_hist); |
3013 | 0 | } |
3014 | | |
3015 | | // Adjust observed min and max for "relaxed" auto partition case. |
3016 | 0 | if (cpi->sf.auto_min_max_partition_size == RELAXED_NEIGHBORING_MIN_MAX) { |
3017 | 0 | min_size = min_partition_size[min_size]; |
3018 | 0 | max_size = max_partition_size[max_size]; |
3019 | 0 | } |
3020 | 0 | } |
3021 | | |
3022 | | // Check border cases where max and min from neighbors may not be legal. |
3023 | 0 | max_size = find_partition_size(max_size, row8x8_remaining, col8x8_remaining, |
3024 | 0 | &bh, &bw); |
3025 | | // Test for blocks at the edge of the active image. |
3026 | | // This may be the actual edge of the image or where there are formatting |
3027 | | // bars. |
3028 | 0 | if (vp9_active_edge_sb(cpi, mi_row, mi_col)) { |
3029 | 0 | min_size = BLOCK_4X4; |
3030 | 0 | } else { |
3031 | 0 | min_size = |
3032 | 0 | VPXMIN(cpi->sf.rd_auto_partition_min_limit, VPXMIN(min_size, max_size)); |
3033 | 0 | } |
3034 | | |
3035 | | // When use_square_partition_only is true, make sure at least one square |
3036 | | // partition is allowed by selecting the next smaller square size as |
3037 | | // *min_block_size. |
3038 | 0 | if (cpi->sf.use_square_partition_only && |
3039 | 0 | next_square_size[max_size] < min_size) { |
3040 | 0 | min_size = next_square_size[max_size]; |
3041 | 0 | } |
3042 | |
|
3043 | 0 | *min_block_size = min_size; |
3044 | 0 | *max_block_size = max_size; |
3045 | 0 | } |
3046 | | |
3047 | | // TODO(jingning) refactor functions setting partition search range |
3048 | | static void set_partition_range(VP9_COMMON *cm, MACROBLOCKD *xd, int mi_row, |
3049 | | int mi_col, BLOCK_SIZE bsize, |
3050 | 0 | BLOCK_SIZE *min_bs, BLOCK_SIZE *max_bs) { |
3051 | 0 | int mi_width = num_8x8_blocks_wide_lookup[bsize]; |
3052 | 0 | int mi_height = num_8x8_blocks_high_lookup[bsize]; |
3053 | 0 | int idx, idy; |
3054 | |
|
3055 | 0 | MODE_INFO *mi; |
3056 | 0 | const int idx_str = cm->mi_stride * mi_row + mi_col; |
3057 | 0 | MODE_INFO **prev_mi = &cm->prev_mi_grid_visible[idx_str]; |
3058 | 0 | BLOCK_SIZE bs, min_size, max_size; |
3059 | |
|
3060 | 0 | min_size = BLOCK_64X64; |
3061 | 0 | max_size = BLOCK_4X4; |
3062 | |
|
3063 | 0 | for (idy = 0; idy < mi_height; ++idy) { |
3064 | 0 | for (idx = 0; idx < mi_width; ++idx) { |
3065 | 0 | mi = prev_mi[idy * cm->mi_stride + idx]; |
3066 | 0 | bs = mi ? mi->sb_type : bsize; |
3067 | 0 | min_size = VPXMIN(min_size, bs); |
3068 | 0 | max_size = VPXMAX(max_size, bs); |
3069 | 0 | } |
3070 | 0 | } |
3071 | |
|
3072 | 0 | if (xd->left_mi) { |
3073 | 0 | for (idy = 0; idy < mi_height; ++idy) { |
3074 | 0 | mi = xd->mi[idy * cm->mi_stride - 1]; |
3075 | 0 | bs = mi ? mi->sb_type : bsize; |
3076 | 0 | min_size = VPXMIN(min_size, bs); |
3077 | 0 | max_size = VPXMAX(max_size, bs); |
3078 | 0 | } |
3079 | 0 | } |
3080 | |
|
3081 | 0 | if (xd->above_mi) { |
3082 | 0 | for (idx = 0; idx < mi_width; ++idx) { |
3083 | 0 | mi = xd->mi[idx - cm->mi_stride]; |
3084 | 0 | bs = mi ? mi->sb_type : bsize; |
3085 | 0 | min_size = VPXMIN(min_size, bs); |
3086 | 0 | max_size = VPXMAX(max_size, bs); |
3087 | 0 | } |
3088 | 0 | } |
3089 | |
|
3090 | 0 | if (min_size == max_size) { |
3091 | 0 | min_size = min_partition_size[min_size]; |
3092 | 0 | max_size = max_partition_size[max_size]; |
3093 | 0 | } |
3094 | |
|
3095 | 0 | *min_bs = min_size; |
3096 | 0 | *max_bs = max_size; |
3097 | 0 | } |
3098 | | #endif // !CONFIG_REALTIME_ONLY |
3099 | | |
3100 | 3.41M | static INLINE void store_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) { |
3101 | 3.41M | memcpy(ctx->pred_mv, x->pred_mv, sizeof(x->pred_mv)); |
3102 | 3.41M | } |
3103 | | |
3104 | 6.02M | static INLINE void load_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) { |
3105 | 6.02M | memcpy(x->pred_mv, ctx->pred_mv, sizeof(x->pred_mv)); |
3106 | 6.02M | } |
3107 | | |
3108 | | // Calculate prediction based on the given input features and neural net config. |
3109 | | // Assume there are no more than NN_MAX_NODES_PER_LAYER nodes in each hidden |
3110 | | // layer. |
3111 | | static void nn_predict(const float *features, const NN_CONFIG *nn_config, |
3112 | 1.39M | float *output) { |
3113 | 1.39M | int num_input_nodes = nn_config->num_inputs; |
3114 | 1.39M | int buf_index = 0; |
3115 | 1.39M | float buf[2][NN_MAX_NODES_PER_LAYER]; |
3116 | 1.39M | const float *input_nodes = features; |
3117 | | |
3118 | | // Propagate hidden layers. |
3119 | 1.39M | const int num_layers = nn_config->num_hidden_layers; |
3120 | 1.39M | int layer, node, i; |
3121 | 1.39M | assert(num_layers <= NN_MAX_HIDDEN_LAYERS); |
3122 | 2.78M | for (layer = 0; layer < num_layers; ++layer) { |
3123 | 1.39M | const float *weights = nn_config->weights[layer]; |
3124 | 1.39M | const float *bias = nn_config->bias[layer]; |
3125 | 1.39M | float *output_nodes = buf[buf_index]; |
3126 | 1.39M | const int num_output_nodes = nn_config->num_hidden_nodes[layer]; |
3127 | 1.39M | assert(num_output_nodes < NN_MAX_NODES_PER_LAYER); |
3128 | 14.5M | for (node = 0; node < num_output_nodes; ++node) { |
3129 | 13.1M | float val = 0.0f; |
3130 | 154M | for (i = 0; i < num_input_nodes; ++i) val += weights[i] * input_nodes[i]; |
3131 | 13.1M | val += bias[node]; |
3132 | | // ReLU as activation function. |
3133 | 13.1M | val = VPXMAX(val, 0.0f); |
3134 | 13.1M | output_nodes[node] = val; |
3135 | 13.1M | weights += num_input_nodes; |
3136 | 13.1M | } |
3137 | 1.39M | num_input_nodes = num_output_nodes; |
3138 | 1.39M | input_nodes = output_nodes; |
3139 | 1.39M | buf_index = 1 - buf_index; |
3140 | 1.39M | } |
3141 | | |
3142 | | // Final output layer. |
3143 | 1.39M | { |
3144 | 1.39M | const float *weights = nn_config->weights[num_layers]; |
3145 | 3.54M | for (node = 0; node < nn_config->num_outputs; ++node) { |
3146 | 2.15M | const float *bias = nn_config->bias[num_layers]; |
3147 | 2.15M | float val = 0.0f; |
3148 | 27.4M | for (i = 0; i < num_input_nodes; ++i) val += weights[i] * input_nodes[i]; |
3149 | 2.15M | output[node] = val + bias[node]; |
3150 | 2.15M | weights += num_input_nodes; |
3151 | 2.15M | } |
3152 | 1.39M | } |
3153 | 1.39M | } |
3154 | | |
3155 | | #if !CONFIG_REALTIME_ONLY |
3156 | 0 | #define FEATURES 7 |
3157 | | // Machine-learning based partition search early termination. |
3158 | | // Return 1 to skip split and rect partitions. |
3159 | | static int ml_pruning_partition(VP9_COMMON *const cm, MACROBLOCKD *const xd, |
3160 | | PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col, |
3161 | 0 | BLOCK_SIZE bsize) { |
3162 | 0 | const int mag_mv = |
3163 | 0 | abs(ctx->mic.mv[0].as_mv.col) + abs(ctx->mic.mv[0].as_mv.row); |
3164 | 0 | const int left_in_image = !!xd->left_mi; |
3165 | 0 | const int above_in_image = !!xd->above_mi; |
3166 | 0 | MODE_INFO **prev_mi = |
3167 | 0 | &cm->prev_mi_grid_visible[mi_col + cm->mi_stride * mi_row]; |
3168 | 0 | int above_par = 0; // above_partitioning |
3169 | 0 | int left_par = 0; // left_partitioning |
3170 | 0 | int last_par = 0; // last_partitioning |
3171 | 0 | int offset = 0; |
3172 | 0 | int i; |
3173 | 0 | BLOCK_SIZE context_size; |
3174 | 0 | const NN_CONFIG *nn_config = NULL; |
3175 | 0 | const float *mean, *sd, *linear_weights; |
3176 | 0 | float nn_score, linear_score; |
3177 | 0 | float features[FEATURES]; |
3178 | |
|
3179 | 0 | assert(b_width_log2_lookup[bsize] == b_height_log2_lookup[bsize]); |
3180 | 0 | vpx_clear_system_state(); |
3181 | |
|
3182 | 0 | switch (bsize) { |
3183 | 0 | case BLOCK_64X64: |
3184 | 0 | offset = 0; |
3185 | 0 | nn_config = &vp9_partition_nnconfig_64x64; |
3186 | 0 | break; |
3187 | 0 | case BLOCK_32X32: |
3188 | 0 | offset = 8; |
3189 | 0 | nn_config = &vp9_partition_nnconfig_32x32; |
3190 | 0 | break; |
3191 | 0 | case BLOCK_16X16: |
3192 | 0 | offset = 16; |
3193 | 0 | nn_config = &vp9_partition_nnconfig_16x16; |
3194 | 0 | break; |
3195 | 0 | default: assert(0 && "Unexpected block size."); return 0; |
3196 | 0 | } |
3197 | | |
3198 | 0 | if (above_in_image) { |
3199 | 0 | context_size = xd->above_mi->sb_type; |
3200 | 0 | if (context_size < bsize) |
3201 | 0 | above_par = 2; |
3202 | 0 | else if (context_size == bsize) |
3203 | 0 | above_par = 1; |
3204 | 0 | } |
3205 | |
|
3206 | 0 | if (left_in_image) { |
3207 | 0 | context_size = xd->left_mi->sb_type; |
3208 | 0 | if (context_size < bsize) |
3209 | 0 | left_par = 2; |
3210 | 0 | else if (context_size == bsize) |
3211 | 0 | left_par = 1; |
3212 | 0 | } |
3213 | |
|
3214 | 0 | if (prev_mi[0]) { |
3215 | 0 | context_size = prev_mi[0]->sb_type; |
3216 | 0 | if (context_size < bsize) |
3217 | 0 | last_par = 2; |
3218 | 0 | else if (context_size == bsize) |
3219 | 0 | last_par = 1; |
3220 | 0 | } |
3221 | |
|
3222 | 0 | mean = &vp9_partition_feature_mean[offset]; |
3223 | 0 | sd = &vp9_partition_feature_std[offset]; |
3224 | 0 | features[0] = ((float)ctx->rate - mean[0]) / sd[0]; |
3225 | 0 | features[1] = ((float)ctx->dist - mean[1]) / sd[1]; |
3226 | 0 | features[2] = ((float)mag_mv / 2 - mean[2]) * sd[2]; |
3227 | 0 | features[3] = ((float)(left_par + above_par) / 2 - mean[3]) * sd[3]; |
3228 | 0 | features[4] = ((float)ctx->sum_y_eobs - mean[4]) / sd[4]; |
3229 | 0 | features[5] = ((float)cm->base_qindex - mean[5]) * sd[5]; |
3230 | 0 | features[6] = ((float)last_par - mean[6]) * sd[6]; |
3231 | | |
3232 | | // Predict using linear model. |
3233 | 0 | linear_weights = &vp9_partition_linear_weights[offset]; |
3234 | 0 | linear_score = linear_weights[FEATURES]; |
3235 | 0 | for (i = 0; i < FEATURES; ++i) |
3236 | 0 | linear_score += linear_weights[i] * features[i]; |
3237 | 0 | if (linear_score > 0.1f) return 0; |
3238 | | |
3239 | | // Predict using neural net model. |
3240 | 0 | nn_predict(features, nn_config, &nn_score); |
3241 | |
|
3242 | 0 | if (linear_score < -0.0f && nn_score < 0.1f) return 1; |
3243 | 0 | if (nn_score < -0.0f && linear_score < 0.1f) return 1; |
3244 | 0 | return 0; |
3245 | 0 | } |
3246 | | #undef FEATURES |
3247 | | |
3248 | 193k | #define FEATURES 4 |
3249 | | // ML-based partition search breakout. |
3250 | | static int ml_predict_breakout(VP9_COMP *const cpi, BLOCK_SIZE bsize, |
3251 | | const MACROBLOCK *const x, |
3252 | 32.2k | const RD_COST *const rd_cost) { |
3253 | 32.2k | DECLARE_ALIGNED(16, static const uint8_t, vp9_64_zeros[64]) = { 0 }; |
3254 | 32.2k | const VP9_COMMON *const cm = &cpi->common; |
3255 | 32.2k | float features[FEATURES]; |
3256 | 32.2k | const float *linear_weights = NULL; // Linear model weights. |
3257 | 32.2k | float linear_score = 0.0f; |
3258 | 32.2k | const int qindex = cm->base_qindex; |
3259 | 32.2k | const int q_ctx = qindex >= 200 ? 0 : (qindex >= 150 ? 1 : 2); |
3260 | 32.2k | const int is_720p_or_larger = VPXMIN(cm->width, cm->height) >= 720; |
3261 | 32.2k | const int resolution_ctx = is_720p_or_larger ? 1 : 0; |
3262 | | |
3263 | 32.2k | switch (bsize) { |
3264 | 139 | case BLOCK_64X64: |
3265 | 139 | linear_weights = vp9_partition_breakout_weights_64[resolution_ctx][q_ctx]; |
3266 | 139 | break; |
3267 | 644 | case BLOCK_32X32: |
3268 | 644 | linear_weights = vp9_partition_breakout_weights_32[resolution_ctx][q_ctx]; |
3269 | 644 | break; |
3270 | 4.09k | case BLOCK_16X16: |
3271 | 4.09k | linear_weights = vp9_partition_breakout_weights_16[resolution_ctx][q_ctx]; |
3272 | 4.09k | break; |
3273 | 27.4k | case BLOCK_8X8: |
3274 | 27.4k | linear_weights = vp9_partition_breakout_weights_8[resolution_ctx][q_ctx]; |
3275 | 27.4k | break; |
3276 | 0 | default: assert(0 && "Unexpected block size."); return 0; |
3277 | 32.2k | } |
3278 | 32.2k | if (!linear_weights) return 0; |
3279 | | |
3280 | 32.2k | { // Generate feature values. |
3281 | 32.2k | #if CONFIG_VP9_HIGHBITDEPTH |
3282 | 32.2k | const int ac_q = |
3283 | 32.2k | vp9_ac_quant(cm->base_qindex, 0, cm->bit_depth) >> (x->e_mbd.bd - 8); |
3284 | | #else |
3285 | | const int ac_q = vp9_ac_quant(qindex, 0, cm->bit_depth); |
3286 | | #endif // CONFIG_VP9_HIGHBITDEPTH |
3287 | 32.2k | const int num_pels_log2 = num_pels_log2_lookup[bsize]; |
3288 | 32.2k | int feature_index = 0; |
3289 | 32.2k | unsigned int var, sse; |
3290 | 32.2k | float rate_f, dist_f; |
3291 | | |
3292 | 32.2k | #if CONFIG_VP9_HIGHBITDEPTH |
3293 | 32.2k | if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { |
3294 | 0 | var = |
3295 | 0 | vp9_high_get_sby_variance(cpi, &x->plane[0].src, bsize, x->e_mbd.bd); |
3296 | 32.2k | } else { |
3297 | 32.2k | var = cpi->fn_ptr[bsize].vf(x->plane[0].src.buf, x->plane[0].src.stride, |
3298 | 32.2k | vp9_64_zeros, 0, &sse); |
3299 | 32.2k | } |
3300 | | #else |
3301 | | var = cpi->fn_ptr[bsize].vf(x->plane[0].src.buf, x->plane[0].src.stride, |
3302 | | vp9_64_zeros, 0, &sse); |
3303 | | #endif |
3304 | 32.2k | var = var >> num_pels_log2; |
3305 | | |
3306 | 32.2k | vpx_clear_system_state(); |
3307 | | |
3308 | 32.2k | rate_f = (float)VPXMIN(rd_cost->rate, INT_MAX); |
3309 | 32.2k | dist_f = (float)(VPXMIN(rd_cost->dist, INT_MAX) >> num_pels_log2); |
3310 | 32.2k | rate_f = |
3311 | 32.2k | ((float)x->rdmult / 128.0f / 512.0f / (float)(1 << num_pels_log2)) * |
3312 | 32.2k | rate_f; |
3313 | | |
3314 | 32.2k | features[feature_index++] = rate_f; |
3315 | 32.2k | features[feature_index++] = dist_f; |
3316 | 32.2k | features[feature_index++] = (float)var; |
3317 | 32.2k | features[feature_index++] = (float)ac_q; |
3318 | 32.2k | assert(feature_index == FEATURES); |
3319 | 32.2k | } |
3320 | | |
3321 | 32.2k | { // Calculate the output score. |
3322 | 32.2k | int i; |
3323 | 32.2k | linear_score = linear_weights[FEATURES]; |
3324 | 161k | for (i = 0; i < FEATURES; ++i) |
3325 | 129k | linear_score += linear_weights[i] * features[i]; |
3326 | 32.2k | } |
3327 | | |
3328 | 32.2k | return linear_score >= cpi->sf.rd_ml_partition.search_breakout_thresh[q_ctx]; |
3329 | 32.2k | } |
3330 | | #undef FEATURES |
3331 | | |
3332 | | #define FEATURES 8 |
3333 | 2.52M | #define LABELS 4 |
3334 | | static void ml_prune_rect_partition(VP9_COMP *const cpi, MACROBLOCK *const x, |
3335 | | BLOCK_SIZE bsize, |
3336 | | const PC_TREE *const pc_tree, |
3337 | | int *allow_horz, int *allow_vert, |
3338 | 581k | int64_t ref_rd) { |
3339 | 581k | const NN_CONFIG *nn_config = NULL; |
3340 | 581k | float score[LABELS] = { |
3341 | 581k | 0.0f, |
3342 | 581k | }; |
3343 | 581k | int thresh = -1; |
3344 | 581k | int i; |
3345 | 581k | (void)x; |
3346 | | |
3347 | 581k | if (ref_rd <= 0 || ref_rd > 1000000000) return; |
3348 | | |
3349 | 458k | switch (bsize) { |
3350 | 0 | case BLOCK_8X8: break; |
3351 | 360k | case BLOCK_16X16: |
3352 | 360k | nn_config = &vp9_rect_part_nnconfig_16; |
3353 | 360k | thresh = cpi->sf.rd_ml_partition.prune_rect_thresh[1]; |
3354 | 360k | break; |
3355 | 88.0k | case BLOCK_32X32: |
3356 | 88.0k | nn_config = &vp9_rect_part_nnconfig_32; |
3357 | 88.0k | thresh = cpi->sf.rd_ml_partition.prune_rect_thresh[2]; |
3358 | 88.0k | break; |
3359 | 9.50k | case BLOCK_64X64: |
3360 | 9.50k | nn_config = &vp9_rect_part_nnconfig_64; |
3361 | 9.50k | thresh = cpi->sf.rd_ml_partition.prune_rect_thresh[3]; |
3362 | 9.50k | break; |
3363 | 0 | default: assert(0 && "Unexpected block size."); return; |
3364 | 458k | } |
3365 | 458k | if (!nn_config || thresh < 0) return; |
3366 | | |
3367 | | // Feature extraction and model score calculation. |
3368 | 252k | { |
3369 | 252k | const VP9_COMMON *const cm = &cpi->common; |
3370 | 252k | #if CONFIG_VP9_HIGHBITDEPTH |
3371 | 252k | const int dc_q = |
3372 | 252k | vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth) >> (x->e_mbd.bd - 8); |
3373 | | #else |
3374 | | const int dc_q = vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth); |
3375 | | #endif // CONFIG_VP9_HIGHBITDEPTH |
3376 | 252k | const int bs = 4 * num_4x4_blocks_wide_lookup[bsize]; |
3377 | 252k | int feature_index = 0; |
3378 | 252k | float features[FEATURES]; |
3379 | | |
3380 | 252k | features[feature_index++] = logf((float)dc_q + 1.0f); |
3381 | 252k | features[feature_index++] = |
3382 | 252k | (float)(pc_tree->partitioning == PARTITION_NONE); |
3383 | 252k | features[feature_index++] = logf((float)ref_rd / bs / bs + 1.0f); |
3384 | | |
3385 | 252k | { |
3386 | 252k | const float norm_factor = 1.0f / ((float)ref_rd + 1.0f); |
3387 | 252k | const int64_t none_rdcost = pc_tree->none.rdcost; |
3388 | 252k | float rd_ratio = 2.0f; |
3389 | 252k | if (none_rdcost > 0 && none_rdcost < 1000000000) |
3390 | 159k | rd_ratio = (float)none_rdcost * norm_factor; |
3391 | 252k | features[feature_index++] = VPXMIN(rd_ratio, 2.0f); |
3392 | | |
3393 | 1.26M | for (i = 0; i < 4; ++i) { |
3394 | 1.01M | const int64_t this_rd = pc_tree->u.split[i]->none.rdcost; |
3395 | 1.01M | const int rd_valid = this_rd > 0 && this_rd < 1000000000; |
3396 | | // Ratio between sub-block RD and whole block RD. |
3397 | 1.01M | features[feature_index++] = |
3398 | 1.01M | rd_valid ? (float)this_rd * norm_factor : 1.0f; |
3399 | 1.01M | } |
3400 | 252k | } |
3401 | | |
3402 | 252k | assert(feature_index == FEATURES); |
3403 | 252k | nn_predict(features, nn_config, score); |
3404 | 252k | } |
3405 | | |
3406 | | // Make decisions based on the model score. |
3407 | 252k | { |
3408 | 252k | int max_score = -1000; |
3409 | 252k | int horz = 0, vert = 0; |
3410 | 252k | int int_score[LABELS]; |
3411 | 1.26M | for (i = 0; i < LABELS; ++i) { |
3412 | 1.01M | int_score[i] = (int)(100 * score[i]); |
3413 | 1.01M | max_score = VPXMAX(int_score[i], max_score); |
3414 | 1.01M | } |
3415 | 252k | thresh = max_score - thresh; |
3416 | 1.26M | for (i = 0; i < LABELS; ++i) { |
3417 | 1.01M | if (int_score[i] >= thresh) { |
3418 | 294k | if ((i >> 0) & 1) horz = 1; |
3419 | 294k | if ((i >> 1) & 1) vert = 1; |
3420 | 294k | } |
3421 | 1.01M | } |
3422 | 252k | *allow_horz = *allow_horz && horz; |
3423 | 252k | *allow_vert = *allow_vert && vert; |
3424 | 252k | } |
3425 | 252k | } |
3426 | | #undef FEATURES |
3427 | | #undef LABELS |
3428 | | |
3429 | | // Perform fast and coarse motion search for the given block. This is a |
3430 | | // pre-processing step for the ML based partition search speedup. |
3431 | | static void simple_motion_search(const VP9_COMP *const cpi, MACROBLOCK *const x, |
3432 | | BLOCK_SIZE bsize, int mi_row, int mi_col, |
3433 | | MV ref_mv, MV_REFERENCE_FRAME ref, |
3434 | 1.13M | uint8_t *const pred_buf) { |
3435 | 1.13M | const VP9_COMMON *const cm = &cpi->common; |
3436 | 1.13M | MACROBLOCKD *const xd = &x->e_mbd; |
3437 | 1.13M | MODE_INFO *const mi = xd->mi[0]; |
3438 | 1.13M | YV12_BUFFER_CONFIG *yv12; |
3439 | 1.13M | YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi, ref); |
3440 | 1.13M | const int step_param = 1; |
3441 | 1.13M | const MvLimits tmp_mv_limits = x->mv_limits; |
3442 | 1.13M | const SEARCH_METHODS search_method = NSTEP; |
3443 | 1.13M | const int sadpb = x->sadperbit16; |
3444 | 1.13M | MV ref_mv_full = { ref_mv.row >> 3, ref_mv.col >> 3 }; |
3445 | 1.13M | MV best_mv = { 0, 0 }; |
3446 | 1.13M | int cost_list[5]; |
3447 | 1.13M | struct buf_2d backup_pre[MAX_MB_PLANE] = { { 0, 0 } }; |
3448 | | |
3449 | 1.13M | if (scaled_ref_frame) { |
3450 | 0 | yv12 = scaled_ref_frame; |
3451 | | // As reported in b/311294795, the reference buffer pointer needs to be |
3452 | | // saved and restored after the search. Otherwise, it causes problems while |
3453 | | // the reference frame scaling happens. |
3454 | 0 | for (int i = 0; i < MAX_MB_PLANE; i++) backup_pre[i] = xd->plane[i].pre[0]; |
3455 | 1.13M | } else { |
3456 | 1.13M | yv12 = get_ref_frame_buffer(cpi, ref); |
3457 | 1.13M | } |
3458 | | |
3459 | 1.13M | assert(yv12 != NULL); |
3460 | 1.13M | if (!yv12) return; |
3461 | 1.13M | vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col, NULL); |
3462 | 1.13M | mi->ref_frame[0] = ref; |
3463 | 1.13M | mi->ref_frame[1] = NO_REF_FRAME; |
3464 | 1.13M | mi->sb_type = bsize; |
3465 | 1.13M | vp9_set_mv_search_range(&x->mv_limits, &ref_mv); |
3466 | 1.13M | vp9_full_pixel_search(cpi, x, bsize, &ref_mv_full, step_param, search_method, |
3467 | 1.13M | sadpb, cond_cost_list(cpi, cost_list), &ref_mv, |
3468 | 1.13M | &best_mv, 0, 0); |
3469 | 1.13M | best_mv.row *= 8; |
3470 | 1.13M | best_mv.col *= 8; |
3471 | 1.13M | x->mv_limits = tmp_mv_limits; |
3472 | 1.13M | mi->mv[0].as_mv = best_mv; |
3473 | | |
3474 | | // Restore reference buffer pointer. |
3475 | 1.13M | if (scaled_ref_frame) { |
3476 | 0 | for (int i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_pre[i]; |
3477 | 0 | } |
3478 | | |
3479 | 1.13M | set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]); |
3480 | 1.13M | xd->plane[0].dst.buf = pred_buf; |
3481 | 1.13M | xd->plane[0].dst.stride = 64; |
3482 | 1.13M | vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); |
3483 | 1.13M | } |
3484 | | |
3485 | | // Use a neural net model to prune partition-none and partition-split search. |
3486 | | // Features used: QP; spatial block size contexts; variance of prediction |
3487 | | // residue after simple_motion_search. |
3488 | | #define FEATURES 12 |
3489 | | static void ml_predict_var_rd_partitioning(const VP9_COMP *const cpi, |
3490 | | MACROBLOCK *const x, |
3491 | | PC_TREE *const pc_tree, |
3492 | | BLOCK_SIZE bsize, int mi_row, |
3493 | 1.13M | int mi_col, int *none, int *split) { |
3494 | 1.13M | const VP9_COMMON *const cm = &cpi->common; |
3495 | 1.13M | const NN_CONFIG *nn_config = NULL; |
3496 | 1.13M | const MACROBLOCKD *const xd = &x->e_mbd; |
3497 | 1.13M | #if CONFIG_VP9_HIGHBITDEPTH |
3498 | 1.13M | DECLARE_ALIGNED(16, uint8_t, pred_buffer[64 * 64 * 2]); |
3499 | 1.13M | uint8_t *const pred_buf = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) |
3500 | 1.13M | ? (CONVERT_TO_BYTEPTR(pred_buffer)) |
3501 | 1.13M | : pred_buffer; |
3502 | | #else |
3503 | | DECLARE_ALIGNED(16, uint8_t, pred_buffer[64 * 64]); |
3504 | | uint8_t *const pred_buf = pred_buffer; |
3505 | | #endif // CONFIG_VP9_HIGHBITDEPTH |
3506 | 1.13M | const int speed = cpi->oxcf.speed; |
3507 | 1.13M | float thresh = 0.0f; |
3508 | | |
3509 | 1.13M | switch (bsize) { |
3510 | 14.1k | case BLOCK_64X64: |
3511 | 14.1k | nn_config = &vp9_part_split_nnconfig_64; |
3512 | 14.1k | thresh = speed > 0 ? 2.8f : 3.0f; |
3513 | 14.1k | break; |
3514 | 56.3k | case BLOCK_32X32: |
3515 | 56.3k | nn_config = &vp9_part_split_nnconfig_32; |
3516 | 56.3k | thresh = speed > 0 ? 3.5f : 3.0f; |
3517 | 56.3k | break; |
3518 | 219k | case BLOCK_16X16: |
3519 | 219k | nn_config = &vp9_part_split_nnconfig_16; |
3520 | 219k | thresh = speed > 0 ? 3.8f : 4.0f; |
3521 | 219k | break; |
3522 | 849k | case BLOCK_8X8: |
3523 | 849k | nn_config = &vp9_part_split_nnconfig_8; |
3524 | 849k | if (cm->width >= 720 && cm->height >= 720) |
3525 | 0 | thresh = speed > 0 ? 2.5f : 2.0f; |
3526 | 849k | else |
3527 | 849k | thresh = speed > 0 ? 3.8f : 2.0f; |
3528 | 849k | break; |
3529 | 0 | default: assert(0 && "Unexpected block size."); return; |
3530 | 1.13M | } |
3531 | | |
3532 | 1.13M | if (!nn_config) return; |
3533 | | |
3534 | | // Do a simple single motion search to find a prediction for current block. |
3535 | | // The variance of the residue will be used as input features. |
3536 | 1.13M | { |
3537 | 1.13M | MV ref_mv; |
3538 | 1.13M | const MV_REFERENCE_FRAME ref = |
3539 | 1.13M | cpi->rc.is_src_frame_alt_ref ? ALTREF_FRAME : LAST_FRAME; |
3540 | | // If bsize is 64x64, use zero MV as reference; otherwise, use MV result |
3541 | | // of previous(larger) block as reference. |
3542 | 1.13M | if (bsize == BLOCK_64X64) |
3543 | 14.1k | ref_mv.row = ref_mv.col = 0; |
3544 | 1.12M | else |
3545 | 1.12M | ref_mv = pc_tree->mv; |
3546 | 1.13M | vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col); |
3547 | 1.13M | simple_motion_search(cpi, x, bsize, mi_row, mi_col, ref_mv, ref, pred_buf); |
3548 | 1.13M | pc_tree->mv = x->e_mbd.mi[0]->mv[0].as_mv; |
3549 | 1.13M | } |
3550 | | |
3551 | 1.13M | vpx_clear_system_state(); |
3552 | | |
3553 | 1.13M | { |
3554 | 1.13M | float features[FEATURES] = { 0.0f }; |
3555 | 1.13M | #if CONFIG_VP9_HIGHBITDEPTH |
3556 | 1.13M | const int dc_q = |
3557 | 1.13M | vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth) >> (xd->bd - 8); |
3558 | | #else |
3559 | | const int dc_q = vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth); |
3560 | | #endif // CONFIG_VP9_HIGHBITDEPTH |
3561 | 1.13M | int feature_idx = 0; |
3562 | 1.13M | float score; |
3563 | | |
3564 | | // Generate model input features. |
3565 | 1.13M | features[feature_idx++] = logf((float)dc_q + 1.0f); |
3566 | | |
3567 | | // Get the variance of the residue as input features. |
3568 | 1.13M | { |
3569 | 1.13M | const int bs = 4 * num_4x4_blocks_wide_lookup[bsize]; |
3570 | 1.13M | const BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_SPLIT); |
3571 | 1.13M | const uint8_t *pred = pred_buf; |
3572 | 1.13M | const uint8_t *src = x->plane[0].src.buf; |
3573 | 1.13M | const int src_stride = x->plane[0].src.stride; |
3574 | 1.13M | const int pred_stride = 64; |
3575 | 1.13M | unsigned int sse; |
3576 | | // Variance of whole block. |
3577 | 1.13M | const unsigned int var = |
3578 | 1.13M | cpi->fn_ptr[bsize].vf(src, src_stride, pred, pred_stride, &sse); |
3579 | 1.13M | const float factor = (var == 0) ? 1.0f : (1.0f / (float)var); |
3580 | 1.13M | const int has_above = !!xd->above_mi; |
3581 | 1.13M | const int has_left = !!xd->left_mi; |
3582 | 1.13M | const BLOCK_SIZE above_bsize = has_above ? xd->above_mi->sb_type : bsize; |
3583 | 1.13M | const BLOCK_SIZE left_bsize = has_left ? xd->left_mi->sb_type : bsize; |
3584 | 1.13M | int i; |
3585 | | |
3586 | 1.13M | features[feature_idx++] = (float)has_above; |
3587 | 1.13M | features[feature_idx++] = (float)b_width_log2_lookup[above_bsize]; |
3588 | 1.13M | features[feature_idx++] = (float)b_height_log2_lookup[above_bsize]; |
3589 | 1.13M | features[feature_idx++] = (float)has_left; |
3590 | 1.13M | features[feature_idx++] = (float)b_width_log2_lookup[left_bsize]; |
3591 | 1.13M | features[feature_idx++] = (float)b_height_log2_lookup[left_bsize]; |
3592 | 1.13M | features[feature_idx++] = logf((float)var + 1.0f); |
3593 | 5.69M | for (i = 0; i < 4; ++i) { |
3594 | 4.55M | const int x_idx = (i & 1) * bs / 2; |
3595 | 4.55M | const int y_idx = (i >> 1) * bs / 2; |
3596 | 4.55M | const int src_offset = y_idx * src_stride + x_idx; |
3597 | 4.55M | const int pred_offset = y_idx * pred_stride + x_idx; |
3598 | | // Variance of quarter block. |
3599 | 4.55M | const unsigned int sub_var = |
3600 | 4.55M | cpi->fn_ptr[subsize].vf(src + src_offset, src_stride, |
3601 | 4.55M | pred + pred_offset, pred_stride, &sse); |
3602 | 4.55M | const float var_ratio = (var == 0) ? 1.0f : factor * (float)sub_var; |
3603 | 4.55M | features[feature_idx++] = var_ratio; |
3604 | 4.55M | } |
3605 | 1.13M | } |
3606 | 1.13M | assert(feature_idx == FEATURES); |
3607 | | |
3608 | | // Feed the features into the model to get the confidence score. |
3609 | 1.13M | nn_predict(features, nn_config, &score); |
3610 | | |
3611 | | // Higher score means that the model has higher confidence that the split |
3612 | | // partition is better than the non-split partition. So if the score is |
3613 | | // high enough, we skip the none-split partition search; if the score is |
3614 | | // low enough, we skip the split partition search. |
3615 | 1.13M | if (score > thresh) *none = 0; |
3616 | 1.13M | if (score < -thresh) *split = 0; |
3617 | 1.13M | } |
3618 | 1.13M | } |
3619 | | #undef FEATURES |
3620 | | #endif // !CONFIG_REALTIME_ONLY |
3621 | | |
3622 | 0 | static double log_wiener_var(int64_t wiener_variance) { |
3623 | 0 | return log(1.0 + wiener_variance) / log(2.0); |
3624 | 0 | } |
3625 | | |
3626 | 0 | static void build_kmeans_segmentation(VP9_COMP *cpi) { |
3627 | 0 | VP9_COMMON *cm = &cpi->common; |
3628 | 0 | BLOCK_SIZE bsize = BLOCK_64X64; |
3629 | 0 | KMEANS_DATA *kmeans_data; |
3630 | |
|
3631 | 0 | vp9_disable_segmentation(&cm->seg); |
3632 | 0 | if (cm->show_frame) { |
3633 | 0 | int mi_row, mi_col; |
3634 | 0 | cpi->kmeans_data_size = 0; |
3635 | 0 | cpi->kmeans_ctr_num = 8; |
3636 | |
|
3637 | 0 | for (mi_row = 0; mi_row < cm->mi_rows; mi_row += MI_BLOCK_SIZE) { |
3638 | 0 | for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) { |
3639 | 0 | int mb_row_start = mi_row >> 1; |
3640 | 0 | int mb_col_start = mi_col >> 1; |
3641 | 0 | int mb_row_end = VPXMIN( |
3642 | 0 | (mi_row + num_8x8_blocks_high_lookup[bsize]) >> 1, cm->mb_rows); |
3643 | 0 | int mb_col_end = VPXMIN( |
3644 | 0 | (mi_col + num_8x8_blocks_wide_lookup[bsize]) >> 1, cm->mb_cols); |
3645 | 0 | int row, col; |
3646 | 0 | int64_t wiener_variance = 0; |
3647 | |
|
3648 | 0 | for (row = mb_row_start; row < mb_row_end; ++row) |
3649 | 0 | for (col = mb_col_start; col < mb_col_end; ++col) |
3650 | 0 | wiener_variance += cpi->mb_wiener_variance[row * cm->mb_cols + col]; |
3651 | |
|
3652 | 0 | wiener_variance /= |
3653 | 0 | (mb_row_end - mb_row_start) * (mb_col_end - mb_col_start); |
3654 | |
|
3655 | 0 | #if CONFIG_MULTITHREAD |
3656 | 0 | pthread_mutex_lock(&cpi->kmeans_mutex); |
3657 | 0 | #endif // CONFIG_MULTITHREAD |
3658 | |
|
3659 | 0 | kmeans_data = &cpi->kmeans_data_arr[cpi->kmeans_data_size++]; |
3660 | 0 | kmeans_data->value = log_wiener_var(wiener_variance); |
3661 | 0 | kmeans_data->pos = mi_row * cpi->kmeans_data_stride + mi_col; |
3662 | 0 | #if CONFIG_MULTITHREAD |
3663 | 0 | pthread_mutex_unlock(&cpi->kmeans_mutex); |
3664 | 0 | #endif // CONFIG_MULTITHREAD |
3665 | 0 | } |
3666 | 0 | } |
3667 | |
|
3668 | 0 | vp9_kmeans(cpi->kmeans_ctr_ls, cpi->kmeans_boundary_ls, |
3669 | 0 | cpi->kmeans_count_ls, cpi->kmeans_ctr_num, cpi->kmeans_data_arr, |
3670 | 0 | cpi->kmeans_data_size); |
3671 | |
|
3672 | 0 | vp9_perceptual_aq_mode_setup(cpi, &cm->seg); |
3673 | 0 | } |
3674 | 0 | } |
3675 | | |
3676 | | #if !CONFIG_REALTIME_ONLY |
3677 | | static int wiener_var_segment(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row, |
3678 | 0 | int mi_col) { |
3679 | 0 | VP9_COMMON *cm = &cpi->common; |
3680 | 0 | int mb_row_start = mi_row >> 1; |
3681 | 0 | int mb_col_start = mi_col >> 1; |
3682 | 0 | int mb_row_end = |
3683 | 0 | VPXMIN((mi_row + num_8x8_blocks_high_lookup[bsize]) >> 1, cm->mb_rows); |
3684 | 0 | int mb_col_end = |
3685 | 0 | VPXMIN((mi_col + num_8x8_blocks_wide_lookup[bsize]) >> 1, cm->mb_cols); |
3686 | 0 | int row, col, idx; |
3687 | 0 | int64_t wiener_variance = 0; |
3688 | 0 | int segment_id; |
3689 | 0 | int8_t seg_hist[MAX_SEGMENTS] = { 0 }; |
3690 | 0 | int8_t max_count = 0, max_index = -1; |
3691 | |
|
3692 | 0 | vpx_clear_system_state(); |
3693 | |
|
3694 | 0 | assert(cpi->norm_wiener_variance > 0); |
3695 | |
|
3696 | 0 | for (row = mb_row_start; row < mb_row_end; ++row) { |
3697 | 0 | for (col = mb_col_start; col < mb_col_end; ++col) { |
3698 | 0 | wiener_variance = cpi->mb_wiener_variance[row * cm->mb_cols + col]; |
3699 | 0 | segment_id = |
3700 | 0 | vp9_get_group_idx(log_wiener_var(wiener_variance), |
3701 | 0 | cpi->kmeans_boundary_ls, cpi->kmeans_ctr_num); |
3702 | 0 | ++seg_hist[segment_id]; |
3703 | 0 | } |
3704 | 0 | } |
3705 | |
|
3706 | 0 | for (idx = 0; idx < cpi->kmeans_ctr_num; ++idx) { |
3707 | 0 | if (seg_hist[idx] > max_count) { |
3708 | 0 | max_count = seg_hist[idx]; |
3709 | 0 | max_index = idx; |
3710 | 0 | } |
3711 | 0 | } |
3712 | |
|
3713 | 0 | assert(max_index >= 0); |
3714 | 0 | segment_id = max_index; |
3715 | |
|
3716 | 0 | return segment_id; |
3717 | 0 | } |
3718 | | |
3719 | | static int get_rdmult_delta(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row, |
3720 | 0 | int mi_col, int orig_rdmult) { |
3721 | 0 | const int gf_group_index = cpi->twopass.gf_group.index; |
3722 | 0 | int64_t intra_cost = 0; |
3723 | 0 | int64_t mc_dep_cost = 0; |
3724 | 0 | int mi_wide = num_8x8_blocks_wide_lookup[bsize]; |
3725 | 0 | int mi_high = num_8x8_blocks_high_lookup[bsize]; |
3726 | 0 | int row, col; |
3727 | |
|
3728 | 0 | int dr = 0; |
3729 | 0 | double r0, rk, beta; |
3730 | |
|
3731 | 0 | TplDepFrame *tpl_frame; |
3732 | 0 | TplDepStats *tpl_stats; |
3733 | 0 | int tpl_stride; |
3734 | |
|
3735 | 0 | if (gf_group_index >= MAX_ARF_GOP_SIZE) return orig_rdmult; |
3736 | 0 | tpl_frame = &cpi->tpl_stats[gf_group_index]; |
3737 | |
|
3738 | 0 | if (tpl_frame->is_valid == 0) return orig_rdmult; |
3739 | 0 | tpl_stats = tpl_frame->tpl_stats_ptr; |
3740 | 0 | tpl_stride = tpl_frame->stride; |
3741 | |
|
3742 | 0 | if (cpi->twopass.gf_group.layer_depth[gf_group_index] > 1) return orig_rdmult; |
3743 | | |
3744 | 0 | for (row = mi_row; row < mi_row + mi_high; ++row) { |
3745 | 0 | for (col = mi_col; col < mi_col + mi_wide; ++col) { |
3746 | 0 | TplDepStats *this_stats = &tpl_stats[row * tpl_stride + col]; |
3747 | |
|
3748 | 0 | if (row >= cpi->common.mi_rows || col >= cpi->common.mi_cols) continue; |
3749 | | |
3750 | 0 | intra_cost += this_stats->intra_cost; |
3751 | 0 | mc_dep_cost += this_stats->mc_dep_cost; |
3752 | 0 | } |
3753 | 0 | } |
3754 | |
|
3755 | 0 | vpx_clear_system_state(); |
3756 | |
|
3757 | 0 | r0 = cpi->rd.r0; |
3758 | 0 | rk = (double)intra_cost / mc_dep_cost; |
3759 | 0 | beta = r0 / rk; |
3760 | 0 | dr = vp9_get_adaptive_rdmult(cpi, beta); |
3761 | |
|
3762 | 0 | dr = VPXMIN(dr, orig_rdmult * 3 / 2); |
3763 | 0 | dr = VPXMAX(dr, orig_rdmult * 1 / 2); |
3764 | |
|
3765 | 0 | dr = VPXMAX(1, dr); |
3766 | |
|
3767 | 0 | return dr; |
3768 | 0 | } |
3769 | | #endif // !CONFIG_REALTIME_ONLY |
3770 | | |
3771 | | #if CONFIG_RATE_CTRL |
3772 | | static void assign_partition_info( |
3773 | | const int row_start_4x4, const int col_start_4x4, const int block_width_4x4, |
3774 | | const int block_height_4x4, const int num_unit_rows, |
3775 | | const int num_unit_cols, PARTITION_INFO *partition_info) { |
3776 | | int i, j; |
3777 | | for (i = 0; i < block_height_4x4; ++i) { |
3778 | | for (j = 0; j < block_width_4x4; ++j) { |
3779 | | const int row_4x4 = row_start_4x4 + i; |
3780 | | const int col_4x4 = col_start_4x4 + j; |
3781 | | const int unit_index = row_4x4 * num_unit_cols + col_4x4; |
3782 | | if (row_4x4 >= num_unit_rows || col_4x4 >= num_unit_cols) continue; |
3783 | | partition_info[unit_index].row = row_4x4 << 2; |
3784 | | partition_info[unit_index].column = col_4x4 << 2; |
3785 | | partition_info[unit_index].row_start = row_start_4x4 << 2; |
3786 | | partition_info[unit_index].column_start = col_start_4x4 << 2; |
3787 | | partition_info[unit_index].width = block_width_4x4 << 2; |
3788 | | partition_info[unit_index].height = block_height_4x4 << 2; |
3789 | | } |
3790 | | } |
3791 | | } |
3792 | | |
3793 | | static void assign_motion_vector_info(const int block_width_4x4, |
3794 | | const int block_height_4x4, |
3795 | | const int row_start_4x4, |
3796 | | const int col_start_4x4, |
3797 | | const int num_unit_rows, |
3798 | | const int num_unit_cols, MV *source_mv[2], |
3799 | | MV_REFERENCE_FRAME source_ref_frame[2], |
3800 | | MOTION_VECTOR_INFO *motion_vector_info) { |
3801 | | int i, j; |
3802 | | for (i = 0; i < block_height_4x4; ++i) { |
3803 | | for (j = 0; j < block_width_4x4; ++j) { |
3804 | | const int row_4x4 = row_start_4x4 + i; |
3805 | | const int col_4x4 = col_start_4x4 + j; |
3806 | | const int unit_index = row_4x4 * num_unit_cols + col_4x4; |
3807 | | if (row_4x4 >= num_unit_rows || col_4x4 >= num_unit_cols) continue; |
3808 | | if (source_ref_frame[1] == NO_REF_FRAME) { |
3809 | | assert(source_mv[1]->row == 0 && source_mv[1]->col == 0); |
3810 | | } |
3811 | | motion_vector_info[unit_index].ref_frame[0] = source_ref_frame[0]; |
3812 | | motion_vector_info[unit_index].ref_frame[1] = source_ref_frame[1]; |
3813 | | motion_vector_info[unit_index].mv[0].as_mv.row = source_mv[0]->row; |
3814 | | motion_vector_info[unit_index].mv[0].as_mv.col = source_mv[0]->col; |
3815 | | motion_vector_info[unit_index].mv[1].as_mv.row = source_mv[1]->row; |
3816 | | motion_vector_info[unit_index].mv[1].as_mv.col = source_mv[1]->col; |
3817 | | } |
3818 | | } |
3819 | | } |
3820 | | |
3821 | | static void store_superblock_info( |
3822 | | const PC_TREE *const pc_tree, MODE_INFO **mi_grid_visible, |
3823 | | const int mi_stride, const int square_size_4x4, const int num_unit_rows, |
3824 | | const int num_unit_cols, const int row_start_4x4, const int col_start_4x4, |
3825 | | PARTITION_INFO *partition_info, MOTION_VECTOR_INFO *motion_vector_info) { |
3826 | | const int subblock_square_size_4x4 = square_size_4x4 >> 1; |
3827 | | if (row_start_4x4 >= num_unit_rows || col_start_4x4 >= num_unit_cols) return; |
3828 | | assert(pc_tree->partitioning != PARTITION_INVALID); |
3829 | | // End node, no split. |
3830 | | if (pc_tree->partitioning == PARTITION_NONE || |
3831 | | pc_tree->partitioning == PARTITION_HORZ || |
3832 | | pc_tree->partitioning == PARTITION_VERT || square_size_4x4 == 1) { |
3833 | | const int mi_row = row_start_4x4 >> 1; |
3834 | | const int mi_col = col_start_4x4 >> 1; |
3835 | | const int mi_idx = mi_stride * mi_row + mi_col; |
3836 | | MODE_INFO **mi = mi_grid_visible + mi_idx; |
3837 | | MV *source_mv[2]; |
3838 | | MV_REFERENCE_FRAME source_ref_frame[2]; |
3839 | | |
3840 | | // partition info |
3841 | | const int block_width_4x4 = (pc_tree->partitioning == PARTITION_VERT) |
3842 | | ? square_size_4x4 >> 1 |
3843 | | : square_size_4x4; |
3844 | | const int block_height_4x4 = (pc_tree->partitioning == PARTITION_HORZ) |
3845 | | ? square_size_4x4 >> 1 |
3846 | | : square_size_4x4; |
3847 | | assign_partition_info(row_start_4x4, col_start_4x4, block_width_4x4, |
3848 | | block_height_4x4, num_unit_rows, num_unit_cols, |
3849 | | partition_info); |
3850 | | if (pc_tree->partitioning == PARTITION_VERT) { |
3851 | | assign_partition_info(row_start_4x4, col_start_4x4 + block_width_4x4, |
3852 | | block_width_4x4, block_height_4x4, num_unit_rows, |
3853 | | num_unit_cols, partition_info); |
3854 | | } else if (pc_tree->partitioning == PARTITION_HORZ) { |
3855 | | assign_partition_info(row_start_4x4 + block_height_4x4, col_start_4x4, |
3856 | | block_width_4x4, block_height_4x4, num_unit_rows, |
3857 | | num_unit_cols, partition_info); |
3858 | | } |
3859 | | |
3860 | | // motion vector info |
3861 | | if (pc_tree->partitioning == PARTITION_HORZ) { |
3862 | | int is_valid_second_rectangle = 0; |
3863 | | assert(square_size_4x4 > 1); |
3864 | | // First rectangle. |
3865 | | source_ref_frame[0] = mi[0]->ref_frame[0]; |
3866 | | source_ref_frame[1] = mi[0]->ref_frame[1]; |
3867 | | source_mv[0] = &mi[0]->mv[0].as_mv; |
3868 | | source_mv[1] = &mi[0]->mv[1].as_mv; |
3869 | | assign_motion_vector_info(block_width_4x4, block_height_4x4, |
3870 | | row_start_4x4, col_start_4x4, num_unit_rows, |
3871 | | num_unit_cols, source_mv, source_ref_frame, |
3872 | | motion_vector_info); |
3873 | | // Second rectangle. |
3874 | | if (square_size_4x4 == 2) { |
3875 | | is_valid_second_rectangle = 1; |
3876 | | source_ref_frame[0] = mi[0]->ref_frame[0]; |
3877 | | source_ref_frame[1] = mi[0]->ref_frame[1]; |
3878 | | source_mv[0] = &mi[0]->bmi[2].as_mv[0].as_mv; |
3879 | | source_mv[1] = &mi[0]->bmi[2].as_mv[1].as_mv; |
3880 | | } else { |
3881 | | const int mi_row_2 = mi_row + (block_height_4x4 >> 1); |
3882 | | const int mi_col_2 = mi_col; |
3883 | | if (mi_row_2 * 2 < num_unit_rows && mi_col_2 * 2 < num_unit_cols) { |
3884 | | const int mi_idx_2 = mi_stride * mi_row_2 + mi_col_2; |
3885 | | is_valid_second_rectangle = 1; |
3886 | | mi = mi_grid_visible + mi_idx_2; |
3887 | | source_ref_frame[0] = mi[0]->ref_frame[0]; |
3888 | | source_ref_frame[1] = mi[0]->ref_frame[1]; |
3889 | | source_mv[0] = &mi[0]->mv[0].as_mv; |
3890 | | source_mv[1] = &mi[0]->mv[1].as_mv; |
3891 | | } |
3892 | | } |
3893 | | if (is_valid_second_rectangle) { |
3894 | | assign_motion_vector_info( |
3895 | | block_width_4x4, block_height_4x4, row_start_4x4 + block_height_4x4, |
3896 | | col_start_4x4, num_unit_rows, num_unit_cols, source_mv, |
3897 | | source_ref_frame, motion_vector_info); |
3898 | | } |
3899 | | } else if (pc_tree->partitioning == PARTITION_VERT) { |
3900 | | int is_valid_second_rectangle = 0; |
3901 | | assert(square_size_4x4 > 1); |
3902 | | // First rectangle. |
3903 | | source_ref_frame[0] = mi[0]->ref_frame[0]; |
3904 | | source_ref_frame[1] = mi[0]->ref_frame[1]; |
3905 | | source_mv[0] = &mi[0]->mv[0].as_mv; |
3906 | | source_mv[1] = &mi[0]->mv[1].as_mv; |
3907 | | assign_motion_vector_info(block_width_4x4, block_height_4x4, |
3908 | | row_start_4x4, col_start_4x4, num_unit_rows, |
3909 | | num_unit_cols, source_mv, source_ref_frame, |
3910 | | motion_vector_info); |
3911 | | // Second rectangle. |
3912 | | if (square_size_4x4 == 2) { |
3913 | | is_valid_second_rectangle = 1; |
3914 | | source_ref_frame[0] = mi[0]->ref_frame[0]; |
3915 | | source_ref_frame[1] = mi[0]->ref_frame[1]; |
3916 | | source_mv[0] = &mi[0]->bmi[1].as_mv[0].as_mv; |
3917 | | source_mv[1] = &mi[0]->bmi[1].as_mv[1].as_mv; |
3918 | | } else { |
3919 | | const int mi_row_2 = mi_row; |
3920 | | const int mi_col_2 = mi_col + (block_width_4x4 >> 1); |
3921 | | if (mi_row_2 * 2 < num_unit_rows && mi_col_2 * 2 < num_unit_cols) { |
3922 | | const int mi_idx_2 = mi_stride * mi_row_2 + mi_col_2; |
3923 | | is_valid_second_rectangle = 1; |
3924 | | mi = mi_grid_visible + mi_idx_2; |
3925 | | source_ref_frame[0] = mi[0]->ref_frame[0]; |
3926 | | source_ref_frame[1] = mi[0]->ref_frame[1]; |
3927 | | source_mv[0] = &mi[0]->mv[0].as_mv; |
3928 | | source_mv[1] = &mi[0]->mv[1].as_mv; |
3929 | | } |
3930 | | } |
3931 | | if (is_valid_second_rectangle) { |
3932 | | assign_motion_vector_info( |
3933 | | block_width_4x4, block_height_4x4, row_start_4x4, |
3934 | | col_start_4x4 + block_width_4x4, num_unit_rows, num_unit_cols, |
3935 | | source_mv, source_ref_frame, motion_vector_info); |
3936 | | } |
3937 | | } else { |
3938 | | assert(pc_tree->partitioning == PARTITION_NONE || square_size_4x4 == 1); |
3939 | | source_ref_frame[0] = mi[0]->ref_frame[0]; |
3940 | | source_ref_frame[1] = mi[0]->ref_frame[1]; |
3941 | | if (square_size_4x4 == 1) { |
3942 | | const int sub8x8_row = row_start_4x4 % 2; |
3943 | | const int sub8x8_col = col_start_4x4 % 2; |
3944 | | const int sub8x8_idx = sub8x8_row * 2 + sub8x8_col; |
3945 | | source_mv[0] = &mi[0]->bmi[sub8x8_idx].as_mv[0].as_mv; |
3946 | | source_mv[1] = &mi[0]->bmi[sub8x8_idx].as_mv[1].as_mv; |
3947 | | } else { |
3948 | | source_mv[0] = &mi[0]->mv[0].as_mv; |
3949 | | source_mv[1] = &mi[0]->mv[1].as_mv; |
3950 | | } |
3951 | | assign_motion_vector_info(block_width_4x4, block_height_4x4, |
3952 | | row_start_4x4, col_start_4x4, num_unit_rows, |
3953 | | num_unit_cols, source_mv, source_ref_frame, |
3954 | | motion_vector_info); |
3955 | | } |
3956 | | |
3957 | | return; |
3958 | | } |
3959 | | // recursively traverse partition tree when partition is split. |
3960 | | assert(pc_tree->partitioning == PARTITION_SPLIT); |
3961 | | store_superblock_info(pc_tree->u.split[0], mi_grid_visible, mi_stride, |
3962 | | subblock_square_size_4x4, num_unit_rows, num_unit_cols, |
3963 | | row_start_4x4, col_start_4x4, partition_info, |
3964 | | motion_vector_info); |
3965 | | store_superblock_info(pc_tree->u.split[1], mi_grid_visible, mi_stride, |
3966 | | subblock_square_size_4x4, num_unit_rows, num_unit_cols, |
3967 | | row_start_4x4, col_start_4x4 + subblock_square_size_4x4, |
3968 | | partition_info, motion_vector_info); |
3969 | | store_superblock_info(pc_tree->u.split[2], mi_grid_visible, mi_stride, |
3970 | | subblock_square_size_4x4, num_unit_rows, num_unit_cols, |
3971 | | row_start_4x4 + subblock_square_size_4x4, col_start_4x4, |
3972 | | partition_info, motion_vector_info); |
3973 | | store_superblock_info(pc_tree->u.split[3], mi_grid_visible, mi_stride, |
3974 | | subblock_square_size_4x4, num_unit_rows, num_unit_cols, |
3975 | | row_start_4x4 + subblock_square_size_4x4, |
3976 | | col_start_4x4 + subblock_square_size_4x4, |
3977 | | partition_info, motion_vector_info); |
3978 | | } |
3979 | | #endif // CONFIG_RATE_CTRL |
3980 | | |
3981 | | #if !CONFIG_REALTIME_ONLY |
3982 | | // TODO(jingning,jimbankoski,rbultje): properly skip partition types that are |
3983 | | // unlikely to be selected depending on previous rate-distortion optimization |
3984 | | // results, for encoding speed-up. |
3985 | | static int rd_pick_partition(VP9_COMP *cpi, ThreadData *td, |
3986 | | TileDataEnc *tile_data, TOKENEXTRA **tp, |
3987 | | int mi_row, int mi_col, BLOCK_SIZE bsize, |
3988 | | RD_COST *rd_cost, RD_COST best_rdc, |
3989 | 3.41M | PC_TREE *pc_tree) { |
3990 | 3.41M | VP9_COMMON *const cm = &cpi->common; |
3991 | 3.41M | const VP9EncoderConfig *const oxcf = &cpi->oxcf; |
3992 | 3.41M | TileInfo *const tile_info = &tile_data->tile_info; |
3993 | 3.41M | MACROBLOCK *const x = &td->mb; |
3994 | 3.41M | MACROBLOCKD *const xd = &x->e_mbd; |
3995 | 3.41M | const int mi_step = num_8x8_blocks_wide_lookup[bsize] / 2; |
3996 | 3.41M | ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; |
3997 | 3.41M | PARTITION_CONTEXT sl[8], sa[8]; |
3998 | 3.41M | TOKENEXTRA *tp_orig = *tp; |
3999 | 3.41M | PICK_MODE_CONTEXT *const ctx = &pc_tree->none; |
4000 | 3.41M | int i; |
4001 | 3.41M | const int pl = partition_plane_context(xd, mi_row, mi_col, bsize); |
4002 | 3.41M | BLOCK_SIZE subsize; |
4003 | 3.41M | RD_COST this_rdc, sum_rdc; |
4004 | 3.41M | int do_split = bsize >= BLOCK_8X8; |
4005 | 3.41M | int do_rect = 1; |
4006 | 3.41M | INTERP_FILTER pred_interp_filter; |
4007 | | |
4008 | | // Override skipping rectangular partition operations for edge blocks |
4009 | 3.41M | const int force_horz_split = (mi_row + mi_step >= cm->mi_rows); |
4010 | 3.41M | const int force_vert_split = (mi_col + mi_step >= cm->mi_cols); |
4011 | 3.41M | const int xss = x->e_mbd.plane[1].subsampling_x; |
4012 | 3.41M | const int yss = x->e_mbd.plane[1].subsampling_y; |
4013 | | |
4014 | 3.41M | BLOCK_SIZE min_size = x->min_partition_size; |
4015 | 3.41M | BLOCK_SIZE max_size = x->max_partition_size; |
4016 | | |
4017 | 3.41M | int partition_none_allowed = !force_horz_split && !force_vert_split; |
4018 | 3.41M | int partition_horz_allowed = |
4019 | 3.41M | !force_vert_split && yss <= xss && bsize >= BLOCK_8X8; |
4020 | 3.41M | int partition_vert_allowed = |
4021 | 3.41M | !force_horz_split && xss <= yss && bsize >= BLOCK_8X8; |
4022 | | |
4023 | 3.41M | int64_t dist_breakout_thr = cpi->sf.partition_search_breakout_thr.dist; |
4024 | 3.41M | int rate_breakout_thr = cpi->sf.partition_search_breakout_thr.rate; |
4025 | 3.41M | int must_split = 0; |
4026 | 3.41M | int should_encode_sb = 0; |
4027 | | |
4028 | | // Ref frames picked in the [i_th] quarter subblock during square partition |
4029 | | // RD search. It may be used to prune ref frame selection of rect partitions. |
4030 | 3.41M | uint8_t ref_frames_used[4] = { 0, 0, 0, 0 }; |
4031 | | |
4032 | 3.41M | int partition_mul = x->cb_rdmult; |
4033 | | |
4034 | 3.41M | (void)*tp_orig; |
4035 | | |
4036 | 3.41M | assert(num_8x8_blocks_wide_lookup[bsize] == |
4037 | 3.41M | num_8x8_blocks_high_lookup[bsize]); |
4038 | | |
4039 | 3.41M | dist_breakout_thr >>= |
4040 | 3.41M | 8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]); |
4041 | | |
4042 | 3.41M | rate_breakout_thr *= num_pels_log2_lookup[bsize]; |
4043 | | |
4044 | 3.41M | vp9_rd_cost_init(&this_rdc); |
4045 | 3.41M | vp9_rd_cost_init(&sum_rdc); |
4046 | | |
4047 | 3.41M | set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); |
4048 | | |
4049 | 3.41M | if (oxcf->tuning == VP8_TUNE_SSIM) { |
4050 | 0 | set_ssim_rdmult(cpi, x, bsize, mi_row, mi_col, &partition_mul); |
4051 | 0 | } |
4052 | 3.41M | vp9_rd_cost_update(partition_mul, x->rddiv, &best_rdc); |
4053 | | |
4054 | 3.41M | if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode != NO_AQ && |
4055 | 3.41M | cpi->oxcf.aq_mode != LOOKAHEAD_AQ) |
4056 | 0 | x->mb_energy = vp9_block_energy(cpi, x, bsize); |
4057 | | |
4058 | 3.41M | if (cpi->sf.cb_partition_search && bsize == BLOCK_16X16) { |
4059 | 0 | int cb_partition_search_ctrl = |
4060 | 0 | ((pc_tree->index == 0 || pc_tree->index == 3) + |
4061 | 0 | get_chessboard_index(cm->current_video_frame)) & |
4062 | 0 | 0x1; |
4063 | |
|
4064 | 0 | if (cb_partition_search_ctrl && bsize > min_size && bsize < max_size) |
4065 | 0 | set_partition_range(cm, xd, mi_row, mi_col, bsize, &min_size, &max_size); |
4066 | 0 | } |
4067 | | |
4068 | | // Get sub block energy range |
4069 | 3.41M | if (bsize >= BLOCK_16X16) { |
4070 | 993k | int min_energy, max_energy; |
4071 | 993k | vp9_get_sub_block_energy(cpi, x, mi_row, mi_col, bsize, &min_energy, |
4072 | 993k | &max_energy); |
4073 | 993k | must_split = (min_energy < -3) && (max_energy - min_energy > 2); |
4074 | 993k | } |
4075 | | |
4076 | | // Determine partition types in search according to the speed features. |
4077 | | // The threshold set here has to be of square block size. |
4078 | 3.41M | if (cpi->sf.auto_min_max_partition_size) { |
4079 | 0 | partition_none_allowed &= (bsize <= max_size); |
4080 | 0 | partition_horz_allowed &= |
4081 | 0 | ((bsize <= max_size && bsize > min_size) || force_horz_split); |
4082 | 0 | partition_vert_allowed &= |
4083 | 0 | ((bsize <= max_size && bsize > min_size) || force_vert_split); |
4084 | 0 | do_split &= bsize > min_size; |
4085 | 0 | } |
4086 | | |
4087 | 3.41M | if (cpi->sf.use_square_partition_only && |
4088 | 3.41M | (bsize > cpi->sf.use_square_only_thresh_high || |
4089 | 2.38M | bsize < cpi->sf.use_square_only_thresh_low)) { |
4090 | 944k | if (cpi->use_svc) { |
4091 | 0 | if (!vp9_active_h_edge(cpi, mi_row, mi_step) || x->e_mbd.lossless) |
4092 | 0 | partition_horz_allowed &= force_horz_split; |
4093 | 0 | if (!vp9_active_v_edge(cpi, mi_row, mi_step) || x->e_mbd.lossless) |
4094 | 0 | partition_vert_allowed &= force_vert_split; |
4095 | 944k | } else { |
4096 | 944k | partition_horz_allowed &= force_horz_split; |
4097 | 944k | partition_vert_allowed &= force_vert_split; |
4098 | 944k | } |
4099 | 944k | } |
4100 | | |
4101 | 3.41M | save_context(x, mi_row, mi_col, a, l, sa, sl, bsize); |
4102 | | |
4103 | 3.41M | pc_tree->partitioning = PARTITION_NONE; |
4104 | | |
4105 | 3.41M | if (cpi->sf.rd_ml_partition.var_pruning && !frame_is_intra_only(cm)) { |
4106 | 1.16M | const int do_rd_ml_partition_var_pruning = |
4107 | 1.16M | partition_none_allowed && do_split && |
4108 | 1.16M | mi_row + num_8x8_blocks_high_lookup[bsize] <= cm->mi_rows && |
4109 | 1.16M | mi_col + num_8x8_blocks_wide_lookup[bsize] <= cm->mi_cols; |
4110 | 1.16M | if (do_rd_ml_partition_var_pruning) { |
4111 | 1.13M | ml_predict_var_rd_partitioning(cpi, x, pc_tree, bsize, mi_row, mi_col, |
4112 | 1.13M | &partition_none_allowed, &do_split); |
4113 | 1.13M | } else { |
4114 | 29.3k | vp9_zero(pc_tree->mv); |
4115 | 29.3k | } |
4116 | 1.16M | if (bsize > BLOCK_8X8) { // Store MV result as reference for subblocks. |
4117 | 1.59M | for (i = 0; i < 4; ++i) pc_tree->u.split[i]->mv = pc_tree->mv; |
4118 | 319k | } |
4119 | 1.16M | } |
4120 | | |
4121 | | // PARTITION_NONE |
4122 | 3.41M | if (partition_none_allowed) { |
4123 | 3.06M | rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, bsize, ctx, |
4124 | 3.06M | best_rdc.rate, best_rdc.dist); |
4125 | 3.06M | ctx->rdcost = this_rdc.rdcost; |
4126 | 3.06M | if (this_rdc.rate != INT_MAX) { |
4127 | 2.87M | if (cpi->sf.prune_ref_frame_for_rect_partitions) { |
4128 | 1.16M | const int ref1 = ctx->mic.ref_frame[0]; |
4129 | 1.16M | const int ref2 = ctx->mic.ref_frame[1]; |
4130 | 5.81M | for (i = 0; i < 4; ++i) { |
4131 | 4.65M | ref_frames_used[i] |= (1 << ref1); |
4132 | 4.65M | if (ref2 > 0) ref_frames_used[i] |= (1 << ref2); |
4133 | 4.65M | } |
4134 | 1.16M | } |
4135 | 2.87M | if (bsize >= BLOCK_8X8) { |
4136 | 2.87M | this_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE]; |
4137 | 2.87M | vp9_rd_cost_update(partition_mul, x->rddiv, &this_rdc); |
4138 | 2.87M | } |
4139 | | |
4140 | 2.87M | if (this_rdc.rdcost < best_rdc.rdcost) { |
4141 | 2.83M | MODE_INFO *mi = xd->mi[0]; |
4142 | | |
4143 | 2.83M | best_rdc = this_rdc; |
4144 | 2.83M | should_encode_sb = 1; |
4145 | 2.83M | if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE; |
4146 | | |
4147 | 2.83M | if (cpi->sf.rd_ml_partition.search_early_termination) { |
4148 | | // Currently, the machine-learning based partition search early |
4149 | | // termination is only used while bsize is 16x16, 32x32 or 64x64, |
4150 | | // VPXMIN(cm->width, cm->height) >= 480, and speed = 0. |
4151 | 0 | if (!x->e_mbd.lossless && |
4152 | 0 | !segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP) && |
4153 | 0 | ctx->mic.mode >= INTRA_MODES && bsize >= BLOCK_16X16) { |
4154 | 0 | if (ml_pruning_partition(cm, xd, ctx, mi_row, mi_col, bsize)) { |
4155 | 0 | do_split = 0; |
4156 | 0 | do_rect = 0; |
4157 | 0 | } |
4158 | 0 | } |
4159 | 0 | } |
4160 | | |
4161 | 2.83M | if ((do_split || do_rect) && !x->e_mbd.lossless && ctx->skippable) { |
4162 | 167k | const int use_ml_based_breakout = |
4163 | 167k | cpi->sf.rd_ml_partition.search_breakout && cm->base_qindex >= 100; |
4164 | 167k | if (use_ml_based_breakout) { |
4165 | 32.2k | if (ml_predict_breakout(cpi, bsize, x, &this_rdc)) { |
4166 | 5.86k | do_split = 0; |
4167 | 5.86k | do_rect = 0; |
4168 | 5.86k | } |
4169 | 135k | } else { |
4170 | 135k | if (!cpi->sf.rd_ml_partition.search_early_termination) { |
4171 | 135k | if ((best_rdc.dist < (dist_breakout_thr >> 2)) || |
4172 | 135k | (best_rdc.dist < dist_breakout_thr && |
4173 | 80.4k | best_rdc.rate < rate_breakout_thr)) { |
4174 | 54.7k | do_split = 0; |
4175 | 54.7k | do_rect = 0; |
4176 | 54.7k | } |
4177 | 135k | } |
4178 | 135k | } |
4179 | 167k | } |
4180 | 2.83M | } |
4181 | 2.87M | } |
4182 | 3.06M | restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); |
4183 | 3.06M | } else { |
4184 | 351k | vp9_zero(ctx->pred_mv); |
4185 | 351k | ctx->mic.interp_filter = EIGHTTAP; |
4186 | 351k | } |
4187 | | |
4188 | | // store estimated motion vector |
4189 | 3.41M | store_pred_mv(x, ctx); |
4190 | | |
4191 | | // If the interp_filter is marked as SWITCHABLE_FILTERS, it was for an |
4192 | | // intra block and used for context purposes. |
4193 | 3.41M | if (ctx->mic.interp_filter == SWITCHABLE_FILTERS) { |
4194 | 2.09M | pred_interp_filter = EIGHTTAP; |
4195 | 2.09M | } else { |
4196 | 1.31M | pred_interp_filter = ctx->mic.interp_filter; |
4197 | 1.31M | } |
4198 | | |
4199 | | // PARTITION_SPLIT |
4200 | | // TODO(jingning): use the motion vectors given by the above search as |
4201 | | // the starting point of motion search in the following partition type check. |
4202 | 3.41M | pc_tree->u.split[0]->none.rdcost = 0; |
4203 | 3.41M | pc_tree->u.split[1]->none.rdcost = 0; |
4204 | 3.41M | pc_tree->u.split[2]->none.rdcost = 0; |
4205 | 3.41M | pc_tree->u.split[3]->none.rdcost = 0; |
4206 | 3.41M | if (do_split || must_split) { |
4207 | 3.25M | subsize = get_subsize(bsize, PARTITION_SPLIT); |
4208 | 3.25M | load_pred_mv(x, ctx); |
4209 | 3.25M | if (bsize == BLOCK_8X8) { |
4210 | 2.28M | i = 4; |
4211 | 2.28M | if (cpi->sf.adaptive_pred_interp_filter && partition_none_allowed) |
4212 | 2.18M | pc_tree->u.leaf_split[0]->pred_interp_filter = pred_interp_filter; |
4213 | 2.28M | rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize, |
4214 | 2.28M | pc_tree->u.leaf_split[0], best_rdc.rate, best_rdc.dist); |
4215 | 2.28M | if (sum_rdc.rate == INT_MAX) { |
4216 | 815k | sum_rdc.rdcost = INT64_MAX; |
4217 | 1.46M | } else { |
4218 | 1.46M | if (cpi->sf.prune_ref_frame_for_rect_partitions) { |
4219 | 596k | const int ref1 = pc_tree->u.leaf_split[0]->mic.ref_frame[0]; |
4220 | 596k | const int ref2 = pc_tree->u.leaf_split[0]->mic.ref_frame[1]; |
4221 | 2.98M | for (i = 0; i < 4; ++i) { |
4222 | 2.38M | ref_frames_used[i] |= (1 << ref1); |
4223 | 2.38M | if (ref2 > 0) ref_frames_used[i] |= (1 << ref2); |
4224 | 2.38M | } |
4225 | 596k | } |
4226 | 1.46M | } |
4227 | 2.28M | } else { |
4228 | 4.45M | for (i = 0; (i < 4) && ((sum_rdc.rdcost < best_rdc.rdcost) || must_split); |
4229 | 3.67M | ++i) { |
4230 | 3.67M | const int x_idx = (i & 1) * mi_step; |
4231 | 3.67M | const int y_idx = (i >> 1) * mi_step; |
4232 | 3.67M | int found_best_rd = 0; |
4233 | 3.67M | RD_COST best_rdc_split; |
4234 | 3.67M | vp9_rd_cost_reset(&best_rdc_split); |
4235 | | |
4236 | 3.67M | if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX) { |
4237 | | // A must split test here increases the number of sub |
4238 | | // partitions but hurts metrics results quite a bit, |
4239 | | // so this extra test is commented out pending |
4240 | | // further tests on whether it adds much in terms of |
4241 | | // visual quality. |
4242 | | // (must_split) ? best_rdc.rate |
4243 | | // : best_rdc.rate - sum_rdc.rate, |
4244 | | // (must_split) ? best_rdc.dist |
4245 | | // : best_rdc.dist - sum_rdc.dist, |
4246 | 2.79M | best_rdc_split.rate = best_rdc.rate - sum_rdc.rate; |
4247 | 2.79M | best_rdc_split.dist = best_rdc.dist - sum_rdc.dist; |
4248 | 2.79M | } |
4249 | | |
4250 | 3.67M | if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols) |
4251 | 336k | continue; |
4252 | | |
4253 | 3.33M | pc_tree->u.split[i]->index = i; |
4254 | 3.33M | if (cpi->sf.prune_ref_frame_for_rect_partitions) |
4255 | 1.46M | pc_tree->u.split[i]->none.rate = INT_MAX; |
4256 | 3.33M | found_best_rd = rd_pick_partition( |
4257 | 3.33M | cpi, td, tile_data, tp, mi_row + y_idx, mi_col + x_idx, subsize, |
4258 | 3.33M | &this_rdc, best_rdc_split, pc_tree->u.split[i]); |
4259 | | |
4260 | 3.33M | if (found_best_rd == 0) { |
4261 | 188k | sum_rdc.rdcost = INT64_MAX; |
4262 | 188k | break; |
4263 | 3.14M | } else { |
4264 | 3.14M | if (cpi->sf.prune_ref_frame_for_rect_partitions && |
4265 | 3.14M | pc_tree->u.split[i]->none.rate != INT_MAX) { |
4266 | 1.14M | const int ref1 = pc_tree->u.split[i]->none.mic.ref_frame[0]; |
4267 | 1.14M | const int ref2 = pc_tree->u.split[i]->none.mic.ref_frame[1]; |
4268 | 1.14M | ref_frames_used[i] |= (1 << ref1); |
4269 | 1.14M | if (ref2 > 0) ref_frames_used[i] |= (1 << ref2); |
4270 | 1.14M | } |
4271 | 3.14M | sum_rdc.rate += this_rdc.rate; |
4272 | 3.14M | sum_rdc.dist += this_rdc.dist; |
4273 | 3.14M | vp9_rd_cost_update(partition_mul, x->rddiv, &sum_rdc); |
4274 | 3.14M | } |
4275 | 3.33M | } |
4276 | 969k | } |
4277 | | |
4278 | 3.25M | if (((sum_rdc.rdcost < best_rdc.rdcost) || must_split) && i == 4) { |
4279 | 2.11M | sum_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT]; |
4280 | 2.11M | vp9_rd_cost_update(partition_mul, x->rddiv, &sum_rdc); |
4281 | | |
4282 | 2.11M | if ((sum_rdc.rdcost < best_rdc.rdcost) || |
4283 | 2.11M | (must_split && (sum_rdc.dist < best_rdc.dist))) { |
4284 | 2.01M | best_rdc = sum_rdc; |
4285 | 2.01M | should_encode_sb = 1; |
4286 | 2.01M | pc_tree->partitioning = PARTITION_SPLIT; |
4287 | | |
4288 | | // Rate and distortion based partition search termination clause. |
4289 | 2.01M | if (!cpi->sf.rd_ml_partition.search_early_termination && |
4290 | 2.01M | !x->e_mbd.lossless && |
4291 | 2.01M | ((best_rdc.dist < (dist_breakout_thr >> 2)) || |
4292 | 1.73M | (best_rdc.dist < dist_breakout_thr && |
4293 | 944k | best_rdc.rate < rate_breakout_thr))) { |
4294 | 792k | do_rect = 0; |
4295 | 792k | } |
4296 | 2.01M | } |
4297 | 2.11M | } else { |
4298 | | // skip rectangular partition test when larger block size |
4299 | | // gives better rd cost |
4300 | 1.13M | if (cpi->sf.less_rectangular_check && |
4301 | 1.13M | (bsize > cpi->sf.use_square_only_thresh_high || |
4302 | 1.13M | best_rdc.dist < dist_breakout_thr)) |
4303 | 686k | do_rect &= !partition_none_allowed; |
4304 | 1.13M | } |
4305 | 3.25M | restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); |
4306 | 3.25M | } |
4307 | | |
4308 | 3.41M | pc_tree->horizontal[0].skip_ref_frame_mask = 0; |
4309 | 3.41M | pc_tree->horizontal[1].skip_ref_frame_mask = 0; |
4310 | 3.41M | pc_tree->vertical[0].skip_ref_frame_mask = 0; |
4311 | 3.41M | pc_tree->vertical[1].skip_ref_frame_mask = 0; |
4312 | 3.41M | if (cpi->sf.prune_ref_frame_for_rect_partitions) { |
4313 | 1.50M | uint8_t used_frames; |
4314 | 1.50M | used_frames = ref_frames_used[0] | ref_frames_used[1]; |
4315 | 1.50M | if (used_frames) { |
4316 | 1.39M | pc_tree->horizontal[0].skip_ref_frame_mask = ~used_frames & 0xff; |
4317 | 1.39M | } |
4318 | 1.50M | used_frames = ref_frames_used[2] | ref_frames_used[3]; |
4319 | 1.50M | if (used_frames) { |
4320 | 1.35M | pc_tree->horizontal[1].skip_ref_frame_mask = ~used_frames & 0xff; |
4321 | 1.35M | } |
4322 | 1.50M | used_frames = ref_frames_used[0] | ref_frames_used[2]; |
4323 | 1.50M | if (used_frames) { |
4324 | 1.39M | pc_tree->vertical[0].skip_ref_frame_mask = ~used_frames & 0xff; |
4325 | 1.39M | } |
4326 | 1.50M | used_frames = ref_frames_used[1] | ref_frames_used[3]; |
4327 | 1.50M | if (used_frames) { |
4328 | 1.36M | pc_tree->vertical[1].skip_ref_frame_mask = ~used_frames & 0xff; |
4329 | 1.36M | } |
4330 | 1.50M | } |
4331 | | |
4332 | 3.41M | { |
4333 | 3.41M | const int do_ml_rect_partition_pruning = |
4334 | 3.41M | !frame_is_intra_only(cm) && !force_horz_split && !force_vert_split && |
4335 | 3.41M | (partition_horz_allowed || partition_vert_allowed) && bsize > BLOCK_8X8; |
4336 | 3.41M | if (do_ml_rect_partition_pruning) { |
4337 | 581k | ml_prune_rect_partition(cpi, x, bsize, pc_tree, &partition_horz_allowed, |
4338 | 581k | &partition_vert_allowed, best_rdc.rdcost); |
4339 | 581k | } |
4340 | 3.41M | } |
4341 | | |
4342 | | // PARTITION_HORZ |
4343 | 3.41M | if (partition_horz_allowed && |
4344 | 3.41M | (do_rect || vp9_active_h_edge(cpi, mi_row, mi_step))) { |
4345 | 1.41M | const int part_mode_rate = cpi->partition_cost[pl][PARTITION_HORZ]; |
4346 | 1.41M | subsize = get_subsize(bsize, PARTITION_HORZ); |
4347 | 1.41M | load_pred_mv(x, ctx); |
4348 | 1.41M | if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && |
4349 | 1.41M | partition_none_allowed) |
4350 | 890k | pc_tree->horizontal[0].pred_interp_filter = pred_interp_filter; |
4351 | 1.41M | rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize, |
4352 | 1.41M | &pc_tree->horizontal[0], best_rdc.rate - part_mode_rate, |
4353 | 1.41M | best_rdc.dist); |
4354 | 1.41M | if (sum_rdc.rdcost < INT64_MAX) { |
4355 | 825k | sum_rdc.rate += part_mode_rate; |
4356 | 825k | vp9_rd_cost_update(partition_mul, x->rddiv, &sum_rdc); |
4357 | 825k | } |
4358 | | |
4359 | 1.41M | if (sum_rdc.rdcost < best_rdc.rdcost && mi_row + mi_step < cm->mi_rows && |
4360 | 1.41M | bsize > BLOCK_8X8) { |
4361 | 363k | PICK_MODE_CONTEXT *hctx = &pc_tree->horizontal[0]; |
4362 | 363k | update_state(cpi, td, hctx, mi_row, mi_col, subsize, 0); |
4363 | 363k | encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, hctx); |
4364 | 363k | if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && |
4365 | 363k | partition_none_allowed) |
4366 | 0 | pc_tree->horizontal[1].pred_interp_filter = pred_interp_filter; |
4367 | 363k | rd_pick_sb_modes(cpi, tile_data, x, mi_row + mi_step, mi_col, &this_rdc, |
4368 | 363k | subsize, &pc_tree->horizontal[1], |
4369 | 363k | best_rdc.rate - sum_rdc.rate, |
4370 | 363k | best_rdc.dist - sum_rdc.dist); |
4371 | 363k | if (this_rdc.rate == INT_MAX) { |
4372 | 250k | sum_rdc.rdcost = INT64_MAX; |
4373 | 250k | } else { |
4374 | 112k | sum_rdc.rate += this_rdc.rate; |
4375 | 112k | sum_rdc.dist += this_rdc.dist; |
4376 | 112k | vp9_rd_cost_update(partition_mul, x->rddiv, &sum_rdc); |
4377 | 112k | } |
4378 | 363k | } |
4379 | | |
4380 | 1.41M | if (sum_rdc.rdcost < best_rdc.rdcost) { |
4381 | 278k | best_rdc = sum_rdc; |
4382 | 278k | should_encode_sb = 1; |
4383 | 278k | pc_tree->partitioning = PARTITION_HORZ; |
4384 | | |
4385 | 278k | if (cpi->sf.less_rectangular_check && |
4386 | 278k | bsize > cpi->sf.use_square_only_thresh_high) |
4387 | 1.10k | do_rect = 0; |
4388 | 278k | } |
4389 | 1.41M | restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); |
4390 | 1.41M | } |
4391 | | |
4392 | | // PARTITION_VERT |
4393 | 3.41M | if (partition_vert_allowed && |
4394 | 3.41M | (do_rect || vp9_active_v_edge(cpi, mi_col, mi_step))) { |
4395 | 1.36M | const int part_mode_rate = cpi->partition_cost[pl][PARTITION_VERT]; |
4396 | 1.36M | subsize = get_subsize(bsize, PARTITION_VERT); |
4397 | 1.36M | load_pred_mv(x, ctx); |
4398 | 1.36M | if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && |
4399 | 1.36M | partition_none_allowed) |
4400 | 890k | pc_tree->vertical[0].pred_interp_filter = pred_interp_filter; |
4401 | 1.36M | rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize, |
4402 | 1.36M | &pc_tree->vertical[0], best_rdc.rate - part_mode_rate, |
4403 | 1.36M | best_rdc.dist); |
4404 | 1.36M | if (sum_rdc.rdcost < INT64_MAX) { |
4405 | 715k | sum_rdc.rate += part_mode_rate; |
4406 | 715k | vp9_rd_cost_update(partition_mul, x->rddiv, &sum_rdc); |
4407 | 715k | } |
4408 | | |
4409 | 1.36M | if (sum_rdc.rdcost < best_rdc.rdcost && mi_col + mi_step < cm->mi_cols && |
4410 | 1.36M | bsize > BLOCK_8X8) { |
4411 | 368k | update_state(cpi, td, &pc_tree->vertical[0], mi_row, mi_col, subsize, 0); |
4412 | 368k | encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, |
4413 | 368k | &pc_tree->vertical[0]); |
4414 | 368k | if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && |
4415 | 368k | partition_none_allowed) |
4416 | 0 | pc_tree->vertical[1].pred_interp_filter = pred_interp_filter; |
4417 | 368k | rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + mi_step, &this_rdc, |
4418 | 368k | subsize, &pc_tree->vertical[1], |
4419 | 368k | best_rdc.rate - sum_rdc.rate, |
4420 | 368k | best_rdc.dist - sum_rdc.dist); |
4421 | 368k | if (this_rdc.rate == INT_MAX) { |
4422 | 273k | sum_rdc.rdcost = INT64_MAX; |
4423 | 273k | } else { |
4424 | 95.7k | sum_rdc.rate += this_rdc.rate; |
4425 | 95.7k | sum_rdc.dist += this_rdc.dist; |
4426 | 95.7k | vp9_rd_cost_update(partition_mul, x->rddiv, &sum_rdc); |
4427 | 95.7k | } |
4428 | 368k | } |
4429 | | |
4430 | 1.36M | if (sum_rdc.rdcost < best_rdc.rdcost) { |
4431 | 140k | best_rdc = sum_rdc; |
4432 | 140k | should_encode_sb = 1; |
4433 | 140k | pc_tree->partitioning = PARTITION_VERT; |
4434 | 140k | } |
4435 | 1.36M | restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); |
4436 | 1.36M | } |
4437 | | |
4438 | 3.41M | if (bsize == BLOCK_64X64 && best_rdc.rdcost == INT64_MAX) { |
4439 | 0 | vp9_rd_cost_reset(&this_rdc); |
4440 | 0 | rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, BLOCK_64X64, |
4441 | 0 | ctx, INT_MAX, INT64_MAX); |
4442 | 0 | ctx->rdcost = this_rdc.rdcost; |
4443 | 0 | vp9_rd_cost_update(partition_mul, x->rddiv, &this_rdc); |
4444 | 0 | if (this_rdc.rdcost < best_rdc.rdcost) { |
4445 | 0 | best_rdc = this_rdc; |
4446 | 0 | should_encode_sb = 1; |
4447 | 0 | pc_tree->partitioning = PARTITION_NONE; |
4448 | 0 | } |
4449 | 0 | } |
4450 | | |
4451 | 3.41M | *rd_cost = best_rdc; |
4452 | | |
4453 | 3.41M | if (should_encode_sb && pc_tree->index != 3) { |
4454 | 2.60M | int output_enabled = (bsize == BLOCK_64X64); |
4455 | | #if CONFIG_COLLECT_COMPONENT_TIMING |
4456 | | start_timing(cpi, encode_sb_time); |
4457 | | #endif |
4458 | 2.60M | encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize, |
4459 | 2.60M | pc_tree); |
4460 | | #if CONFIG_COLLECT_COMPONENT_TIMING |
4461 | | end_timing(cpi, encode_sb_time); |
4462 | | #endif |
4463 | | #if CONFIG_RATE_CTRL |
4464 | | if (oxcf->use_simple_encode_api) { |
4465 | | // Store partition, motion vector of the superblock. |
4466 | | if (output_enabled) { |
4467 | | const int num_unit_rows = |
4468 | | get_num_unit_4x4(cpi->frame_info.frame_height); |
4469 | | const int num_unit_cols = get_num_unit_4x4(cpi->frame_info.frame_width); |
4470 | | store_superblock_info(pc_tree, cm->mi_grid_visible, cm->mi_stride, |
4471 | | num_4x4_blocks_wide_lookup[BLOCK_64X64], |
4472 | | num_unit_rows, num_unit_cols, mi_row << 1, |
4473 | | mi_col << 1, cpi->partition_info, |
4474 | | cpi->motion_vector_info); |
4475 | | } |
4476 | | } |
4477 | | #endif // CONFIG_RATE_CTRL |
4478 | 2.60M | } |
4479 | | |
4480 | 3.41M | if (bsize == BLOCK_64X64) { |
4481 | 78.3k | assert(tp_orig < *tp); |
4482 | 78.3k | assert(best_rdc.rate < INT_MAX); |
4483 | 78.3k | assert(best_rdc.dist < INT64_MAX); |
4484 | 3.33M | } else { |
4485 | 3.33M | assert(tp_orig == *tp); |
4486 | 3.33M | } |
4487 | | |
4488 | 3.41M | return should_encode_sb; |
4489 | 3.41M | } |
4490 | | |
4491 | | static void encode_rd_sb_row(VP9_COMP *cpi, ThreadData *td, |
4492 | | TileDataEnc *tile_data, int mi_row, |
4493 | 55.4k | TOKENEXTRA **tp) { |
4494 | 55.4k | VP9_COMMON *const cm = &cpi->common; |
4495 | 55.4k | TileInfo *const tile_info = &tile_data->tile_info; |
4496 | 55.4k | MACROBLOCK *const x = &td->mb; |
4497 | 55.4k | MACROBLOCKD *const xd = &x->e_mbd; |
4498 | 55.4k | SPEED_FEATURES *const sf = &cpi->sf; |
4499 | 55.4k | const int mi_col_start = tile_info->mi_col_start; |
4500 | 55.4k | const int mi_col_end = tile_info->mi_col_end; |
4501 | 55.4k | int mi_col; |
4502 | 55.4k | const int sb_row = mi_row >> MI_BLOCK_SIZE_LOG2; |
4503 | 55.4k | const int num_sb_cols = |
4504 | 55.4k | get_num_cols(tile_data->tile_info, MI_BLOCK_SIZE_LOG2); |
4505 | 55.4k | int sb_col_in_tile; |
4506 | | |
4507 | | // Initialize the left context for the new SB row |
4508 | 55.4k | memset(&xd->left_context, 0, sizeof(xd->left_context)); |
4509 | 55.4k | memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context)); |
4510 | | |
4511 | | // Code each SB in the row |
4512 | 133k | for (mi_col = mi_col_start, sb_col_in_tile = 0; mi_col < mi_col_end; |
4513 | 78.3k | mi_col += MI_BLOCK_SIZE, sb_col_in_tile++) { |
4514 | 78.3k | const struct segmentation *const seg = &cm->seg; |
4515 | 78.3k | int dummy_rate; |
4516 | 78.3k | int64_t dummy_dist; |
4517 | 78.3k | RD_COST dummy_rdc; |
4518 | 78.3k | int i; |
4519 | 78.3k | int seg_skip = 0; |
4520 | 78.3k | int orig_rdmult = cpi->rd.RDMULT; |
4521 | | |
4522 | 78.3k | const int idx_str = cm->mi_stride * mi_row + mi_col; |
4523 | 78.3k | MODE_INFO **mi = cm->mi_grid_visible + idx_str; |
4524 | | |
4525 | 78.3k | vp9_rd_cost_reset(&dummy_rdc); |
4526 | 78.3k | (*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, sb_row, |
4527 | 78.3k | sb_col_in_tile); |
4528 | | |
4529 | 78.3k | if (sf->adaptive_pred_interp_filter) { |
4530 | 5.09M | for (i = 0; i < 64; ++i) td->leaf_tree[i].pred_interp_filter = SWITCHABLE; |
4531 | | |
4532 | 5.09M | for (i = 0; i < 64; ++i) { |
4533 | 5.01M | td->pc_tree[i].vertical[0].pred_interp_filter = SWITCHABLE; |
4534 | 5.01M | td->pc_tree[i].vertical[1].pred_interp_filter = SWITCHABLE; |
4535 | 5.01M | td->pc_tree[i].horizontal[0].pred_interp_filter = SWITCHABLE; |
4536 | 5.01M | td->pc_tree[i].horizontal[1].pred_interp_filter = SWITCHABLE; |
4537 | 5.01M | } |
4538 | 78.3k | } |
4539 | | |
4540 | 391k | for (i = 0; i < MAX_REF_FRAMES; ++i) { |
4541 | 313k | x->pred_mv[i].row = INT16_MAX; |
4542 | 313k | x->pred_mv[i].col = INT16_MAX; |
4543 | 313k | } |
4544 | 78.3k | td->pc_root->index = 0; |
4545 | | |
4546 | 78.3k | if (seg->enabled) { |
4547 | 0 | const uint8_t *const map = |
4548 | 0 | seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map; |
4549 | 0 | int segment_id = get_segment_id(cm, map, BLOCK_64X64, mi_row, mi_col); |
4550 | 0 | seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP); |
4551 | 0 | } |
4552 | | |
4553 | 78.3k | x->source_variance = UINT_MAX; |
4554 | | |
4555 | 78.3k | x->cb_rdmult = orig_rdmult; |
4556 | | |
4557 | 78.3k | if (sf->partition_search_type == FIXED_PARTITION || seg_skip) { |
4558 | 0 | const BLOCK_SIZE bsize = |
4559 | 0 | seg_skip ? BLOCK_64X64 : sf->always_this_block_size; |
4560 | 0 | set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64); |
4561 | 0 | set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize); |
4562 | 0 | rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64, |
4563 | 0 | &dummy_rate, &dummy_dist, 1, td->pc_root); |
4564 | 78.3k | } else if (sf->partition_search_type == VAR_BASED_PARTITION && |
4565 | 78.3k | cm->frame_type != KEY_FRAME) { |
4566 | 0 | choose_partitioning(cpi, tile_info, x, mi_row, mi_col); |
4567 | 0 | rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64, |
4568 | 0 | &dummy_rate, &dummy_dist, 1, td->pc_root); |
4569 | 78.3k | } else { |
4570 | 78.3k | if (cpi->twopass.gf_group.index > 0 && cpi->sf.enable_tpl_model) { |
4571 | 0 | int dr = |
4572 | 0 | get_rdmult_delta(cpi, BLOCK_64X64, mi_row, mi_col, orig_rdmult); |
4573 | 0 | x->cb_rdmult = dr; |
4574 | 0 | } |
4575 | | |
4576 | 78.3k | if (cpi->oxcf.aq_mode == PERCEPTUAL_AQ && cm->show_frame) { |
4577 | 0 | x->segment_id = wiener_var_segment(cpi, BLOCK_64X64, mi_row, mi_col); |
4578 | 0 | x->cb_rdmult = vp9_compute_rd_mult( |
4579 | 0 | cpi, vp9_get_qindex(&cm->seg, x->segment_id, cm->base_qindex)); |
4580 | 0 | } |
4581 | | |
4582 | | // If required set upper and lower partition size limits |
4583 | 78.3k | if (sf->auto_min_max_partition_size) { |
4584 | 0 | set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64); |
4585 | 0 | rd_auto_partition_range(cpi, tile_info, xd, mi_row, mi_col, |
4586 | 0 | &x->min_partition_size, &x->max_partition_size); |
4587 | 0 | } |
4588 | 78.3k | td->pc_root->none.rdcost = 0; |
4589 | | |
4590 | | #if CONFIG_COLLECT_COMPONENT_TIMING |
4591 | | start_timing(cpi, rd_pick_partition_time); |
4592 | | #endif |
4593 | 78.3k | rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, BLOCK_64X64, |
4594 | 78.3k | &dummy_rdc, dummy_rdc, td->pc_root); |
4595 | | #if CONFIG_COLLECT_COMPONENT_TIMING |
4596 | | end_timing(cpi, rd_pick_partition_time); |
4597 | | #endif |
4598 | 78.3k | } |
4599 | 78.3k | (*(cpi->row_mt_sync_write_ptr))(&tile_data->row_mt_sync, sb_row, |
4600 | 78.3k | sb_col_in_tile, num_sb_cols); |
4601 | 78.3k | } |
4602 | 55.4k | } |
4603 | | #endif // !CONFIG_REALTIME_ONLY |
4604 | | |
4605 | 42.2k | static void init_encode_frame_mb_context(VP9_COMP *cpi) { |
4606 | 42.2k | MACROBLOCK *const x = &cpi->td.mb; |
4607 | 42.2k | VP9_COMMON *const cm = &cpi->common; |
4608 | 42.2k | MACROBLOCKD *const xd = &x->e_mbd; |
4609 | 42.2k | const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); |
4610 | | |
4611 | | // Copy data over into macro block data structures. |
4612 | 42.2k | vp9_setup_src_planes(x, cpi->Source, 0, 0); |
4613 | | |
4614 | 42.2k | vp9_setup_block_planes(&x->e_mbd, cm->subsampling_x, cm->subsampling_y); |
4615 | | |
4616 | | // Note: this memset assumes above_context[0], [1] and [2] |
4617 | | // are allocated as part of the same buffer. |
4618 | 42.2k | memset(xd->above_context[0], 0, |
4619 | 42.2k | sizeof(*xd->above_context[0]) * 2 * aligned_mi_cols * MAX_MB_PLANE); |
4620 | 42.2k | memset(xd->above_seg_context, 0, |
4621 | 42.2k | sizeof(*xd->above_seg_context) * aligned_mi_cols); |
4622 | 42.2k | } |
4623 | | |
4624 | 0 | static int check_dual_ref_flags(VP9_COMP *cpi) { |
4625 | 0 | const int ref_flags = cpi->ref_frame_flags; |
4626 | |
|
4627 | 0 | if (segfeature_active(&cpi->common.seg, 1, SEG_LVL_REF_FRAME)) { |
4628 | 0 | return 0; |
4629 | 0 | } else { |
4630 | 0 | return (!!(ref_flags & VP9_GOLD_FLAG) + !!(ref_flags & VP9_LAST_FLAG) + |
4631 | 0 | !!(ref_flags & VP9_ALT_FLAG)) >= 2; |
4632 | 0 | } |
4633 | 0 | } |
4634 | | |
4635 | 5.70k | static void reset_skip_tx_size(VP9_COMMON *cm, TX_SIZE max_tx_size) { |
4636 | 5.70k | int mi_row, mi_col; |
4637 | 5.70k | const int mis = cm->mi_stride; |
4638 | 5.70k | MODE_INFO **mi_ptr = cm->mi_grid_visible; |
4639 | | |
4640 | 35.5k | for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row, mi_ptr += mis) { |
4641 | 250k | for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) { |
4642 | 221k | if (mi_ptr[mi_col]->tx_size > max_tx_size) |
4643 | 1.64k | mi_ptr[mi_col]->tx_size = max_tx_size; |
4644 | 221k | } |
4645 | 29.8k | } |
4646 | 5.70k | } |
4647 | | |
4648 | 42.2k | static MV_REFERENCE_FRAME get_frame_type(const VP9_COMP *cpi) { |
4649 | 42.2k | if (frame_is_intra_only(&cpi->common)) |
4650 | 7.75k | return INTRA_FRAME; |
4651 | 34.5k | else if (cpi->rc.is_src_frame_alt_ref && cpi->refresh_golden_frame) |
4652 | 0 | return ALTREF_FRAME; |
4653 | 34.5k | else if (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) |
4654 | 2.39k | return GOLDEN_FRAME; |
4655 | 32.1k | else |
4656 | 32.1k | return LAST_FRAME; |
4657 | 42.2k | } |
4658 | | |
4659 | 42.2k | static TX_MODE select_tx_mode(const VP9_COMP *cpi, MACROBLOCKD *const xd) { |
4660 | 42.2k | if (xd->lossless) return ONLY_4X4; |
4661 | 35.4k | if (cpi->common.frame_type == KEY_FRAME && cpi->sf.use_nonrd_pick_mode) |
4662 | 0 | return ALLOW_16X16; |
4663 | 35.4k | if (cpi->sf.tx_size_search_method == USE_LARGESTALL) |
4664 | 13.8k | return ALLOW_32X32; |
4665 | 21.6k | else if (cpi->sf.tx_size_search_method == USE_FULL_RD || |
4666 | 21.6k | cpi->sf.tx_size_search_method == USE_TX_8X8) |
4667 | 21.6k | return TX_MODE_SELECT; |
4668 | 0 | else |
4669 | 0 | return cpi->common.tx_mode; |
4670 | 35.4k | } |
4671 | | |
4672 | | static void hybrid_intra_mode_search(VP9_COMP *cpi, MACROBLOCK *const x, |
4673 | | RD_COST *rd_cost, BLOCK_SIZE bsize, |
4674 | 0 | PICK_MODE_CONTEXT *ctx) { |
4675 | 0 | if (!cpi->sf.nonrd_keyframe && bsize < BLOCK_16X16) |
4676 | 0 | vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, INT64_MAX); |
4677 | 0 | else |
4678 | 0 | vp9_pick_intra_mode(cpi, x, rd_cost, bsize, ctx); |
4679 | 0 | } |
4680 | | |
4681 | | static void hybrid_search_svc_baseiskey(VP9_COMP *cpi, MACROBLOCK *const x, |
4682 | | RD_COST *rd_cost, BLOCK_SIZE bsize, |
4683 | | PICK_MODE_CONTEXT *ctx, |
4684 | | TileDataEnc *tile_data, int mi_row, |
4685 | 0 | int mi_col) { |
4686 | 0 | if (!cpi->sf.nonrd_keyframe && bsize <= BLOCK_8X8) { |
4687 | 0 | vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, INT64_MAX); |
4688 | 0 | } else { |
4689 | 0 | if (cpi->svc.disable_inter_layer_pred == INTER_LAYER_PRED_OFF) |
4690 | 0 | vp9_pick_intra_mode(cpi, x, rd_cost, bsize, ctx); |
4691 | 0 | else if (bsize >= BLOCK_8X8) |
4692 | 0 | vp9_pick_inter_mode(cpi, x, tile_data, mi_row, mi_col, rd_cost, bsize, |
4693 | 0 | ctx); |
4694 | 0 | else |
4695 | 0 | vp9_pick_inter_mode_sub8x8(cpi, x, mi_row, mi_col, rd_cost, bsize, ctx); |
4696 | 0 | } |
4697 | 0 | } |
4698 | | |
4699 | | static void hybrid_search_scene_change(VP9_COMP *cpi, MACROBLOCK *const x, |
4700 | | RD_COST *rd_cost, BLOCK_SIZE bsize, |
4701 | | PICK_MODE_CONTEXT *ctx, |
4702 | | TileDataEnc *tile_data, int mi_row, |
4703 | 0 | int mi_col) { |
4704 | 0 | if (!cpi->sf.nonrd_keyframe && bsize <= BLOCK_8X8) { |
4705 | 0 | vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, INT64_MAX); |
4706 | 0 | } else { |
4707 | 0 | vp9_pick_inter_mode(cpi, x, tile_data, mi_row, mi_col, rd_cost, bsize, ctx); |
4708 | 0 | } |
4709 | 0 | } |
4710 | | |
4711 | | static void nonrd_pick_sb_modes(VP9_COMP *cpi, TileDataEnc *tile_data, |
4712 | | MACROBLOCK *const x, int mi_row, int mi_col, |
4713 | | RD_COST *rd_cost, BLOCK_SIZE bsize, |
4714 | 0 | PICK_MODE_CONTEXT *ctx) { |
4715 | 0 | VP9_COMMON *const cm = &cpi->common; |
4716 | 0 | TileInfo *const tile_info = &tile_data->tile_info; |
4717 | 0 | MACROBLOCKD *const xd = &x->e_mbd; |
4718 | 0 | MODE_INFO *mi; |
4719 | 0 | ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; |
4720 | 0 | BLOCK_SIZE bs = VPXMAX(bsize, BLOCK_8X8); // processing unit block size |
4721 | 0 | const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bs]; |
4722 | 0 | const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bs]; |
4723 | 0 | int plane; |
4724 | |
|
4725 | 0 | set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); |
4726 | |
|
4727 | 0 | set_segment_index(cpi, x, mi_row, mi_col, bsize, 0); |
4728 | |
|
4729 | 0 | x->skip_recode = 0; |
4730 | |
|
4731 | 0 | mi = xd->mi[0]; |
4732 | 0 | mi->sb_type = bsize; |
4733 | |
|
4734 | 0 | for (plane = 0; plane < MAX_MB_PLANE; ++plane) { |
4735 | 0 | struct macroblockd_plane *pd = &xd->plane[plane]; |
4736 | 0 | memcpy(a + num_4x4_blocks_wide * plane, pd->above_context, |
4737 | 0 | (sizeof(a[0]) * num_4x4_blocks_wide) >> pd->subsampling_x); |
4738 | 0 | memcpy(l + num_4x4_blocks_high * plane, pd->left_context, |
4739 | 0 | (sizeof(l[0]) * num_4x4_blocks_high) >> pd->subsampling_y); |
4740 | 0 | } |
4741 | |
|
4742 | 0 | if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) |
4743 | 0 | if (cyclic_refresh_segment_id_boosted(mi->segment_id)) |
4744 | 0 | x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh); |
4745 | |
|
4746 | 0 | if (frame_is_intra_only(cm)) |
4747 | 0 | hybrid_intra_mode_search(cpi, x, rd_cost, bsize, ctx); |
4748 | 0 | else if (cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame) |
4749 | 0 | hybrid_search_svc_baseiskey(cpi, x, rd_cost, bsize, ctx, tile_data, mi_row, |
4750 | 0 | mi_col); |
4751 | 0 | else if (segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP)) |
4752 | 0 | set_mode_info_seg_skip(x, cm->tx_mode, cm->interp_filter, rd_cost, bsize); |
4753 | 0 | else if (bsize >= BLOCK_8X8) { |
4754 | 0 | if (cpi->rc.hybrid_intra_scene_change) |
4755 | 0 | hybrid_search_scene_change(cpi, x, rd_cost, bsize, ctx, tile_data, mi_row, |
4756 | 0 | mi_col); |
4757 | 0 | else |
4758 | 0 | vp9_pick_inter_mode(cpi, x, tile_data, mi_row, mi_col, rd_cost, bsize, |
4759 | 0 | ctx); |
4760 | 0 | } else { |
4761 | 0 | vp9_pick_inter_mode_sub8x8(cpi, x, mi_row, mi_col, rd_cost, bsize, ctx); |
4762 | 0 | } |
4763 | |
|
4764 | 0 | duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize); |
4765 | |
|
4766 | 0 | for (plane = 0; plane < MAX_MB_PLANE; ++plane) { |
4767 | 0 | struct macroblockd_plane *pd = &xd->plane[plane]; |
4768 | 0 | memcpy(pd->above_context, a + num_4x4_blocks_wide * plane, |
4769 | 0 | (sizeof(a[0]) * num_4x4_blocks_wide) >> pd->subsampling_x); |
4770 | 0 | memcpy(pd->left_context, l + num_4x4_blocks_high * plane, |
4771 | 0 | (sizeof(l[0]) * num_4x4_blocks_high) >> pd->subsampling_y); |
4772 | 0 | } |
4773 | |
|
4774 | 0 | if (rd_cost->rate == INT_MAX) vp9_rd_cost_reset(rd_cost); |
4775 | |
|
4776 | 0 | ctx->rate = rd_cost->rate; |
4777 | 0 | ctx->dist = rd_cost->dist; |
4778 | 0 | } |
4779 | | |
4780 | | static void fill_mode_info_sb(VP9_COMMON *cm, MACROBLOCK *x, int mi_row, |
4781 | 0 | int mi_col, BLOCK_SIZE bsize, PC_TREE *pc_tree) { |
4782 | 0 | MACROBLOCKD *xd = &x->e_mbd; |
4783 | 0 | int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4; |
4784 | 0 | PARTITION_TYPE partition = pc_tree->partitioning; |
4785 | 0 | BLOCK_SIZE subsize = get_subsize(bsize, partition); |
4786 | |
|
4787 | 0 | assert(bsize >= BLOCK_8X8); |
4788 | |
|
4789 | 0 | if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; |
4790 | | |
4791 | 0 | switch (partition) { |
4792 | 0 | case PARTITION_NONE: |
4793 | 0 | set_mode_info_offsets(cm, x, xd, mi_row, mi_col); |
4794 | 0 | *(xd->mi[0]) = pc_tree->none.mic; |
4795 | 0 | *(x->mbmi_ext) = pc_tree->none.mbmi_ext; |
4796 | 0 | duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize); |
4797 | 0 | break; |
4798 | 0 | case PARTITION_VERT: |
4799 | 0 | set_mode_info_offsets(cm, x, xd, mi_row, mi_col); |
4800 | 0 | *(xd->mi[0]) = pc_tree->vertical[0].mic; |
4801 | 0 | *(x->mbmi_ext) = pc_tree->vertical[0].mbmi_ext; |
4802 | 0 | duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, subsize); |
4803 | |
|
4804 | 0 | if (mi_col + hbs < cm->mi_cols) { |
4805 | 0 | set_mode_info_offsets(cm, x, xd, mi_row, mi_col + hbs); |
4806 | 0 | *(xd->mi[0]) = pc_tree->vertical[1].mic; |
4807 | 0 | *(x->mbmi_ext) = pc_tree->vertical[1].mbmi_ext; |
4808 | 0 | duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col + hbs, subsize); |
4809 | 0 | } |
4810 | 0 | break; |
4811 | 0 | case PARTITION_HORZ: |
4812 | 0 | set_mode_info_offsets(cm, x, xd, mi_row, mi_col); |
4813 | 0 | *(xd->mi[0]) = pc_tree->horizontal[0].mic; |
4814 | 0 | *(x->mbmi_ext) = pc_tree->horizontal[0].mbmi_ext; |
4815 | 0 | duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, subsize); |
4816 | 0 | if (mi_row + hbs < cm->mi_rows) { |
4817 | 0 | set_mode_info_offsets(cm, x, xd, mi_row + hbs, mi_col); |
4818 | 0 | *(xd->mi[0]) = pc_tree->horizontal[1].mic; |
4819 | 0 | *(x->mbmi_ext) = pc_tree->horizontal[1].mbmi_ext; |
4820 | 0 | duplicate_mode_info_in_sb(cm, xd, mi_row + hbs, mi_col, subsize); |
4821 | 0 | } |
4822 | 0 | break; |
4823 | 0 | case PARTITION_SPLIT: { |
4824 | 0 | fill_mode_info_sb(cm, x, mi_row, mi_col, subsize, pc_tree->u.split[0]); |
4825 | 0 | fill_mode_info_sb(cm, x, mi_row, mi_col + hbs, subsize, |
4826 | 0 | pc_tree->u.split[1]); |
4827 | 0 | fill_mode_info_sb(cm, x, mi_row + hbs, mi_col, subsize, |
4828 | 0 | pc_tree->u.split[2]); |
4829 | 0 | fill_mode_info_sb(cm, x, mi_row + hbs, mi_col + hbs, subsize, |
4830 | 0 | pc_tree->u.split[3]); |
4831 | 0 | break; |
4832 | 0 | } |
4833 | 0 | default: break; |
4834 | 0 | } |
4835 | 0 | } |
4836 | | |
4837 | | // Reset the prediction pixel ready flag recursively. |
4838 | 0 | static void pred_pixel_ready_reset(PC_TREE *pc_tree, BLOCK_SIZE bsize) { |
4839 | 0 | pc_tree->none.pred_pixel_ready = 0; |
4840 | 0 | pc_tree->horizontal[0].pred_pixel_ready = 0; |
4841 | 0 | pc_tree->horizontal[1].pred_pixel_ready = 0; |
4842 | 0 | pc_tree->vertical[0].pred_pixel_ready = 0; |
4843 | 0 | pc_tree->vertical[1].pred_pixel_ready = 0; |
4844 | |
|
4845 | 0 | if (bsize > BLOCK_8X8) { |
4846 | 0 | BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_SPLIT); |
4847 | 0 | int i; |
4848 | 0 | for (i = 0; i < 4; ++i) |
4849 | 0 | pred_pixel_ready_reset(pc_tree->u.split[i], subsize); |
4850 | 0 | } |
4851 | 0 | } |
4852 | | |
4853 | | #define FEATURES 6 |
4854 | | #define LABELS 2 |
4855 | | static int ml_predict_var_partitioning(VP9_COMP *cpi, MACROBLOCK *x, |
4856 | | BLOCK_SIZE bsize, int mi_row, |
4857 | 0 | int mi_col) { |
4858 | 0 | VP9_COMMON *const cm = &cpi->common; |
4859 | 0 | const NN_CONFIG *nn_config = NULL; |
4860 | |
|
4861 | 0 | switch (bsize) { |
4862 | 0 | case BLOCK_64X64: nn_config = &vp9_var_part_nnconfig_64; break; |
4863 | 0 | case BLOCK_32X32: nn_config = &vp9_var_part_nnconfig_32; break; |
4864 | 0 | case BLOCK_16X16: nn_config = &vp9_var_part_nnconfig_16; break; |
4865 | 0 | case BLOCK_8X8: break; |
4866 | 0 | default: assert(0 && "Unexpected block size."); return -1; |
4867 | 0 | } |
4868 | | |
4869 | 0 | if (!nn_config) return -1; |
4870 | | |
4871 | 0 | vpx_clear_system_state(); |
4872 | |
|
4873 | 0 | { |
4874 | 0 | const float thresh = cpi->oxcf.speed <= 5 ? 1.25f : 0.0f; |
4875 | 0 | float features[FEATURES] = { 0.0f }; |
4876 | 0 | const int dc_q = vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth); |
4877 | 0 | int feature_idx = 0; |
4878 | 0 | float score[LABELS]; |
4879 | |
|
4880 | 0 | features[feature_idx++] = logf((float)(dc_q * dc_q) / 256.0f + 1.0f); |
4881 | 0 | vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col); |
4882 | 0 | { |
4883 | 0 | const int bs = 4 * num_4x4_blocks_wide_lookup[bsize]; |
4884 | 0 | const BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_SPLIT); |
4885 | 0 | const int sb_offset_row = 8 * (mi_row & 7); |
4886 | 0 | const int sb_offset_col = 8 * (mi_col & 7); |
4887 | 0 | const uint8_t *pred = x->est_pred + sb_offset_row * 64 + sb_offset_col; |
4888 | 0 | const uint8_t *src = x->plane[0].src.buf; |
4889 | 0 | const int src_stride = x->plane[0].src.stride; |
4890 | 0 | const int pred_stride = 64; |
4891 | 0 | unsigned int sse; |
4892 | 0 | int i; |
4893 | | // Variance of whole block. |
4894 | 0 | const unsigned int var = |
4895 | 0 | cpi->fn_ptr[bsize].vf(src, src_stride, pred, pred_stride, &sse); |
4896 | 0 | const float factor = (var == 0) ? 1.0f : (1.0f / (float)var); |
4897 | |
|
4898 | 0 | features[feature_idx++] = logf((float)var + 1.0f); |
4899 | 0 | for (i = 0; i < 4; ++i) { |
4900 | 0 | const int x_idx = (i & 1) * bs / 2; |
4901 | 0 | const int y_idx = (i >> 1) * bs / 2; |
4902 | 0 | const int src_offset = y_idx * src_stride + x_idx; |
4903 | 0 | const int pred_offset = y_idx * pred_stride + x_idx; |
4904 | | // Variance of quarter block. |
4905 | 0 | const unsigned int sub_var = |
4906 | 0 | cpi->fn_ptr[subsize].vf(src + src_offset, src_stride, |
4907 | 0 | pred + pred_offset, pred_stride, &sse); |
4908 | 0 | const float var_ratio = (var == 0) ? 1.0f : factor * (float)sub_var; |
4909 | 0 | features[feature_idx++] = var_ratio; |
4910 | 0 | } |
4911 | 0 | } |
4912 | |
|
4913 | 0 | assert(feature_idx == FEATURES); |
4914 | 0 | nn_predict(features, nn_config, score); |
4915 | 0 | if (score[0] > thresh) return PARTITION_SPLIT; |
4916 | 0 | if (score[0] < -thresh) return PARTITION_NONE; |
4917 | 0 | return -1; |
4918 | 0 | } |
4919 | 0 | } |
4920 | | #undef FEATURES |
4921 | | #undef LABELS |
4922 | | |
4923 | | static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td, |
4924 | | TileDataEnc *tile_data, TOKENEXTRA **tp, |
4925 | | int mi_row, int mi_col, BLOCK_SIZE bsize, |
4926 | | RD_COST *rd_cost, int do_recon, |
4927 | 0 | int64_t best_rd, PC_TREE *pc_tree) { |
4928 | 0 | const SPEED_FEATURES *const sf = &cpi->sf; |
4929 | 0 | VP9_COMMON *const cm = &cpi->common; |
4930 | 0 | TileInfo *const tile_info = &tile_data->tile_info; |
4931 | 0 | MACROBLOCK *const x = &td->mb; |
4932 | 0 | MACROBLOCKD *const xd = &x->e_mbd; |
4933 | 0 | const int ms = num_8x8_blocks_wide_lookup[bsize] / 2; |
4934 | 0 | TOKENEXTRA *tp_orig = *tp; |
4935 | 0 | PICK_MODE_CONTEXT *ctx = &pc_tree->none; |
4936 | 0 | int i; |
4937 | 0 | BLOCK_SIZE subsize = bsize; |
4938 | 0 | RD_COST this_rdc, sum_rdc, best_rdc; |
4939 | 0 | int do_split = bsize >= BLOCK_8X8; |
4940 | 0 | int do_rect = 1; |
4941 | | // Override skipping rectangular partition operations for edge blocks |
4942 | 0 | const int force_horz_split = (mi_row + ms >= cm->mi_rows); |
4943 | 0 | const int force_vert_split = (mi_col + ms >= cm->mi_cols); |
4944 | 0 | const int xss = x->e_mbd.plane[1].subsampling_x; |
4945 | 0 | const int yss = x->e_mbd.plane[1].subsampling_y; |
4946 | |
|
4947 | 0 | int partition_none_allowed = !force_horz_split && !force_vert_split; |
4948 | 0 | int partition_horz_allowed = |
4949 | 0 | !force_vert_split && yss <= xss && bsize >= BLOCK_8X8; |
4950 | 0 | int partition_vert_allowed = |
4951 | 0 | !force_horz_split && xss <= yss && bsize >= BLOCK_8X8; |
4952 | 0 | const int use_ml_based_partitioning = |
4953 | 0 | sf->partition_search_type == ML_BASED_PARTITION; |
4954 | |
|
4955 | 0 | (void)*tp_orig; |
4956 | | |
4957 | | // Avoid checking for rectangular partitions for speed >= 5. |
4958 | 0 | if (cpi->oxcf.speed >= 5) do_rect = 0; |
4959 | |
|
4960 | 0 | assert(num_8x8_blocks_wide_lookup[bsize] == |
4961 | 0 | num_8x8_blocks_high_lookup[bsize]); |
4962 | |
|
4963 | 0 | vp9_rd_cost_init(&sum_rdc); |
4964 | 0 | vp9_rd_cost_reset(&best_rdc); |
4965 | 0 | best_rdc.rdcost = best_rd; |
4966 | | |
4967 | | // Determine partition types in search according to the speed features. |
4968 | | // The threshold set here has to be of square block size. |
4969 | 0 | if (sf->auto_min_max_partition_size) { |
4970 | 0 | partition_none_allowed &= |
4971 | 0 | (bsize <= x->max_partition_size && bsize >= x->min_partition_size); |
4972 | 0 | partition_horz_allowed &= |
4973 | 0 | ((bsize <= x->max_partition_size && bsize > x->min_partition_size) || |
4974 | 0 | force_horz_split); |
4975 | 0 | partition_vert_allowed &= |
4976 | 0 | ((bsize <= x->max_partition_size && bsize > x->min_partition_size) || |
4977 | 0 | force_vert_split); |
4978 | 0 | do_split &= bsize > x->min_partition_size; |
4979 | 0 | } |
4980 | 0 | if (sf->use_square_partition_only) { |
4981 | 0 | partition_horz_allowed &= force_horz_split; |
4982 | 0 | partition_vert_allowed &= force_vert_split; |
4983 | 0 | } |
4984 | |
|
4985 | 0 | if (use_ml_based_partitioning) { |
4986 | 0 | if (partition_none_allowed || do_split) do_rect = 0; |
4987 | 0 | if (partition_none_allowed && do_split) { |
4988 | 0 | const int ml_predicted_partition = |
4989 | 0 | ml_predict_var_partitioning(cpi, x, bsize, mi_row, mi_col); |
4990 | 0 | if (ml_predicted_partition == PARTITION_NONE) do_split = 0; |
4991 | 0 | if (ml_predicted_partition == PARTITION_SPLIT) partition_none_allowed = 0; |
4992 | 0 | } |
4993 | 0 | } |
4994 | |
|
4995 | 0 | if (!partition_none_allowed && !do_split) do_rect = 1; |
4996 | |
|
4997 | 0 | ctx->pred_pixel_ready = |
4998 | 0 | !(partition_vert_allowed || partition_horz_allowed || do_split); |
4999 | | |
5000 | | // PARTITION_NONE |
5001 | 0 | if (partition_none_allowed) { |
5002 | 0 | nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, bsize, |
5003 | 0 | ctx); |
5004 | 0 | ctx->mic = *xd->mi[0]; |
5005 | 0 | ctx->mbmi_ext = *x->mbmi_ext; |
5006 | 0 | ctx->skip_txfm[0] = x->skip_txfm[0]; |
5007 | 0 | ctx->skip = x->skip; |
5008 | |
|
5009 | 0 | if (this_rdc.rate != INT_MAX) { |
5010 | 0 | const int pl = partition_plane_context(xd, mi_row, mi_col, bsize); |
5011 | 0 | this_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE]; |
5012 | 0 | this_rdc.rdcost = |
5013 | 0 | RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist); |
5014 | 0 | if (this_rdc.rdcost < best_rdc.rdcost) { |
5015 | 0 | best_rdc = this_rdc; |
5016 | 0 | if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE; |
5017 | |
|
5018 | 0 | if (!use_ml_based_partitioning) { |
5019 | 0 | int64_t dist_breakout_thr = sf->partition_search_breakout_thr.dist; |
5020 | 0 | int64_t rate_breakout_thr = sf->partition_search_breakout_thr.rate; |
5021 | 0 | dist_breakout_thr >>= |
5022 | 0 | 8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]); |
5023 | 0 | rate_breakout_thr *= num_pels_log2_lookup[bsize]; |
5024 | 0 | if (!x->e_mbd.lossless && this_rdc.rate < rate_breakout_thr && |
5025 | 0 | this_rdc.dist < dist_breakout_thr) { |
5026 | 0 | do_split = 0; |
5027 | 0 | do_rect = 0; |
5028 | 0 | } |
5029 | 0 | } |
5030 | 0 | } |
5031 | 0 | } |
5032 | 0 | } |
5033 | | |
5034 | | // store estimated motion vector |
5035 | 0 | store_pred_mv(x, ctx); |
5036 | | |
5037 | | // PARTITION_SPLIT |
5038 | 0 | if (do_split) { |
5039 | 0 | int pl = partition_plane_context(xd, mi_row, mi_col, bsize); |
5040 | 0 | sum_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT]; |
5041 | 0 | sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); |
5042 | 0 | subsize = get_subsize(bsize, PARTITION_SPLIT); |
5043 | 0 | for (i = 0; i < 4 && sum_rdc.rdcost < best_rdc.rdcost; ++i) { |
5044 | 0 | const int x_idx = (i & 1) * ms; |
5045 | 0 | const int y_idx = (i >> 1) * ms; |
5046 | |
|
5047 | 0 | if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols) |
5048 | 0 | continue; |
5049 | 0 | load_pred_mv(x, ctx); |
5050 | 0 | nonrd_pick_partition( |
5051 | 0 | cpi, td, tile_data, tp, mi_row + y_idx, mi_col + x_idx, subsize, |
5052 | 0 | &this_rdc, 0, best_rdc.rdcost - sum_rdc.rdcost, pc_tree->u.split[i]); |
5053 | |
|
5054 | 0 | if (this_rdc.rate == INT_MAX) { |
5055 | 0 | vp9_rd_cost_reset(&sum_rdc); |
5056 | 0 | } else { |
5057 | 0 | sum_rdc.rate += this_rdc.rate; |
5058 | 0 | sum_rdc.dist += this_rdc.dist; |
5059 | 0 | sum_rdc.rdcost += this_rdc.rdcost; |
5060 | 0 | } |
5061 | 0 | } |
5062 | |
|
5063 | 0 | if (sum_rdc.rdcost < best_rdc.rdcost) { |
5064 | 0 | best_rdc = sum_rdc; |
5065 | 0 | pc_tree->partitioning = PARTITION_SPLIT; |
5066 | 0 | } else { |
5067 | | // skip rectangular partition test when larger block size |
5068 | | // gives better rd cost |
5069 | 0 | if (sf->less_rectangular_check) do_rect &= !partition_none_allowed; |
5070 | 0 | } |
5071 | 0 | } |
5072 | | |
5073 | | // PARTITION_HORZ |
5074 | 0 | if (partition_horz_allowed && do_rect) { |
5075 | 0 | subsize = get_subsize(bsize, PARTITION_HORZ); |
5076 | 0 | load_pred_mv(x, ctx); |
5077 | 0 | pc_tree->horizontal[0].pred_pixel_ready = 1; |
5078 | 0 | nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize, |
5079 | 0 | &pc_tree->horizontal[0]); |
5080 | |
|
5081 | 0 | pc_tree->horizontal[0].mic = *xd->mi[0]; |
5082 | 0 | pc_tree->horizontal[0].mbmi_ext = *x->mbmi_ext; |
5083 | 0 | pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0]; |
5084 | 0 | pc_tree->horizontal[0].skip = x->skip; |
5085 | |
|
5086 | 0 | if (sum_rdc.rdcost < best_rdc.rdcost && mi_row + ms < cm->mi_rows) { |
5087 | 0 | load_pred_mv(x, ctx); |
5088 | 0 | pc_tree->horizontal[1].pred_pixel_ready = 1; |
5089 | 0 | nonrd_pick_sb_modes(cpi, tile_data, x, mi_row + ms, mi_col, &this_rdc, |
5090 | 0 | subsize, &pc_tree->horizontal[1]); |
5091 | |
|
5092 | 0 | pc_tree->horizontal[1].mic = *xd->mi[0]; |
5093 | 0 | pc_tree->horizontal[1].mbmi_ext = *x->mbmi_ext; |
5094 | 0 | pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0]; |
5095 | 0 | pc_tree->horizontal[1].skip = x->skip; |
5096 | |
|
5097 | 0 | if (this_rdc.rate == INT_MAX) { |
5098 | 0 | vp9_rd_cost_reset(&sum_rdc); |
5099 | 0 | } else { |
5100 | 0 | int pl = partition_plane_context(xd, mi_row, mi_col, bsize); |
5101 | 0 | this_rdc.rate += cpi->partition_cost[pl][PARTITION_HORZ]; |
5102 | 0 | sum_rdc.rate += this_rdc.rate; |
5103 | 0 | sum_rdc.dist += this_rdc.dist; |
5104 | 0 | sum_rdc.rdcost = |
5105 | 0 | RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); |
5106 | 0 | } |
5107 | 0 | } |
5108 | |
|
5109 | 0 | if (sum_rdc.rdcost < best_rdc.rdcost) { |
5110 | 0 | best_rdc = sum_rdc; |
5111 | 0 | pc_tree->partitioning = PARTITION_HORZ; |
5112 | 0 | } else { |
5113 | 0 | pred_pixel_ready_reset(pc_tree, bsize); |
5114 | 0 | } |
5115 | 0 | } |
5116 | | |
5117 | | // PARTITION_VERT |
5118 | 0 | if (partition_vert_allowed && do_rect) { |
5119 | 0 | subsize = get_subsize(bsize, PARTITION_VERT); |
5120 | 0 | load_pred_mv(x, ctx); |
5121 | 0 | pc_tree->vertical[0].pred_pixel_ready = 1; |
5122 | 0 | nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize, |
5123 | 0 | &pc_tree->vertical[0]); |
5124 | 0 | pc_tree->vertical[0].mic = *xd->mi[0]; |
5125 | 0 | pc_tree->vertical[0].mbmi_ext = *x->mbmi_ext; |
5126 | 0 | pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0]; |
5127 | 0 | pc_tree->vertical[0].skip = x->skip; |
5128 | |
|
5129 | 0 | if (sum_rdc.rdcost < best_rdc.rdcost && mi_col + ms < cm->mi_cols) { |
5130 | 0 | load_pred_mv(x, ctx); |
5131 | 0 | pc_tree->vertical[1].pred_pixel_ready = 1; |
5132 | 0 | nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + ms, &this_rdc, |
5133 | 0 | subsize, &pc_tree->vertical[1]); |
5134 | 0 | pc_tree->vertical[1].mic = *xd->mi[0]; |
5135 | 0 | pc_tree->vertical[1].mbmi_ext = *x->mbmi_ext; |
5136 | 0 | pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0]; |
5137 | 0 | pc_tree->vertical[1].skip = x->skip; |
5138 | |
|
5139 | 0 | if (this_rdc.rate == INT_MAX) { |
5140 | 0 | vp9_rd_cost_reset(&sum_rdc); |
5141 | 0 | } else { |
5142 | 0 | int pl = partition_plane_context(xd, mi_row, mi_col, bsize); |
5143 | 0 | sum_rdc.rate += cpi->partition_cost[pl][PARTITION_VERT]; |
5144 | 0 | sum_rdc.rate += this_rdc.rate; |
5145 | 0 | sum_rdc.dist += this_rdc.dist; |
5146 | 0 | sum_rdc.rdcost = |
5147 | 0 | RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); |
5148 | 0 | } |
5149 | 0 | } |
5150 | |
|
5151 | 0 | if (sum_rdc.rdcost < best_rdc.rdcost) { |
5152 | 0 | best_rdc = sum_rdc; |
5153 | 0 | pc_tree->partitioning = PARTITION_VERT; |
5154 | 0 | } else { |
5155 | 0 | pred_pixel_ready_reset(pc_tree, bsize); |
5156 | 0 | } |
5157 | 0 | } |
5158 | |
|
5159 | 0 | *rd_cost = best_rdc; |
5160 | |
|
5161 | 0 | if (best_rdc.rate == INT_MAX) { |
5162 | 0 | vp9_rd_cost_reset(rd_cost); |
5163 | 0 | return; |
5164 | 0 | } |
5165 | | |
5166 | | // update mode info array |
5167 | 0 | fill_mode_info_sb(cm, x, mi_row, mi_col, bsize, pc_tree); |
5168 | |
|
5169 | 0 | if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX && do_recon) { |
5170 | 0 | int output_enabled = (bsize == BLOCK_64X64); |
5171 | 0 | encode_sb_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize, |
5172 | 0 | pc_tree); |
5173 | 0 | } |
5174 | |
|
5175 | 0 | if (bsize == BLOCK_64X64 && do_recon) { |
5176 | 0 | assert(tp_orig < *tp); |
5177 | 0 | assert(best_rdc.rate < INT_MAX); |
5178 | 0 | assert(best_rdc.dist < INT64_MAX); |
5179 | 0 | } else { |
5180 | 0 | assert(tp_orig == *tp); |
5181 | 0 | } |
5182 | 0 | } |
5183 | | |
5184 | | static void nonrd_select_partition(VP9_COMP *cpi, ThreadData *td, |
5185 | | TileDataEnc *tile_data, MODE_INFO **mi, |
5186 | | TOKENEXTRA **tp, int mi_row, int mi_col, |
5187 | | BLOCK_SIZE bsize, int output_enabled, |
5188 | 0 | RD_COST *rd_cost, PC_TREE *pc_tree) { |
5189 | 0 | VP9_COMMON *const cm = &cpi->common; |
5190 | 0 | TileInfo *const tile_info = &tile_data->tile_info; |
5191 | 0 | MACROBLOCK *const x = &td->mb; |
5192 | 0 | MACROBLOCKD *const xd = &x->e_mbd; |
5193 | 0 | const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4; |
5194 | 0 | const int mis = cm->mi_stride; |
5195 | 0 | PARTITION_TYPE partition; |
5196 | 0 | BLOCK_SIZE subsize; |
5197 | 0 | RD_COST this_rdc; |
5198 | 0 | BLOCK_SIZE subsize_ref = |
5199 | 0 | (cpi->sf.adapt_partition_source_sad) ? BLOCK_8X8 : BLOCK_16X16; |
5200 | |
|
5201 | 0 | vp9_rd_cost_reset(&this_rdc); |
5202 | 0 | if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; |
5203 | | |
5204 | 0 | subsize = (bsize >= BLOCK_8X8) ? mi[0]->sb_type : BLOCK_4X4; |
5205 | 0 | partition = partition_lookup[bsl][subsize]; |
5206 | |
|
5207 | 0 | if (bsize == BLOCK_32X32 && subsize == BLOCK_32X32) { |
5208 | 0 | x->max_partition_size = BLOCK_32X32; |
5209 | 0 | x->min_partition_size = BLOCK_16X16; |
5210 | 0 | nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize, rd_cost, |
5211 | 0 | 0, INT64_MAX, pc_tree); |
5212 | 0 | } else if (bsize == BLOCK_32X32 && partition != PARTITION_NONE && |
5213 | 0 | subsize >= subsize_ref) { |
5214 | 0 | x->max_partition_size = BLOCK_32X32; |
5215 | 0 | x->min_partition_size = BLOCK_8X8; |
5216 | 0 | nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize, rd_cost, |
5217 | 0 | 0, INT64_MAX, pc_tree); |
5218 | 0 | } else if (bsize == BLOCK_16X16 && partition != PARTITION_NONE) { |
5219 | 0 | x->max_partition_size = BLOCK_16X16; |
5220 | 0 | x->min_partition_size = BLOCK_8X8; |
5221 | 0 | nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize, rd_cost, |
5222 | 0 | 0, INT64_MAX, pc_tree); |
5223 | 0 | } else { |
5224 | 0 | switch (partition) { |
5225 | 0 | case PARTITION_NONE: |
5226 | 0 | pc_tree->none.pred_pixel_ready = 1; |
5227 | 0 | nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost, subsize, |
5228 | 0 | &pc_tree->none); |
5229 | 0 | pc_tree->none.mic = *xd->mi[0]; |
5230 | 0 | pc_tree->none.mbmi_ext = *x->mbmi_ext; |
5231 | 0 | pc_tree->none.skip_txfm[0] = x->skip_txfm[0]; |
5232 | 0 | pc_tree->none.skip = x->skip; |
5233 | 0 | break; |
5234 | 0 | case PARTITION_VERT: |
5235 | 0 | pc_tree->vertical[0].pred_pixel_ready = 1; |
5236 | 0 | nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost, subsize, |
5237 | 0 | &pc_tree->vertical[0]); |
5238 | 0 | pc_tree->vertical[0].mic = *xd->mi[0]; |
5239 | 0 | pc_tree->vertical[0].mbmi_ext = *x->mbmi_ext; |
5240 | 0 | pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0]; |
5241 | 0 | pc_tree->vertical[0].skip = x->skip; |
5242 | 0 | if (mi_col + hbs < cm->mi_cols) { |
5243 | 0 | pc_tree->vertical[1].pred_pixel_ready = 1; |
5244 | 0 | nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, |
5245 | 0 | &this_rdc, subsize, &pc_tree->vertical[1]); |
5246 | 0 | pc_tree->vertical[1].mic = *xd->mi[0]; |
5247 | 0 | pc_tree->vertical[1].mbmi_ext = *x->mbmi_ext; |
5248 | 0 | pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0]; |
5249 | 0 | pc_tree->vertical[1].skip = x->skip; |
5250 | 0 | if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX && |
5251 | 0 | rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) { |
5252 | 0 | rd_cost->rate += this_rdc.rate; |
5253 | 0 | rd_cost->dist += this_rdc.dist; |
5254 | 0 | } |
5255 | 0 | } |
5256 | 0 | break; |
5257 | 0 | case PARTITION_HORZ: |
5258 | 0 | pc_tree->horizontal[0].pred_pixel_ready = 1; |
5259 | 0 | nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost, subsize, |
5260 | 0 | &pc_tree->horizontal[0]); |
5261 | 0 | pc_tree->horizontal[0].mic = *xd->mi[0]; |
5262 | 0 | pc_tree->horizontal[0].mbmi_ext = *x->mbmi_ext; |
5263 | 0 | pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0]; |
5264 | 0 | pc_tree->horizontal[0].skip = x->skip; |
5265 | 0 | if (mi_row + hbs < cm->mi_rows) { |
5266 | 0 | pc_tree->horizontal[1].pred_pixel_ready = 1; |
5267 | 0 | nonrd_pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, |
5268 | 0 | &this_rdc, subsize, &pc_tree->horizontal[1]); |
5269 | 0 | pc_tree->horizontal[1].mic = *xd->mi[0]; |
5270 | 0 | pc_tree->horizontal[1].mbmi_ext = *x->mbmi_ext; |
5271 | 0 | pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0]; |
5272 | 0 | pc_tree->horizontal[1].skip = x->skip; |
5273 | 0 | if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX && |
5274 | 0 | rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) { |
5275 | 0 | rd_cost->rate += this_rdc.rate; |
5276 | 0 | rd_cost->dist += this_rdc.dist; |
5277 | 0 | } |
5278 | 0 | } |
5279 | 0 | break; |
5280 | 0 | default: |
5281 | 0 | assert(partition == PARTITION_SPLIT); |
5282 | 0 | subsize = get_subsize(bsize, PARTITION_SPLIT); |
5283 | 0 | nonrd_select_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, |
5284 | 0 | subsize, output_enabled, rd_cost, |
5285 | 0 | pc_tree->u.split[0]); |
5286 | 0 | nonrd_select_partition(cpi, td, tile_data, mi + hbs, tp, mi_row, |
5287 | 0 | mi_col + hbs, subsize, output_enabled, &this_rdc, |
5288 | 0 | pc_tree->u.split[1]); |
5289 | 0 | if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX && |
5290 | 0 | rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) { |
5291 | 0 | rd_cost->rate += this_rdc.rate; |
5292 | 0 | rd_cost->dist += this_rdc.dist; |
5293 | 0 | } |
5294 | 0 | nonrd_select_partition(cpi, td, tile_data, mi + hbs * mis, tp, |
5295 | 0 | mi_row + hbs, mi_col, subsize, output_enabled, |
5296 | 0 | &this_rdc, pc_tree->u.split[2]); |
5297 | 0 | if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX && |
5298 | 0 | rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) { |
5299 | 0 | rd_cost->rate += this_rdc.rate; |
5300 | 0 | rd_cost->dist += this_rdc.dist; |
5301 | 0 | } |
5302 | 0 | nonrd_select_partition(cpi, td, tile_data, mi + hbs * mis + hbs, tp, |
5303 | 0 | mi_row + hbs, mi_col + hbs, subsize, |
5304 | 0 | output_enabled, &this_rdc, pc_tree->u.split[3]); |
5305 | 0 | if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX && |
5306 | 0 | rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) { |
5307 | 0 | rd_cost->rate += this_rdc.rate; |
5308 | 0 | rd_cost->dist += this_rdc.dist; |
5309 | 0 | } |
5310 | 0 | break; |
5311 | 0 | } |
5312 | 0 | } |
5313 | | |
5314 | 0 | if (bsize == BLOCK_64X64 && output_enabled) |
5315 | 0 | encode_sb_rt(cpi, td, tile_info, tp, mi_row, mi_col, 1, bsize, pc_tree); |
5316 | 0 | } |
5317 | | |
5318 | | static void nonrd_use_partition(VP9_COMP *cpi, ThreadData *td, |
5319 | | TileDataEnc *tile_data, MODE_INFO **mi, |
5320 | | TOKENEXTRA **tp, int mi_row, int mi_col, |
5321 | | BLOCK_SIZE bsize, int output_enabled, |
5322 | 0 | RD_COST *dummy_cost, PC_TREE *pc_tree) { |
5323 | 0 | VP9_COMMON *const cm = &cpi->common; |
5324 | 0 | TileInfo *tile_info = &tile_data->tile_info; |
5325 | 0 | MACROBLOCK *const x = &td->mb; |
5326 | 0 | MACROBLOCKD *const xd = &x->e_mbd; |
5327 | 0 | const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4; |
5328 | 0 | const int mis = cm->mi_stride; |
5329 | 0 | PARTITION_TYPE partition; |
5330 | 0 | BLOCK_SIZE subsize; |
5331 | |
|
5332 | 0 | if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; |
5333 | | |
5334 | 0 | subsize = (bsize >= BLOCK_8X8) ? mi[0]->sb_type : BLOCK_4X4; |
5335 | 0 | partition = partition_lookup[bsl][subsize]; |
5336 | |
|
5337 | 0 | if (output_enabled && bsize != BLOCK_4X4) { |
5338 | 0 | int ctx = partition_plane_context(xd, mi_row, mi_col, bsize); |
5339 | 0 | td->counts->partition[ctx][partition]++; |
5340 | 0 | } |
5341 | |
|
5342 | 0 | switch (partition) { |
5343 | 0 | case PARTITION_NONE: |
5344 | 0 | pc_tree->none.pred_pixel_ready = 1; |
5345 | 0 | nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost, |
5346 | 0 | subsize, &pc_tree->none); |
5347 | 0 | pc_tree->none.mic = *xd->mi[0]; |
5348 | 0 | pc_tree->none.mbmi_ext = *x->mbmi_ext; |
5349 | 0 | pc_tree->none.skip_txfm[0] = x->skip_txfm[0]; |
5350 | 0 | pc_tree->none.skip = x->skip; |
5351 | 0 | encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, |
5352 | 0 | subsize, &pc_tree->none); |
5353 | 0 | break; |
5354 | 0 | case PARTITION_VERT: |
5355 | 0 | pc_tree->vertical[0].pred_pixel_ready = 1; |
5356 | 0 | nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost, |
5357 | 0 | subsize, &pc_tree->vertical[0]); |
5358 | 0 | pc_tree->vertical[0].mic = *xd->mi[0]; |
5359 | 0 | pc_tree->vertical[0].mbmi_ext = *x->mbmi_ext; |
5360 | 0 | pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0]; |
5361 | 0 | pc_tree->vertical[0].skip = x->skip; |
5362 | 0 | encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, |
5363 | 0 | subsize, &pc_tree->vertical[0]); |
5364 | 0 | if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) { |
5365 | 0 | pc_tree->vertical[1].pred_pixel_ready = 1; |
5366 | 0 | nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, dummy_cost, |
5367 | 0 | subsize, &pc_tree->vertical[1]); |
5368 | 0 | pc_tree->vertical[1].mic = *xd->mi[0]; |
5369 | 0 | pc_tree->vertical[1].mbmi_ext = *x->mbmi_ext; |
5370 | 0 | pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0]; |
5371 | 0 | pc_tree->vertical[1].skip = x->skip; |
5372 | 0 | encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col + hbs, |
5373 | 0 | output_enabled, subsize, &pc_tree->vertical[1]); |
5374 | 0 | } |
5375 | 0 | break; |
5376 | 0 | case PARTITION_HORZ: |
5377 | 0 | pc_tree->horizontal[0].pred_pixel_ready = 1; |
5378 | 0 | nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost, |
5379 | 0 | subsize, &pc_tree->horizontal[0]); |
5380 | 0 | pc_tree->horizontal[0].mic = *xd->mi[0]; |
5381 | 0 | pc_tree->horizontal[0].mbmi_ext = *x->mbmi_ext; |
5382 | 0 | pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0]; |
5383 | 0 | pc_tree->horizontal[0].skip = x->skip; |
5384 | 0 | encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, |
5385 | 0 | subsize, &pc_tree->horizontal[0]); |
5386 | |
|
5387 | 0 | if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) { |
5388 | 0 | pc_tree->horizontal[1].pred_pixel_ready = 1; |
5389 | 0 | nonrd_pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, dummy_cost, |
5390 | 0 | subsize, &pc_tree->horizontal[1]); |
5391 | 0 | pc_tree->horizontal[1].mic = *xd->mi[0]; |
5392 | 0 | pc_tree->horizontal[1].mbmi_ext = *x->mbmi_ext; |
5393 | 0 | pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0]; |
5394 | 0 | pc_tree->horizontal[1].skip = x->skip; |
5395 | 0 | encode_b_rt(cpi, td, tile_info, tp, mi_row + hbs, mi_col, |
5396 | 0 | output_enabled, subsize, &pc_tree->horizontal[1]); |
5397 | 0 | } |
5398 | 0 | break; |
5399 | 0 | default: |
5400 | 0 | assert(partition == PARTITION_SPLIT); |
5401 | 0 | subsize = get_subsize(bsize, PARTITION_SPLIT); |
5402 | 0 | if (bsize == BLOCK_8X8) { |
5403 | 0 | nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost, |
5404 | 0 | subsize, pc_tree->u.leaf_split[0]); |
5405 | 0 | encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, |
5406 | 0 | subsize, pc_tree->u.leaf_split[0]); |
5407 | 0 | } else { |
5408 | 0 | nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, subsize, |
5409 | 0 | output_enabled, dummy_cost, pc_tree->u.split[0]); |
5410 | 0 | nonrd_use_partition(cpi, td, tile_data, mi + hbs, tp, mi_row, |
5411 | 0 | mi_col + hbs, subsize, output_enabled, dummy_cost, |
5412 | 0 | pc_tree->u.split[1]); |
5413 | 0 | nonrd_use_partition(cpi, td, tile_data, mi + hbs * mis, tp, |
5414 | 0 | mi_row + hbs, mi_col, subsize, output_enabled, |
5415 | 0 | dummy_cost, pc_tree->u.split[2]); |
5416 | 0 | nonrd_use_partition(cpi, td, tile_data, mi + hbs * mis + hbs, tp, |
5417 | 0 | mi_row + hbs, mi_col + hbs, subsize, output_enabled, |
5418 | 0 | dummy_cost, pc_tree->u.split[3]); |
5419 | 0 | } |
5420 | 0 | break; |
5421 | 0 | } |
5422 | | |
5423 | 0 | if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8) |
5424 | 0 | update_partition_context(xd, mi_row, mi_col, subsize, bsize); |
5425 | 0 | } |
5426 | | |
5427 | | // Get a prediction(stored in x->est_pred) for the whole 64x64 superblock. |
5428 | | static void get_estimated_pred(VP9_COMP *cpi, const TileInfo *const tile, |
5429 | 0 | MACROBLOCK *x, int mi_row, int mi_col) { |
5430 | 0 | VP9_COMMON *const cm = &cpi->common; |
5431 | 0 | const int is_key_frame = frame_is_intra_only(cm); |
5432 | 0 | MACROBLOCKD *xd = &x->e_mbd; |
5433 | |
|
5434 | 0 | set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64); |
5435 | |
|
5436 | 0 | if (!is_key_frame) { |
5437 | 0 | MODE_INFO *mi = xd->mi[0]; |
5438 | 0 | YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME); |
5439 | 0 | const YV12_BUFFER_CONFIG *yv12_g = NULL; |
5440 | 0 | const BLOCK_SIZE bsize = BLOCK_32X32 + (mi_col + 4 < cm->mi_cols) * 2 + |
5441 | 0 | (mi_row + 4 < cm->mi_rows); |
5442 | 0 | unsigned int y_sad_g, y_sad_thr; |
5443 | 0 | unsigned int y_sad = UINT_MAX; |
5444 | |
|
5445 | 0 | assert(yv12 != NULL); |
5446 | |
|
5447 | 0 | if (!(is_one_pass_svc(cpi) && cpi->svc.spatial_layer_id) || |
5448 | 0 | cpi->svc.use_gf_temporal_ref_current_layer) { |
5449 | | // For now, GOLDEN will not be used for non-zero spatial layers, since |
5450 | | // it may not be a temporal reference. |
5451 | 0 | yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME); |
5452 | 0 | } |
5453 | | |
5454 | | // Only compute y_sad_g (sad for golden reference) for speed < 8. |
5455 | 0 | if (cpi->oxcf.speed < 8 && yv12_g && yv12_g != yv12 && |
5456 | 0 | (cpi->ref_frame_flags & VP9_GOLD_FLAG)) { |
5457 | 0 | vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col, |
5458 | 0 | &cm->frame_refs[GOLDEN_FRAME - 1].sf); |
5459 | 0 | y_sad_g = cpi->fn_ptr[bsize].sdf( |
5460 | 0 | x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf, |
5461 | 0 | xd->plane[0].pre[0].stride); |
5462 | 0 | } else { |
5463 | 0 | y_sad_g = UINT_MAX; |
5464 | 0 | } |
5465 | |
|
5466 | 0 | if (cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR && |
5467 | 0 | cpi->rc.is_src_frame_alt_ref) { |
5468 | 0 | yv12 = get_ref_frame_buffer(cpi, ALTREF_FRAME); |
5469 | 0 | vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col, |
5470 | 0 | &cm->frame_refs[ALTREF_FRAME - 1].sf); |
5471 | 0 | mi->ref_frame[0] = ALTREF_FRAME; |
5472 | 0 | y_sad_g = UINT_MAX; |
5473 | 0 | } else { |
5474 | 0 | vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col, |
5475 | 0 | &cm->frame_refs[LAST_FRAME - 1].sf); |
5476 | 0 | mi->ref_frame[0] = LAST_FRAME; |
5477 | 0 | } |
5478 | 0 | mi->ref_frame[1] = NO_REF_FRAME; |
5479 | 0 | mi->sb_type = BLOCK_64X64; |
5480 | 0 | mi->mv[0].as_int = 0; |
5481 | 0 | mi->interp_filter = BILINEAR; |
5482 | |
|
5483 | 0 | { |
5484 | 0 | const MV dummy_mv = { 0, 0 }; |
5485 | 0 | y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col, |
5486 | 0 | &dummy_mv); |
5487 | 0 | x->sb_use_mv_part = 1; |
5488 | 0 | x->sb_mvcol_part = mi->mv[0].as_mv.col; |
5489 | 0 | x->sb_mvrow_part = mi->mv[0].as_mv.row; |
5490 | 0 | } |
5491 | | |
5492 | | // Pick ref frame for partitioning, bias last frame when y_sad_g and y_sad |
5493 | | // are close if short_circuit_low_temp_var is on. |
5494 | 0 | y_sad_thr = cpi->sf.short_circuit_low_temp_var ? (y_sad * 7) >> 3 : y_sad; |
5495 | 0 | if (y_sad_g < y_sad_thr) { |
5496 | 0 | vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col, |
5497 | 0 | &cm->frame_refs[GOLDEN_FRAME - 1].sf); |
5498 | 0 | mi->ref_frame[0] = GOLDEN_FRAME; |
5499 | 0 | mi->mv[0].as_int = 0; |
5500 | 0 | } else { |
5501 | 0 | x->pred_mv[LAST_FRAME] = mi->mv[0].as_mv; |
5502 | 0 | } |
5503 | |
|
5504 | 0 | set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]); |
5505 | 0 | xd->plane[0].dst.buf = x->est_pred; |
5506 | 0 | xd->plane[0].dst.stride = 64; |
5507 | 0 | vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_64X64); |
5508 | 0 | } else { |
5509 | 0 | #if CONFIG_VP9_HIGHBITDEPTH |
5510 | 0 | switch (xd->bd) { |
5511 | 0 | case 8: memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0])); break; |
5512 | 0 | case 10: |
5513 | 0 | memset(x->est_pred, 128 * 4, 64 * 64 * sizeof(x->est_pred[0])); |
5514 | 0 | break; |
5515 | 0 | case 12: |
5516 | 0 | memset(x->est_pred, 128 * 16, 64 * 64 * sizeof(x->est_pred[0])); |
5517 | 0 | break; |
5518 | 0 | } |
5519 | | #else |
5520 | | memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0])); |
5521 | | #endif // CONFIG_VP9_HIGHBITDEPTH |
5522 | 0 | } |
5523 | 0 | } |
5524 | | |
5525 | | static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td, |
5526 | | TileDataEnc *tile_data, int mi_row, |
5527 | 0 | TOKENEXTRA **tp) { |
5528 | 0 | SPEED_FEATURES *const sf = &cpi->sf; |
5529 | 0 | VP9_COMMON *const cm = &cpi->common; |
5530 | 0 | TileInfo *const tile_info = &tile_data->tile_info; |
5531 | 0 | MACROBLOCK *const x = &td->mb; |
5532 | 0 | MACROBLOCKD *const xd = &x->e_mbd; |
5533 | 0 | const int mi_col_start = tile_info->mi_col_start; |
5534 | 0 | const int mi_col_end = tile_info->mi_col_end; |
5535 | 0 | int mi_col; |
5536 | 0 | const int sb_row = mi_row >> MI_BLOCK_SIZE_LOG2; |
5537 | 0 | const int num_sb_cols = |
5538 | 0 | get_num_cols(tile_data->tile_info, MI_BLOCK_SIZE_LOG2); |
5539 | 0 | int sb_col_in_tile; |
5540 | | |
5541 | | // Initialize the left context for the new SB row |
5542 | 0 | memset(&xd->left_context, 0, sizeof(xd->left_context)); |
5543 | 0 | memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context)); |
5544 | | |
5545 | | // Code each SB in the row |
5546 | 0 | for (mi_col = mi_col_start, sb_col_in_tile = 0; mi_col < mi_col_end; |
5547 | 0 | mi_col += MI_BLOCK_SIZE, ++sb_col_in_tile) { |
5548 | 0 | const struct segmentation *const seg = &cm->seg; |
5549 | 0 | RD_COST dummy_rdc; |
5550 | 0 | const int idx_str = cm->mi_stride * mi_row + mi_col; |
5551 | 0 | MODE_INFO **mi = cm->mi_grid_visible + idx_str; |
5552 | 0 | PARTITION_SEARCH_TYPE partition_search_type = sf->partition_search_type; |
5553 | 0 | BLOCK_SIZE bsize = BLOCK_64X64; |
5554 | 0 | int seg_skip = 0; |
5555 | 0 | int i; |
5556 | |
|
5557 | 0 | (*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, sb_row, |
5558 | 0 | sb_col_in_tile); |
5559 | |
|
5560 | 0 | if (cpi->use_skin_detection) { |
5561 | 0 | vp9_compute_skin_sb(cpi, BLOCK_16X16, mi_row, mi_col); |
5562 | 0 | } |
5563 | |
|
5564 | 0 | x->source_variance = UINT_MAX; |
5565 | 0 | for (i = 0; i < MAX_REF_FRAMES; ++i) { |
5566 | 0 | x->pred_mv[i].row = INT16_MAX; |
5567 | 0 | x->pred_mv[i].col = INT16_MAX; |
5568 | 0 | } |
5569 | 0 | vp9_rd_cost_init(&dummy_rdc); |
5570 | 0 | x->color_sensitivity[0] = 0; |
5571 | 0 | x->color_sensitivity[1] = 0; |
5572 | 0 | x->sb_is_skin = 0; |
5573 | 0 | x->skip_low_source_sad = 0; |
5574 | 0 | x->lowvar_highsumdiff = 0; |
5575 | 0 | x->content_state_sb = 0; |
5576 | 0 | x->zero_temp_sad_source = 0; |
5577 | 0 | x->sb_use_mv_part = 0; |
5578 | 0 | x->sb_mvcol_part = 0; |
5579 | 0 | x->sb_mvrow_part = 0; |
5580 | 0 | x->sb_pickmode_part = 0; |
5581 | 0 | x->arf_frame_usage = 0; |
5582 | 0 | x->lastgolden_frame_usage = 0; |
5583 | |
|
5584 | 0 | if (cpi->compute_source_sad_onepass && cpi->sf.use_source_sad) { |
5585 | 0 | int shift = cpi->Source->y_stride * (mi_row << 3) + (mi_col << 3); |
5586 | 0 | int sb_offset2 = ((cm->mi_cols + 7) >> 3) * (mi_row >> 3) + (mi_col >> 3); |
5587 | 0 | int64_t source_sad = avg_source_sad(cpi, x, shift, sb_offset2); |
5588 | 0 | if (sf->adapt_partition_source_sad && |
5589 | 0 | (cpi->oxcf.rc_mode == VPX_VBR && !cpi->rc.is_src_frame_alt_ref && |
5590 | 0 | source_sad > sf->adapt_partition_thresh && |
5591 | 0 | (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame))) |
5592 | 0 | partition_search_type = REFERENCE_PARTITION; |
5593 | 0 | } |
5594 | |
|
5595 | 0 | if (seg->enabled) { |
5596 | 0 | const uint8_t *const map = |
5597 | 0 | seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map; |
5598 | 0 | int segment_id = get_segment_id(cm, map, BLOCK_64X64, mi_row, mi_col); |
5599 | 0 | seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP); |
5600 | |
|
5601 | 0 | if (cpi->roi.enabled && cpi->roi.skip[BACKGROUND_SEG_SKIP_ID] && |
5602 | 0 | cpi->rc.frames_since_key > FRAMES_NO_SKIPPING_AFTER_KEY && |
5603 | 0 | x->content_state_sb > kLowSadLowSumdiff) { |
5604 | | // For ROI with skip, force segment = 0 (no skip) over whole |
5605 | | // superblock to avoid artifacts if temporal change in source_sad is |
5606 | | // not 0. |
5607 | 0 | int xi, yi; |
5608 | 0 | const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64]; |
5609 | 0 | const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64]; |
5610 | 0 | const int xmis = VPXMIN(cm->mi_cols - mi_col, bw); |
5611 | 0 | const int ymis = VPXMIN(cm->mi_rows - mi_row, bh); |
5612 | 0 | const int block_index = mi_row * cm->mi_cols + mi_col; |
5613 | 0 | set_mode_info_offsets(cm, x, xd, mi_row, mi_col); |
5614 | 0 | for (yi = 0; yi < ymis; yi++) |
5615 | 0 | for (xi = 0; xi < xmis; xi++) { |
5616 | 0 | int map_offset = block_index + yi * cm->mi_cols + xi; |
5617 | 0 | cpi->segmentation_map[map_offset] = 0; |
5618 | 0 | } |
5619 | 0 | set_segment_index(cpi, x, mi_row, mi_col, BLOCK_64X64, 0); |
5620 | 0 | seg_skip = 0; |
5621 | 0 | } |
5622 | 0 | if (seg_skip) { |
5623 | 0 | partition_search_type = FIXED_PARTITION; |
5624 | 0 | } |
5625 | 0 | } |
5626 | | |
5627 | | // Set the partition type of the 64X64 block |
5628 | 0 | switch (partition_search_type) { |
5629 | 0 | case VAR_BASED_PARTITION: |
5630 | | // TODO(jingning, marpan): The mode decision and encoding process |
5631 | | // support both intra and inter sub8x8 block coding for RTC mode. |
5632 | | // Tune the thresholds accordingly to use sub8x8 block coding for |
5633 | | // coding performance improvement. |
5634 | 0 | choose_partitioning(cpi, tile_info, x, mi_row, mi_col); |
5635 | 0 | nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, |
5636 | 0 | BLOCK_64X64, 1, &dummy_rdc, td->pc_root); |
5637 | 0 | break; |
5638 | 0 | case ML_BASED_PARTITION: |
5639 | 0 | get_estimated_pred(cpi, tile_info, x, mi_row, mi_col); |
5640 | 0 | x->max_partition_size = BLOCK_64X64; |
5641 | 0 | x->min_partition_size = BLOCK_8X8; |
5642 | 0 | x->sb_pickmode_part = 1; |
5643 | 0 | nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, |
5644 | 0 | BLOCK_64X64, &dummy_rdc, 1, INT64_MAX, |
5645 | 0 | td->pc_root); |
5646 | 0 | break; |
5647 | 0 | case SOURCE_VAR_BASED_PARTITION: |
5648 | 0 | set_source_var_based_partition(cpi, tile_info, x, mi, mi_row, mi_col); |
5649 | 0 | nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, |
5650 | 0 | BLOCK_64X64, 1, &dummy_rdc, td->pc_root); |
5651 | 0 | break; |
5652 | 0 | case FIXED_PARTITION: |
5653 | 0 | if (!seg_skip) bsize = sf->always_this_block_size; |
5654 | 0 | set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize); |
5655 | 0 | nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, |
5656 | 0 | BLOCK_64X64, 1, &dummy_rdc, td->pc_root); |
5657 | 0 | break; |
5658 | 0 | default: |
5659 | 0 | assert(partition_search_type == REFERENCE_PARTITION); |
5660 | 0 | x->sb_pickmode_part = 1; |
5661 | 0 | set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64); |
5662 | | // Use nonrd_pick_partition on scene-cut for VBR mode. |
5663 | | // nonrd_pick_partition does not support 4x4 partition, so avoid it |
5664 | | // on key frame for now. |
5665 | 0 | if ((cpi->oxcf.rc_mode == VPX_VBR && cpi->rc.high_source_sad && |
5666 | 0 | cpi->oxcf.speed < 6 && !frame_is_intra_only(cm) && |
5667 | 0 | (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame))) { |
5668 | | // Use lower max_partition_size for low resolutions. |
5669 | 0 | if (cm->width <= 352 && cm->height <= 288) |
5670 | 0 | x->max_partition_size = BLOCK_32X32; |
5671 | 0 | else |
5672 | 0 | x->max_partition_size = BLOCK_64X64; |
5673 | 0 | x->min_partition_size = BLOCK_8X8; |
5674 | 0 | nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, |
5675 | 0 | BLOCK_64X64, &dummy_rdc, 1, INT64_MAX, |
5676 | 0 | td->pc_root); |
5677 | 0 | } else { |
5678 | 0 | choose_partitioning(cpi, tile_info, x, mi_row, mi_col); |
5679 | | // TODO(marpan): Seems like nonrd_select_partition does not support |
5680 | | // 4x4 partition. Since 4x4 is used on key frame, use this switch |
5681 | | // for now. |
5682 | 0 | if (frame_is_intra_only(cm)) |
5683 | 0 | nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, |
5684 | 0 | BLOCK_64X64, 1, &dummy_rdc, td->pc_root); |
5685 | 0 | else |
5686 | 0 | nonrd_select_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, |
5687 | 0 | BLOCK_64X64, 1, &dummy_rdc, td->pc_root); |
5688 | 0 | } |
5689 | |
|
5690 | 0 | break; |
5691 | 0 | } |
5692 | | |
5693 | | // Update ref_frame usage for inter frame if this group is ARF group. |
5694 | 0 | if (!cpi->rc.is_src_frame_alt_ref && !cpi->refresh_golden_frame && |
5695 | 0 | !cpi->refresh_alt_ref_frame && cpi->rc.alt_ref_gf_group && |
5696 | 0 | cpi->sf.use_altref_onepass) { |
5697 | 0 | int sboffset = ((cm->mi_cols + 7) >> 3) * (mi_row >> 3) + (mi_col >> 3); |
5698 | 0 | if (cpi->count_arf_frame_usage != NULL) |
5699 | 0 | cpi->count_arf_frame_usage[sboffset] = x->arf_frame_usage; |
5700 | 0 | if (cpi->count_lastgolden_frame_usage != NULL) |
5701 | 0 | cpi->count_lastgolden_frame_usage[sboffset] = x->lastgolden_frame_usage; |
5702 | 0 | } |
5703 | |
|
5704 | 0 | (*(cpi->row_mt_sync_write_ptr))(&tile_data->row_mt_sync, sb_row, |
5705 | 0 | sb_col_in_tile, num_sb_cols); |
5706 | 0 | } |
5707 | 0 | } |
5708 | | // end RTC play code |
5709 | | |
5710 | 0 | static INLINE uint32_t variance(const Diff *const d) { |
5711 | 0 | return d->sse - (uint32_t)(((int64_t)d->sum * d->sum) >> 8); |
5712 | 0 | } |
5713 | | |
5714 | | #if CONFIG_VP9_HIGHBITDEPTH |
5715 | 0 | static INLINE uint32_t variance_highbd(Diff *const d) { |
5716 | 0 | const int64_t var = (int64_t)d->sse - (((int64_t)d->sum * d->sum) >> 8); |
5717 | 0 | return (var >= 0) ? (uint32_t)var : 0; |
5718 | 0 | } |
5719 | | #endif // CONFIG_VP9_HIGHBITDEPTH |
5720 | | |
5721 | 0 | static int set_var_thresh_from_histogram(VP9_COMP *cpi) { |
5722 | 0 | const SPEED_FEATURES *const sf = &cpi->sf; |
5723 | 0 | const VP9_COMMON *const cm = &cpi->common; |
5724 | |
|
5725 | 0 | const uint8_t *src = cpi->Source->y_buffer; |
5726 | 0 | const uint8_t *last_src = cpi->Last_Source->y_buffer; |
5727 | 0 | const int src_stride = cpi->Source->y_stride; |
5728 | 0 | const int last_stride = cpi->Last_Source->y_stride; |
5729 | | |
5730 | | // Pick cutoff threshold |
5731 | 0 | const int cutoff = (VPXMIN(cm->width, cm->height) >= 720) |
5732 | 0 | ? (cm->MBs * VAR_HIST_LARGE_CUT_OFF / 100) |
5733 | 0 | : (cm->MBs * VAR_HIST_SMALL_CUT_OFF / 100); |
5734 | 0 | DECLARE_ALIGNED(16, int, hist[VAR_HIST_BINS]); |
5735 | 0 | Diff *var16 = cpi->source_diff_var; |
5736 | |
|
5737 | 0 | int sum = 0; |
5738 | 0 | int i, j; |
5739 | |
|
5740 | 0 | memset(hist, 0, VAR_HIST_BINS * sizeof(hist[0])); |
5741 | |
|
5742 | 0 | for (i = 0; i < cm->mb_rows; i++) { |
5743 | 0 | for (j = 0; j < cm->mb_cols; j++) { |
5744 | 0 | #if CONFIG_VP9_HIGHBITDEPTH |
5745 | 0 | if (cm->use_highbitdepth) { |
5746 | 0 | switch (cm->bit_depth) { |
5747 | 0 | case VPX_BITS_8: |
5748 | 0 | vpx_highbd_8_get16x16var(src, src_stride, last_src, last_stride, |
5749 | 0 | &var16->sse, &var16->sum); |
5750 | 0 | var16->var = variance(var16); |
5751 | 0 | break; |
5752 | 0 | case VPX_BITS_10: |
5753 | 0 | vpx_highbd_10_get16x16var(src, src_stride, last_src, last_stride, |
5754 | 0 | &var16->sse, &var16->sum); |
5755 | 0 | var16->var = variance_highbd(var16); |
5756 | 0 | break; |
5757 | 0 | default: |
5758 | 0 | assert(cm->bit_depth == VPX_BITS_12); |
5759 | 0 | vpx_highbd_12_get16x16var(src, src_stride, last_src, last_stride, |
5760 | 0 | &var16->sse, &var16->sum); |
5761 | 0 | var16->var = variance_highbd(var16); |
5762 | 0 | break; |
5763 | 0 | } |
5764 | 0 | } else { |
5765 | 0 | vpx_get16x16var(src, src_stride, last_src, last_stride, &var16->sse, |
5766 | 0 | &var16->sum); |
5767 | 0 | var16->var = variance(var16); |
5768 | 0 | } |
5769 | | #else |
5770 | | vpx_get16x16var(src, src_stride, last_src, last_stride, &var16->sse, |
5771 | | &var16->sum); |
5772 | | var16->var = variance(var16); |
5773 | | #endif // CONFIG_VP9_HIGHBITDEPTH |
5774 | | |
5775 | 0 | if (var16->var >= VAR_HIST_MAX_BG_VAR) |
5776 | 0 | hist[VAR_HIST_BINS - 1]++; |
5777 | 0 | else |
5778 | 0 | hist[var16->var / VAR_HIST_FACTOR]++; |
5779 | |
|
5780 | 0 | src += 16; |
5781 | 0 | last_src += 16; |
5782 | 0 | var16++; |
5783 | 0 | } |
5784 | | |
5785 | 0 | src = src - cm->mb_cols * 16 + 16 * src_stride; |
5786 | 0 | last_src = last_src - cm->mb_cols * 16 + 16 * last_stride; |
5787 | 0 | } |
5788 | | |
5789 | 0 | cpi->source_var_thresh = 0; |
5790 | |
|
5791 | 0 | if (hist[VAR_HIST_BINS - 1] < cutoff) { |
5792 | 0 | for (i = 0; i < VAR_HIST_BINS - 1; i++) { |
5793 | 0 | sum += hist[i]; |
5794 | |
|
5795 | 0 | if (sum > cutoff) { |
5796 | 0 | cpi->source_var_thresh = (i + 1) * VAR_HIST_FACTOR; |
5797 | 0 | return 0; |
5798 | 0 | } |
5799 | 0 | } |
5800 | 0 | } |
5801 | | |
5802 | 0 | return sf->search_type_check_frequency; |
5803 | 0 | } |
5804 | | |
5805 | 0 | static void source_var_based_partition_search_method(VP9_COMP *cpi) { |
5806 | 0 | VP9_COMMON *const cm = &cpi->common; |
5807 | 0 | SPEED_FEATURES *const sf = &cpi->sf; |
5808 | |
|
5809 | 0 | if (cm->frame_type == KEY_FRAME) { |
5810 | | // For key frame, use SEARCH_PARTITION. |
5811 | 0 | sf->partition_search_type = SEARCH_PARTITION; |
5812 | 0 | } else if (cm->intra_only) { |
5813 | 0 | sf->partition_search_type = FIXED_PARTITION; |
5814 | 0 | } else { |
5815 | 0 | if (cm->last_width != cm->width || cm->last_height != cm->height) { |
5816 | 0 | if (cpi->source_diff_var) vpx_free(cpi->source_diff_var); |
5817 | |
|
5818 | 0 | CHECK_MEM_ERROR(&cm->error, cpi->source_diff_var, |
5819 | 0 | vpx_calloc(cm->MBs, sizeof(cpi->source_diff_var))); |
5820 | 0 | } |
5821 | |
|
5822 | 0 | if (!cpi->frames_till_next_var_check) |
5823 | 0 | cpi->frames_till_next_var_check = set_var_thresh_from_histogram(cpi); |
5824 | |
|
5825 | 0 | if (cpi->frames_till_next_var_check > 0) { |
5826 | 0 | sf->partition_search_type = FIXED_PARTITION; |
5827 | 0 | cpi->frames_till_next_var_check--; |
5828 | 0 | } |
5829 | 0 | } |
5830 | 0 | } |
5831 | | |
5832 | 0 | static int get_skip_encode_frame(const VP9_COMMON *cm, ThreadData *const td) { |
5833 | 0 | unsigned int intra_count = 0, inter_count = 0; |
5834 | 0 | int j; |
5835 | |
|
5836 | 0 | for (j = 0; j < INTRA_INTER_CONTEXTS; ++j) { |
5837 | 0 | intra_count += td->counts->intra_inter[j][0]; |
5838 | 0 | inter_count += td->counts->intra_inter[j][1]; |
5839 | 0 | } |
5840 | |
|
5841 | 0 | return (intra_count << 2) < inter_count && cm->frame_type != KEY_FRAME && |
5842 | 0 | cm->show_frame; |
5843 | 0 | } |
5844 | | |
5845 | 42.2k | void vp9_init_tile_data(VP9_COMP *cpi) { |
5846 | 42.2k | VP9_COMMON *const cm = &cpi->common; |
5847 | 42.2k | const int tile_cols = 1 << cm->log2_tile_cols; |
5848 | 42.2k | const int tile_rows = 1 << cm->log2_tile_rows; |
5849 | 42.2k | int tile_col, tile_row; |
5850 | 42.2k | TOKENEXTRA *pre_tok = cpi->tile_tok[0][0]; |
5851 | 42.2k | TOKENLIST *tplist = cpi->tplist[0][0]; |
5852 | 42.2k | int tile_tok = 0; |
5853 | 42.2k | int tplist_count = 0; |
5854 | | |
5855 | 42.2k | if (cpi->tile_data == NULL || cpi->allocated_tiles < tile_cols * tile_rows) { |
5856 | 2.87k | if (cpi->tile_data != NULL) { |
5857 | | // Free the row mt memory in cpi->tile_data first. |
5858 | 0 | vp9_row_mt_mem_dealloc(cpi); |
5859 | 0 | vpx_free(cpi->tile_data); |
5860 | 0 | } |
5861 | 2.87k | cpi->allocated_tiles = 0; |
5862 | 2.87k | CHECK_MEM_ERROR( |
5863 | 2.87k | &cm->error, cpi->tile_data, |
5864 | 2.87k | vpx_malloc(tile_cols * tile_rows * sizeof(*cpi->tile_data))); |
5865 | 2.87k | cpi->allocated_tiles = tile_cols * tile_rows; |
5866 | | |
5867 | 5.74k | for (tile_row = 0; tile_row < tile_rows; ++tile_row) |
5868 | 7.54k | for (tile_col = 0; tile_col < tile_cols; ++tile_col) { |
5869 | 4.67k | TileDataEnc *tile_data = |
5870 | 4.67k | &cpi->tile_data[tile_row * tile_cols + tile_col]; |
5871 | 4.67k | int i, j; |
5872 | 4.67k | const MV zero_mv = { 0, 0 }; |
5873 | 65.4k | for (i = 0; i < BLOCK_SIZES; ++i) { |
5874 | 1.88M | for (j = 0; j < MAX_MODES; ++j) { |
5875 | 1.82M | tile_data->thresh_freq_fact[i][j] = RD_THRESH_INIT_FACT; |
5876 | 1.82M | tile_data->thresh_freq_fact_prev[i][j] = RD_THRESH_INIT_FACT; |
5877 | 1.82M | tile_data->mode_map[i][j] = j; |
5878 | 1.82M | } |
5879 | 60.7k | } |
5880 | 4.67k | tile_data->firstpass_top_mv = zero_mv; |
5881 | 4.67k | #if CONFIG_MULTITHREAD |
5882 | 4.67k | tile_data->row_base_thresh_freq_fact = NULL; |
5883 | 4.67k | #endif |
5884 | 4.67k | } |
5885 | 2.87k | } |
5886 | | |
5887 | 84.5k | for (tile_row = 0; tile_row < tile_rows; ++tile_row) { |
5888 | 86.9k | for (tile_col = 0; tile_col < tile_cols; ++tile_col) { |
5889 | 44.7k | TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col]; |
5890 | 44.7k | TileInfo *tile_info = &this_tile->tile_info; |
5891 | 44.7k | if (cpi->sf.adaptive_rd_thresh_row_mt) { |
5892 | 0 | vp9_row_mt_alloc_rd_thresh(cpi, this_tile); |
5893 | 0 | } |
5894 | 44.7k | vp9_tile_init(tile_info, cm, tile_row, tile_col); |
5895 | | |
5896 | 44.7k | cpi->tile_tok[tile_row][tile_col] = pre_tok + tile_tok; |
5897 | 44.7k | pre_tok = cpi->tile_tok[tile_row][tile_col]; |
5898 | 44.7k | tile_tok = allocated_tokens(*tile_info); |
5899 | | |
5900 | 44.7k | cpi->tplist[tile_row][tile_col] = tplist + tplist_count; |
5901 | 44.7k | tplist = cpi->tplist[tile_row][tile_col]; |
5902 | 44.7k | tplist_count = get_num_vert_units(*tile_info, MI_BLOCK_SIZE_LOG2); |
5903 | 44.7k | } |
5904 | 42.2k | } |
5905 | 42.2k | } |
5906 | | |
5907 | | void vp9_encode_sb_row(VP9_COMP *cpi, ThreadData *td, int tile_row, |
5908 | 55.4k | int tile_col, int mi_row) { |
5909 | 55.4k | VP9_COMMON *const cm = &cpi->common; |
5910 | 55.4k | const int tile_cols = 1 << cm->log2_tile_cols; |
5911 | 55.4k | TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col]; |
5912 | 55.4k | const TileInfo *const tile_info = &this_tile->tile_info; |
5913 | 55.4k | TOKENEXTRA *tok = NULL; |
5914 | 55.4k | int tile_sb_row; |
5915 | 55.4k | int tile_mb_cols = (tile_info->mi_col_end - tile_info->mi_col_start + 1) >> 1; |
5916 | | |
5917 | 55.4k | tile_sb_row = mi_cols_aligned_to_sb(mi_row - tile_info->mi_row_start) >> |
5918 | 55.4k | MI_BLOCK_SIZE_LOG2; |
5919 | 55.4k | get_start_tok(cpi, tile_row, tile_col, mi_row, &tok); |
5920 | 55.4k | cpi->tplist[tile_row][tile_col][tile_sb_row].start = tok; |
5921 | | |
5922 | | #if CONFIG_REALTIME_ONLY |
5923 | | assert(cpi->sf.use_nonrd_pick_mode); |
5924 | | encode_nonrd_sb_row(cpi, td, this_tile, mi_row, &tok); |
5925 | | #else |
5926 | 55.4k | if (cpi->sf.use_nonrd_pick_mode) |
5927 | 0 | encode_nonrd_sb_row(cpi, td, this_tile, mi_row, &tok); |
5928 | 55.4k | else |
5929 | 55.4k | encode_rd_sb_row(cpi, td, this_tile, mi_row, &tok); |
5930 | 55.4k | #endif |
5931 | | |
5932 | 55.4k | cpi->tplist[tile_row][tile_col][tile_sb_row].stop = tok; |
5933 | 55.4k | cpi->tplist[tile_row][tile_col][tile_sb_row].count = |
5934 | 55.4k | (unsigned int)(cpi->tplist[tile_row][tile_col][tile_sb_row].stop - |
5935 | 55.4k | cpi->tplist[tile_row][tile_col][tile_sb_row].start); |
5936 | 55.4k | assert(tok - cpi->tplist[tile_row][tile_col][tile_sb_row].start <= |
5937 | 55.4k | get_token_alloc(MI_BLOCK_SIZE >> 1, tile_mb_cols)); |
5938 | | |
5939 | 55.4k | (void)tile_mb_cols; |
5940 | 55.4k | } |
5941 | | |
5942 | | void vp9_encode_tile(VP9_COMP *cpi, ThreadData *td, int tile_row, |
5943 | 44.7k | int tile_col) { |
5944 | 44.7k | VP9_COMMON *const cm = &cpi->common; |
5945 | 44.7k | const int tile_cols = 1 << cm->log2_tile_cols; |
5946 | 44.7k | TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col]; |
5947 | 44.7k | const TileInfo *const tile_info = &this_tile->tile_info; |
5948 | 44.7k | const int mi_row_start = tile_info->mi_row_start; |
5949 | 44.7k | const int mi_row_end = tile_info->mi_row_end; |
5950 | 44.7k | int mi_row; |
5951 | | |
5952 | 100k | for (mi_row = mi_row_start; mi_row < mi_row_end; mi_row += MI_BLOCK_SIZE) |
5953 | 55.4k | vp9_encode_sb_row(cpi, td, tile_row, tile_col, mi_row); |
5954 | 44.7k | } |
5955 | | |
5956 | 42.2k | static void encode_tiles(VP9_COMP *cpi) { |
5957 | 42.2k | VP9_COMMON *const cm = &cpi->common; |
5958 | 42.2k | const int tile_cols = 1 << cm->log2_tile_cols; |
5959 | 42.2k | const int tile_rows = 1 << cm->log2_tile_rows; |
5960 | 42.2k | int tile_col, tile_row; |
5961 | | |
5962 | 42.2k | vp9_init_tile_data(cpi); |
5963 | | |
5964 | 84.5k | for (tile_row = 0; tile_row < tile_rows; ++tile_row) |
5965 | 86.9k | for (tile_col = 0; tile_col < tile_cols; ++tile_col) |
5966 | 44.7k | vp9_encode_tile(cpi, &cpi->td, tile_row, tile_col); |
5967 | 42.2k | } |
5968 | | |
5969 | 0 | static int compare_kmeans_data(const void *a, const void *b) { |
5970 | 0 | if (((const KMEANS_DATA *)a)->value > ((const KMEANS_DATA *)b)->value) { |
5971 | 0 | return 1; |
5972 | 0 | } else if (((const KMEANS_DATA *)a)->value < |
5973 | 0 | ((const KMEANS_DATA *)b)->value) { |
5974 | 0 | return -1; |
5975 | 0 | } else { |
5976 | 0 | return 0; |
5977 | 0 | } |
5978 | 0 | } |
5979 | | |
5980 | | static void compute_boundary_ls(const double *ctr_ls, int k, |
5981 | 0 | double *boundary_ls) { |
5982 | | // boundary_ls[j] is the upper bound of data centered at ctr_ls[j] |
5983 | 0 | int j; |
5984 | 0 | for (j = 0; j < k - 1; ++j) { |
5985 | 0 | boundary_ls[j] = (ctr_ls[j] + ctr_ls[j + 1]) / 2.; |
5986 | 0 | } |
5987 | 0 | boundary_ls[k - 1] = DBL_MAX; |
5988 | 0 | } |
5989 | | |
5990 | 0 | int vp9_get_group_idx(double value, double *boundary_ls, int k) { |
5991 | 0 | int group_idx = 0; |
5992 | 0 | while (value >= boundary_ls[group_idx]) { |
5993 | 0 | ++group_idx; |
5994 | 0 | if (group_idx == k - 1) { |
5995 | 0 | break; |
5996 | 0 | } |
5997 | 0 | } |
5998 | 0 | return group_idx; |
5999 | 0 | } |
6000 | | |
6001 | | void vp9_kmeans(double *ctr_ls, double *boundary_ls, int *count_ls, int k, |
6002 | 0 | KMEANS_DATA *arr, int size) { |
6003 | 0 | int i, j; |
6004 | 0 | int itr; |
6005 | 0 | int group_idx; |
6006 | 0 | double sum[MAX_KMEANS_GROUPS]; |
6007 | 0 | int count[MAX_KMEANS_GROUPS]; |
6008 | |
|
6009 | 0 | vpx_clear_system_state(); |
6010 | |
|
6011 | 0 | assert(k >= 2 && k <= MAX_KMEANS_GROUPS); |
6012 | |
|
6013 | 0 | qsort(arr, size, sizeof(*arr), compare_kmeans_data); |
6014 | | |
6015 | | // initialize the center points |
6016 | 0 | for (j = 0; j < k; ++j) { |
6017 | 0 | ctr_ls[j] = arr[(size * (2 * j + 1)) / (2 * k)].value; |
6018 | 0 | } |
6019 | |
|
6020 | 0 | for (itr = 0; itr < 10; ++itr) { |
6021 | 0 | compute_boundary_ls(ctr_ls, k, boundary_ls); |
6022 | 0 | for (i = 0; i < MAX_KMEANS_GROUPS; ++i) { |
6023 | 0 | sum[i] = 0; |
6024 | 0 | count[i] = 0; |
6025 | 0 | } |
6026 | | |
6027 | | // Both the data and centers are sorted in ascending order. |
6028 | | // As each data point is processed in order, its corresponding group index |
6029 | | // can only increase. So we only need to reset the group index to zero here. |
6030 | 0 | group_idx = 0; |
6031 | 0 | for (i = 0; i < size; ++i) { |
6032 | 0 | while (arr[i].value >= boundary_ls[group_idx]) { |
6033 | | // place samples into clusters |
6034 | 0 | ++group_idx; |
6035 | 0 | if (group_idx == k - 1) { |
6036 | 0 | break; |
6037 | 0 | } |
6038 | 0 | } |
6039 | 0 | sum[group_idx] += arr[i].value; |
6040 | 0 | ++count[group_idx]; |
6041 | 0 | } |
6042 | |
|
6043 | 0 | for (group_idx = 0; group_idx < k; ++group_idx) { |
6044 | 0 | if (count[group_idx] > 0) |
6045 | 0 | ctr_ls[group_idx] = sum[group_idx] / count[group_idx]; |
6046 | |
|
6047 | 0 | sum[group_idx] = 0; |
6048 | 0 | count[group_idx] = 0; |
6049 | 0 | } |
6050 | 0 | } |
6051 | | |
6052 | | // compute group_idx, boundary_ls and count_ls |
6053 | 0 | for (j = 0; j < k; ++j) { |
6054 | 0 | count_ls[j] = 0; |
6055 | 0 | } |
6056 | 0 | compute_boundary_ls(ctr_ls, k, boundary_ls); |
6057 | 0 | group_idx = 0; |
6058 | 0 | for (i = 0; i < size; ++i) { |
6059 | 0 | while (arr[i].value >= boundary_ls[group_idx]) { |
6060 | 0 | ++group_idx; |
6061 | 0 | if (group_idx == k - 1) { |
6062 | 0 | break; |
6063 | 0 | } |
6064 | 0 | } |
6065 | 0 | arr[i].group_idx = group_idx; |
6066 | 0 | ++count_ls[group_idx]; |
6067 | 0 | } |
6068 | 0 | } |
6069 | | |
6070 | 42.2k | static void encode_frame_internal(VP9_COMP *cpi) { |
6071 | 42.2k | SPEED_FEATURES *const sf = &cpi->sf; |
6072 | 42.2k | ThreadData *const td = &cpi->td; |
6073 | 42.2k | MACROBLOCK *const x = &td->mb; |
6074 | 42.2k | VP9_COMMON *const cm = &cpi->common; |
6075 | 42.2k | MACROBLOCKD *const xd = &x->e_mbd; |
6076 | 42.2k | const int gf_group_index = cpi->twopass.gf_group.index; |
6077 | | |
6078 | 42.2k | xd->mi = cm->mi_grid_visible; |
6079 | 42.2k | xd->mi[0] = cm->mi; |
6080 | 42.2k | vp9_zero(*td->counts); |
6081 | 42.2k | vp9_zero(cpi->td.rd_counts); |
6082 | | |
6083 | 42.2k | xd->lossless = cm->base_qindex == 0 && cm->y_dc_delta_q == 0 && |
6084 | 42.2k | cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0; |
6085 | | |
6086 | 42.2k | #if CONFIG_VP9_HIGHBITDEPTH |
6087 | 42.2k | if (cm->use_highbitdepth) |
6088 | 0 | x->fwd_txfm4x4 = xd->lossless ? vp9_highbd_fwht4x4 : vpx_highbd_fdct4x4; |
6089 | 42.2k | else |
6090 | 42.2k | x->fwd_txfm4x4 = xd->lossless ? vp9_fwht4x4 : vpx_fdct4x4; |
6091 | 42.2k | x->highbd_inv_txfm_add = |
6092 | 42.2k | xd->lossless ? vp9_highbd_iwht4x4_add : vp9_highbd_idct4x4_add; |
6093 | | #else |
6094 | | x->fwd_txfm4x4 = xd->lossless ? vp9_fwht4x4 : vpx_fdct4x4; |
6095 | | #endif // CONFIG_VP9_HIGHBITDEPTH |
6096 | 42.2k | x->inv_txfm_add = xd->lossless ? vp9_iwht4x4_add : vp9_idct4x4_add; |
6097 | 42.2k | x->optimize = sf->optimize_coefficients == 1 && cpi->oxcf.pass != 1; |
6098 | 42.2k | if (xd->lossless) x->optimize = 0; |
6099 | 42.2k | x->sharpness = cpi->oxcf.sharpness; |
6100 | 42.2k | x->adjust_rdmult_by_segment = (cpi->oxcf.aq_mode == VARIANCE_AQ); |
6101 | | |
6102 | 42.2k | cm->tx_mode = select_tx_mode(cpi, xd); |
6103 | | |
6104 | 42.2k | vp9_frame_init_quantizer(cpi); |
6105 | | |
6106 | 42.2k | vp9_initialize_rd_consts(cpi); |
6107 | 42.2k | vp9_initialize_me_consts(cpi, x, cm->base_qindex); |
6108 | 42.2k | init_encode_frame_mb_context(cpi); |
6109 | 42.2k | cm->use_prev_frame_mvs = |
6110 | 42.2k | !cm->error_resilient_mode && cm->width == cm->last_width && |
6111 | 42.2k | cm->height == cm->last_height && !cm->intra_only && cm->last_show_frame; |
6112 | | // Special case: set prev_mi to NULL when the previous mode info |
6113 | | // context cannot be used. |
6114 | 42.2k | cm->prev_mi = |
6115 | 42.2k | cm->use_prev_frame_mvs ? cm->prev_mip + cm->mi_stride + 1 : NULL; |
6116 | | |
6117 | 42.2k | x->quant_fp = cpi->sf.use_quant_fp; |
6118 | 42.2k | vp9_zero(x->skip_txfm); |
6119 | 42.2k | if (sf->use_nonrd_pick_mode) { |
6120 | | // Initialize internal buffer pointers for rtc coding, where non-RD |
6121 | | // mode decision is used and hence no buffer pointer swap needed. |
6122 | 0 | int i; |
6123 | 0 | struct macroblock_plane *const p = x->plane; |
6124 | 0 | struct macroblockd_plane *const pd = xd->plane; |
6125 | 0 | PICK_MODE_CONTEXT *ctx = &cpi->td.pc_root->none; |
6126 | |
|
6127 | 0 | for (i = 0; i < MAX_MB_PLANE; ++i) { |
6128 | 0 | p[i].coeff = ctx->coeff_pbuf[i][0]; |
6129 | 0 | p[i].qcoeff = ctx->qcoeff_pbuf[i][0]; |
6130 | 0 | pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][0]; |
6131 | 0 | p[i].eobs = ctx->eobs_pbuf[i][0]; |
6132 | 0 | } |
6133 | 0 | vp9_zero(x->zcoeff_blk); |
6134 | |
|
6135 | 0 | if (cm->frame_type != KEY_FRAME && cpi->rc.frames_since_golden == 0 && |
6136 | 0 | !(cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR) && |
6137 | 0 | !cpi->use_svc) |
6138 | 0 | cpi->ref_frame_flags &= (~VP9_GOLD_FLAG); |
6139 | |
|
6140 | 0 | if (sf->partition_search_type == SOURCE_VAR_BASED_PARTITION) |
6141 | 0 | source_var_based_partition_search_method(cpi); |
6142 | 42.2k | } else if (gf_group_index && gf_group_index < MAX_ARF_GOP_SIZE && |
6143 | 42.2k | cpi->sf.enable_tpl_model) { |
6144 | 0 | TplDepFrame *tpl_frame = &cpi->tpl_stats[cpi->twopass.gf_group.index]; |
6145 | 0 | TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr; |
6146 | |
|
6147 | 0 | int tpl_stride = tpl_frame->stride; |
6148 | 0 | int64_t intra_cost_base = 0; |
6149 | 0 | int64_t mc_dep_cost_base = 0; |
6150 | 0 | int row, col; |
6151 | |
|
6152 | 0 | for (row = 0; row < cm->mi_rows && tpl_frame->is_valid; ++row) { |
6153 | 0 | for (col = 0; col < cm->mi_cols; ++col) { |
6154 | 0 | TplDepStats *this_stats = &tpl_stats[row * tpl_stride + col]; |
6155 | 0 | intra_cost_base += this_stats->intra_cost; |
6156 | 0 | mc_dep_cost_base += this_stats->mc_dep_cost; |
6157 | 0 | } |
6158 | 0 | } |
6159 | |
|
6160 | 0 | vpx_clear_system_state(); |
6161 | |
|
6162 | 0 | if (tpl_frame->is_valid) |
6163 | 0 | cpi->rd.r0 = (double)intra_cost_base / mc_dep_cost_base; |
6164 | 0 | } |
6165 | | |
6166 | 169k | for (MV_REFERENCE_FRAME ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; |
6167 | 126k | ++ref_frame) { |
6168 | 126k | if (cpi->ref_frame_flags & ref_frame_to_flag(ref_frame)) { |
6169 | 90.9k | if (cm->frame_refs[ref_frame - 1].sf.x_scale_fp == REF_INVALID_SCALE || |
6170 | 90.9k | cm->frame_refs[ref_frame - 1].sf.y_scale_fp == REF_INVALID_SCALE) |
6171 | 0 | cpi->ref_frame_flags &= ~ref_frame_to_flag(ref_frame); |
6172 | 90.9k | } |
6173 | 126k | } |
6174 | | |
6175 | | // Frame segmentation |
6176 | 42.2k | if (cpi->oxcf.aq_mode == PERCEPTUAL_AQ) build_kmeans_segmentation(cpi); |
6177 | | |
6178 | 42.2k | { |
6179 | 42.2k | struct vpx_usec_timer emr_timer; |
6180 | 42.2k | vpx_usec_timer_start(&emr_timer); |
6181 | | |
6182 | 42.2k | if (!cpi->row_mt) { |
6183 | 42.2k | cpi->row_mt_sync_read_ptr = vp9_row_mt_sync_read_dummy; |
6184 | 42.2k | cpi->row_mt_sync_write_ptr = vp9_row_mt_sync_write_dummy; |
6185 | | // If allowed, encoding tiles in parallel with one thread handling one |
6186 | | // tile when row based multi-threading is disabled. |
6187 | 42.2k | if (VPXMIN(cpi->oxcf.max_threads, 1 << cm->log2_tile_cols) > 1) |
6188 | 0 | vp9_encode_tiles_mt(cpi); |
6189 | 42.2k | else |
6190 | 42.2k | encode_tiles(cpi); |
6191 | 42.2k | } else { |
6192 | 0 | cpi->row_mt_sync_read_ptr = vp9_row_mt_sync_read; |
6193 | 0 | cpi->row_mt_sync_write_ptr = vp9_row_mt_sync_write; |
6194 | 0 | vp9_encode_tiles_row_mt(cpi); |
6195 | 0 | } |
6196 | | |
6197 | 42.2k | vpx_usec_timer_mark(&emr_timer); |
6198 | 42.2k | cpi->time_encode_sb_row += vpx_usec_timer_elapsed(&emr_timer); |
6199 | 42.2k | } |
6200 | | |
6201 | 42.2k | sf->skip_encode_frame = |
6202 | 42.2k | sf->skip_encode_sb ? get_skip_encode_frame(cm, td) : 0; |
6203 | | |
6204 | | #if 0 |
6205 | | // Keep record of the total distortion this time around for future use |
6206 | | cpi->last_frame_distortion = cpi->frame_distortion; |
6207 | | #endif |
6208 | 42.2k | } |
6209 | | |
6210 | | static INTERP_FILTER get_interp_filter( |
6211 | 42.2k | const int64_t threshes[SWITCHABLE_FILTER_CONTEXTS], int is_alt_ref) { |
6212 | 42.2k | if (!is_alt_ref && threshes[EIGHTTAP_SMOOTH] > threshes[EIGHTTAP] && |
6213 | 42.2k | threshes[EIGHTTAP_SMOOTH] > threshes[EIGHTTAP_SHARP] && |
6214 | 42.2k | threshes[EIGHTTAP_SMOOTH] > threshes[SWITCHABLE - 1]) { |
6215 | 7.82k | return EIGHTTAP_SMOOTH; |
6216 | 34.4k | } else if (threshes[EIGHTTAP_SHARP] > threshes[EIGHTTAP] && |
6217 | 34.4k | threshes[EIGHTTAP_SHARP] > threshes[SWITCHABLE - 1]) { |
6218 | 1.02k | return EIGHTTAP_SHARP; |
6219 | 33.4k | } else if (threshes[EIGHTTAP] > threshes[SWITCHABLE - 1]) { |
6220 | 9.77k | return EIGHTTAP; |
6221 | 23.6k | } else { |
6222 | 23.6k | return SWITCHABLE; |
6223 | 23.6k | } |
6224 | 42.2k | } |
6225 | | |
6226 | 0 | static int compute_frame_aq_offset(struct VP9_COMP *cpi) { |
6227 | 0 | VP9_COMMON *const cm = &cpi->common; |
6228 | 0 | MODE_INFO **mi_8x8_ptr = cm->mi_grid_visible; |
6229 | 0 | struct segmentation *const seg = &cm->seg; |
6230 | |
|
6231 | 0 | int mi_row, mi_col; |
6232 | 0 | int sum_delta = 0; |
6233 | 0 | int qdelta_index; |
6234 | 0 | int segment_id; |
6235 | |
|
6236 | 0 | for (mi_row = 0; mi_row < cm->mi_rows; mi_row++) { |
6237 | 0 | MODE_INFO **mi_8x8 = mi_8x8_ptr; |
6238 | 0 | for (mi_col = 0; mi_col < cm->mi_cols; mi_col++, mi_8x8++) { |
6239 | 0 | segment_id = mi_8x8[0]->segment_id; |
6240 | 0 | qdelta_index = get_segdata(seg, segment_id, SEG_LVL_ALT_Q); |
6241 | 0 | sum_delta += qdelta_index; |
6242 | 0 | } |
6243 | 0 | mi_8x8_ptr += cm->mi_stride; |
6244 | 0 | } |
6245 | |
|
6246 | 0 | return sum_delta / (cm->mi_rows * cm->mi_cols); |
6247 | 0 | } |
6248 | | |
6249 | 42.2k | static void restore_encode_params(VP9_COMP *cpi) { |
6250 | 42.2k | VP9_COMMON *const cm = &cpi->common; |
6251 | 42.2k | int tile_idx; |
6252 | 42.2k | int i, j; |
6253 | 42.2k | TileDataEnc *tile_data; |
6254 | 42.2k | RD_OPT *rd_opt = &cpi->rd; |
6255 | 211k | for (i = 0; i < MAX_REF_FRAMES; i++) { |
6256 | 676k | for (j = 0; j < REFERENCE_MODES; j++) |
6257 | 507k | rd_opt->prediction_type_threshes[i][j] = |
6258 | 507k | rd_opt->prediction_type_threshes_prev[i][j]; |
6259 | | |
6260 | 845k | for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; j++) |
6261 | 676k | rd_opt->filter_threshes[i][j] = rd_opt->filter_threshes_prev[i][j]; |
6262 | 169k | } |
6263 | | |
6264 | 82.3k | for (tile_idx = 0; tile_idx < cpi->allocated_tiles; tile_idx++) { |
6265 | 40.0k | assert(cpi->tile_data); |
6266 | 40.0k | tile_data = &cpi->tile_data[tile_idx]; |
6267 | 40.0k | vp9_copy(tile_data->thresh_freq_fact, tile_data->thresh_freq_fact_prev); |
6268 | 40.0k | } |
6269 | | |
6270 | 42.2k | cm->interp_filter = cpi->sf.default_interp_filter; |
6271 | 42.2k | } |
6272 | | |
6273 | 42.2k | void vp9_encode_frame(VP9_COMP *cpi) { |
6274 | 42.2k | VP9_COMMON *const cm = &cpi->common; |
6275 | | |
6276 | 42.2k | restore_encode_params(cpi); |
6277 | | |
6278 | | #if CONFIG_MISMATCH_DEBUG |
6279 | | mismatch_reset_frame(MAX_MB_PLANE); |
6280 | | #endif |
6281 | | |
6282 | | // In the longer term the encoder should be generalized to match the |
6283 | | // decoder such that we allow compound where one of the 3 buffers has a |
6284 | | // different sign bias and that buffer is then the fixed ref. However, this |
6285 | | // requires further work in the rd loop. For now the only supported encoder |
6286 | | // side behavior is where the ALT ref buffer has opposite sign bias to |
6287 | | // the other two. |
6288 | 42.2k | if (!frame_is_intra_only(cm)) { |
6289 | 34.5k | if (vp9_compound_reference_allowed(cm)) { |
6290 | 0 | cpi->allow_comp_inter_inter = 1; |
6291 | 0 | vp9_setup_compound_reference_mode(cm); |
6292 | 34.5k | } else { |
6293 | 34.5k | cpi->allow_comp_inter_inter = 0; |
6294 | 34.5k | } |
6295 | 34.5k | } |
6296 | | |
6297 | 42.2k | if (cpi->sf.frame_parameter_update) { |
6298 | 42.2k | int i; |
6299 | 42.2k | RD_OPT *const rd_opt = &cpi->rd; |
6300 | 42.2k | FRAME_COUNTS *counts = cpi->td.counts; |
6301 | 42.2k | RD_COUNTS *const rdc = &cpi->td.rd_counts; |
6302 | | |
6303 | | // This code does a single RD pass over the whole frame assuming |
6304 | | // either compound, single or hybrid prediction as per whatever has |
6305 | | // worked best for that type of frame in the past. |
6306 | | // It also predicts whether another coding mode would have worked |
6307 | | // better than this coding mode. If that is the case, it remembers |
6308 | | // that for subsequent frames. |
6309 | | // It also does the same analysis for transform size selection. |
6310 | 42.2k | const MV_REFERENCE_FRAME frame_type = get_frame_type(cpi); |
6311 | 42.2k | int64_t *const mode_thrs = rd_opt->prediction_type_threshes[frame_type]; |
6312 | 42.2k | int64_t *const filter_thrs = rd_opt->filter_threshes[frame_type]; |
6313 | 42.2k | const int is_alt_ref = frame_type == ALTREF_FRAME; |
6314 | | |
6315 | | /* prediction (compound, single or hybrid) mode selection */ |
6316 | 42.2k | if (is_alt_ref || !cpi->allow_comp_inter_inter) |
6317 | 42.2k | cm->reference_mode = SINGLE_REFERENCE; |
6318 | 0 | else if (mode_thrs[COMPOUND_REFERENCE] > mode_thrs[SINGLE_REFERENCE] && |
6319 | 0 | mode_thrs[COMPOUND_REFERENCE] > mode_thrs[REFERENCE_MODE_SELECT] && |
6320 | 0 | check_dual_ref_flags(cpi) && cpi->static_mb_pct == 100) |
6321 | 0 | cm->reference_mode = COMPOUND_REFERENCE; |
6322 | 0 | else if (mode_thrs[SINGLE_REFERENCE] > mode_thrs[REFERENCE_MODE_SELECT]) |
6323 | 0 | cm->reference_mode = SINGLE_REFERENCE; |
6324 | 0 | else |
6325 | 0 | cm->reference_mode = REFERENCE_MODE_SELECT; |
6326 | | |
6327 | 42.2k | if (cm->interp_filter == SWITCHABLE) |
6328 | 42.2k | cm->interp_filter = get_interp_filter(filter_thrs, is_alt_ref); |
6329 | | |
6330 | | #if CONFIG_COLLECT_COMPONENT_TIMING |
6331 | | start_timing(cpi, encode_frame_internal_time); |
6332 | | #endif |
6333 | 42.2k | encode_frame_internal(cpi); |
6334 | | #if CONFIG_COLLECT_COMPONENT_TIMING |
6335 | | end_timing(cpi, encode_frame_internal_time); |
6336 | | #endif |
6337 | | |
6338 | 169k | for (i = 0; i < REFERENCE_MODES; ++i) |
6339 | 126k | mode_thrs[i] = (mode_thrs[i] + rdc->comp_pred_diff[i] / cm->MBs) / 2; |
6340 | | |
6341 | 211k | for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) |
6342 | 169k | filter_thrs[i] = (filter_thrs[i] + rdc->filter_diff[i] / cm->MBs) / 2; |
6343 | | |
6344 | 42.2k | if (cm->reference_mode == REFERENCE_MODE_SELECT) { |
6345 | 0 | int single_count_zero = 0; |
6346 | 0 | int comp_count_zero = 0; |
6347 | |
|
6348 | 0 | for (i = 0; i < COMP_INTER_CONTEXTS; i++) { |
6349 | 0 | single_count_zero += counts->comp_inter[i][0]; |
6350 | 0 | comp_count_zero += counts->comp_inter[i][1]; |
6351 | 0 | } |
6352 | |
|
6353 | 0 | if (comp_count_zero == 0) { |
6354 | 0 | cm->reference_mode = SINGLE_REFERENCE; |
6355 | 0 | vp9_zero(counts->comp_inter); |
6356 | 0 | } else if (single_count_zero == 0) { |
6357 | 0 | cm->reference_mode = COMPOUND_REFERENCE; |
6358 | 0 | vp9_zero(counts->comp_inter); |
6359 | 0 | } |
6360 | 0 | } |
6361 | | |
6362 | 42.2k | if (cm->tx_mode == TX_MODE_SELECT) { |
6363 | 21.6k | int count4x4 = 0; |
6364 | 21.6k | int count8x8_lp = 0, count8x8_8x8p = 0; |
6365 | 21.6k | int count16x16_16x16p = 0, count16x16_lp = 0; |
6366 | 21.6k | int count32x32 = 0; |
6367 | | |
6368 | 64.8k | for (i = 0; i < TX_SIZE_CONTEXTS; ++i) { |
6369 | 43.2k | count4x4 += counts->tx.p32x32[i][TX_4X4]; |
6370 | 43.2k | count4x4 += counts->tx.p16x16[i][TX_4X4]; |
6371 | 43.2k | count4x4 += counts->tx.p8x8[i][TX_4X4]; |
6372 | | |
6373 | 43.2k | count8x8_lp += counts->tx.p32x32[i][TX_8X8]; |
6374 | 43.2k | count8x8_lp += counts->tx.p16x16[i][TX_8X8]; |
6375 | 43.2k | count8x8_8x8p += counts->tx.p8x8[i][TX_8X8]; |
6376 | | |
6377 | 43.2k | count16x16_16x16p += counts->tx.p16x16[i][TX_16X16]; |
6378 | 43.2k | count16x16_lp += counts->tx.p32x32[i][TX_16X16]; |
6379 | 43.2k | count32x32 += counts->tx.p32x32[i][TX_32X32]; |
6380 | 43.2k | } |
6381 | 21.6k | if (count4x4 == 0 && count16x16_lp == 0 && count16x16_16x16p == 0 && |
6382 | 21.6k | count32x32 == 0) { |
6383 | 3.58k | cm->tx_mode = ALLOW_8X8; |
6384 | 3.58k | reset_skip_tx_size(cm, TX_8X8); |
6385 | 18.0k | } else if (count8x8_8x8p == 0 && count16x16_16x16p == 0 && |
6386 | 18.0k | count8x8_lp == 0 && count16x16_lp == 0 && count32x32 == 0) { |
6387 | 2.08k | cm->tx_mode = ONLY_4X4; |
6388 | 2.08k | reset_skip_tx_size(cm, TX_4X4); |
6389 | 15.9k | } else if (count8x8_lp == 0 && count16x16_lp == 0 && count4x4 == 0) { |
6390 | 3.11k | cm->tx_mode = ALLOW_32X32; |
6391 | 12.8k | } else if (count32x32 == 0 && count8x8_lp == 0 && count4x4 == 0) { |
6392 | 36 | cm->tx_mode = ALLOW_16X16; |
6393 | 36 | reset_skip_tx_size(cm, TX_16X16); |
6394 | 36 | } |
6395 | 21.6k | } |
6396 | 42.2k | } else { |
6397 | 0 | FRAME_COUNTS *counts = cpi->td.counts; |
6398 | 0 | cm->reference_mode = SINGLE_REFERENCE; |
6399 | 0 | if (cpi->allow_comp_inter_inter && cpi->sf.use_compound_nonrd_pickmode && |
6400 | 0 | cpi->rc.alt_ref_gf_group && !cpi->rc.is_src_frame_alt_ref && |
6401 | 0 | cm->frame_type != KEY_FRAME) |
6402 | 0 | cm->reference_mode = REFERENCE_MODE_SELECT; |
6403 | |
|
6404 | 0 | encode_frame_internal(cpi); |
6405 | |
|
6406 | 0 | if (cm->reference_mode == REFERENCE_MODE_SELECT) { |
6407 | 0 | int single_count_zero = 0; |
6408 | 0 | int comp_count_zero = 0; |
6409 | 0 | int i; |
6410 | 0 | for (i = 0; i < COMP_INTER_CONTEXTS; i++) { |
6411 | 0 | single_count_zero += counts->comp_inter[i][0]; |
6412 | 0 | comp_count_zero += counts->comp_inter[i][1]; |
6413 | 0 | } |
6414 | 0 | if (comp_count_zero == 0) { |
6415 | 0 | cm->reference_mode = SINGLE_REFERENCE; |
6416 | 0 | vp9_zero(counts->comp_inter); |
6417 | 0 | } else if (single_count_zero == 0) { |
6418 | 0 | cm->reference_mode = COMPOUND_REFERENCE; |
6419 | 0 | vp9_zero(counts->comp_inter); |
6420 | 0 | } |
6421 | 0 | } |
6422 | 0 | } |
6423 | | |
6424 | | // If segmented AQ is enabled compute the average AQ weighting. |
6425 | 42.2k | if (cm->seg.enabled && (cpi->oxcf.aq_mode != NO_AQ) && |
6426 | 42.2k | (cm->seg.update_map || cm->seg.update_data)) { |
6427 | 0 | cm->seg.aq_av_offset = compute_frame_aq_offset(cpi); |
6428 | 0 | } |
6429 | 42.2k | } |
6430 | | |
6431 | 1.31M | static void sum_intra_stats(FRAME_COUNTS *counts, const MODE_INFO *mi) { |
6432 | 1.31M | const PREDICTION_MODE y_mode = mi->mode; |
6433 | 1.31M | const PREDICTION_MODE uv_mode = mi->uv_mode; |
6434 | 1.31M | const BLOCK_SIZE bsize = mi->sb_type; |
6435 | | |
6436 | 1.31M | if (bsize < BLOCK_8X8) { |
6437 | 879k | int idx, idy; |
6438 | 879k | const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; |
6439 | 879k | const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; |
6440 | 2.55M | for (idy = 0; idy < 2; idy += num_4x4_h) |
6441 | 4.77M | for (idx = 0; idx < 2; idx += num_4x4_w) |
6442 | 3.10M | ++counts->y_mode[0][mi->bmi[idy * 2 + idx].as_mode]; |
6443 | 879k | } else { |
6444 | 436k | ++counts->y_mode[size_group_lookup[bsize]][y_mode]; |
6445 | 436k | } |
6446 | | |
6447 | 1.31M | ++counts->uv_mode[y_mode][uv_mode]; |
6448 | 1.31M | } |
6449 | | |
6450 | | static void update_zeromv_cnt(VP9_COMP *const cpi, const MODE_INFO *const mi, |
6451 | 1.89M | int mi_row, int mi_col, BLOCK_SIZE bsize) { |
6452 | 1.89M | const VP9_COMMON *const cm = &cpi->common; |
6453 | 1.89M | MV mv = mi->mv[0].as_mv; |
6454 | 1.89M | const int bw = num_8x8_blocks_wide_lookup[bsize]; |
6455 | 1.89M | const int bh = num_8x8_blocks_high_lookup[bsize]; |
6456 | 1.89M | const int xmis = VPXMIN(cm->mi_cols - mi_col, bw); |
6457 | 1.89M | const int ymis = VPXMIN(cm->mi_rows - mi_row, bh); |
6458 | 1.89M | const int block_index = mi_row * cm->mi_cols + mi_col; |
6459 | 1.89M | int x, y; |
6460 | 4.07M | for (y = 0; y < ymis; y++) |
6461 | 5.42M | for (x = 0; x < xmis; x++) { |
6462 | 3.24M | int map_offset = block_index + y * cm->mi_cols + x; |
6463 | 3.24M | if (mi->ref_frame[0] == LAST_FRAME && is_inter_block(mi) && |
6464 | 3.24M | mi->segment_id <= CR_SEGMENT_ID_BOOST2) { |
6465 | 681k | if (abs(mv.row) < 8 && abs(mv.col) < 8) { |
6466 | 104k | if (cpi->consec_zero_mv[map_offset] < 255) |
6467 | 104k | cpi->consec_zero_mv[map_offset]++; |
6468 | 576k | } else { |
6469 | 576k | cpi->consec_zero_mv[map_offset] = 0; |
6470 | 576k | } |
6471 | 681k | } |
6472 | 3.24M | } |
6473 | 1.89M | } |
6474 | | |
6475 | | static void encode_superblock(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t, |
6476 | | int output_enabled, int mi_row, int mi_col, |
6477 | 7.54M | BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) { |
6478 | 7.54M | VP9_COMMON *const cm = &cpi->common; |
6479 | 7.54M | MACROBLOCK *const x = &td->mb; |
6480 | 7.54M | MACROBLOCKD *const xd = &x->e_mbd; |
6481 | 7.54M | MODE_INFO *mi = xd->mi[0]; |
6482 | 7.54M | const int seg_skip = |
6483 | 7.54M | segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP); |
6484 | 7.54M | x->skip_recode = !x->select_tx_size && mi->sb_type >= BLOCK_8X8 && |
6485 | 7.54M | cpi->oxcf.aq_mode != COMPLEXITY_AQ && |
6486 | 7.54M | cpi->oxcf.aq_mode != CYCLIC_REFRESH_AQ && |
6487 | 7.54M | cpi->sf.allow_skip_recode; |
6488 | | |
6489 | 7.54M | if (!x->skip_recode && !cpi->sf.use_nonrd_pick_mode) |
6490 | 7.54M | memset(x->skip_txfm, 0, sizeof(x->skip_txfm)); |
6491 | | |
6492 | 7.54M | x->skip_optimize = ctx->is_coded; |
6493 | 7.54M | ctx->is_coded = 1; |
6494 | 7.54M | x->use_lp32x32fdct = cpi->sf.use_lp32x32fdct; |
6495 | 7.54M | x->skip_encode = (!output_enabled && cpi->sf.skip_encode_frame && |
6496 | 7.54M | x->q_index < QIDX_SKIP_THRESH); |
6497 | | |
6498 | 7.54M | if (x->skip_encode) return; |
6499 | | |
6500 | 7.54M | if (!is_inter_block(mi)) { |
6501 | 5.24M | int plane; |
6502 | | #if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH |
6503 | | if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) && |
6504 | | (xd->above_mi == NULL || xd->left_mi == NULL) && |
6505 | | need_top_left[mi->uv_mode]) |
6506 | | assert(0); |
6507 | | #endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH |
6508 | 5.24M | mi->skip = 1; |
6509 | 20.9M | for (plane = 0; plane < MAX_MB_PLANE; ++plane) |
6510 | 15.7M | vp9_encode_intra_block_plane(x, VPXMAX(bsize, BLOCK_8X8), plane, 1); |
6511 | 5.24M | if (output_enabled) sum_intra_stats(td->counts, mi); |
6512 | 5.24M | vp9_tokenize_sb(cpi, td, t, !output_enabled, seg_skip, |
6513 | 5.24M | VPXMAX(bsize, BLOCK_8X8)); |
6514 | 5.24M | } else { |
6515 | 2.29M | int ref; |
6516 | 2.29M | const int is_compound = has_second_ref(mi); |
6517 | 2.29M | set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]); |
6518 | 4.59M | for (ref = 0; ref < 1 + is_compound; ++ref) { |
6519 | 2.29M | YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, mi->ref_frame[ref]); |
6520 | 2.29M | assert(cfg != NULL); |
6521 | 2.29M | vp9_setup_pre_planes(xd, ref, cfg, mi_row, mi_col, |
6522 | 2.29M | &xd->block_refs[ref]->sf); |
6523 | 2.29M | } |
6524 | 2.29M | if (!(cpi->sf.reuse_inter_pred_sby && ctx->pred_pixel_ready) || seg_skip) |
6525 | 2.29M | vp9_build_inter_predictors_sby(xd, mi_row, mi_col, |
6526 | 2.29M | VPXMAX(bsize, BLOCK_8X8)); |
6527 | | |
6528 | 2.29M | vp9_build_inter_predictors_sbuv(xd, mi_row, mi_col, |
6529 | 2.29M | VPXMAX(bsize, BLOCK_8X8)); |
6530 | | |
6531 | | #if CONFIG_MISMATCH_DEBUG |
6532 | | if (output_enabled) { |
6533 | | int plane; |
6534 | | for (plane = 0; plane < MAX_MB_PLANE; ++plane) { |
6535 | | const struct macroblockd_plane *pd = &xd->plane[plane]; |
6536 | | int pixel_c, pixel_r; |
6537 | | const BLOCK_SIZE plane_bsize = |
6538 | | get_plane_block_size(VPXMAX(bsize, BLOCK_8X8), &xd->plane[plane]); |
6539 | | const int bw = get_block_width(plane_bsize); |
6540 | | const int bh = get_block_height(plane_bsize); |
6541 | | mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, 0, 0, |
6542 | | pd->subsampling_x, pd->subsampling_y); |
6543 | | |
6544 | | mismatch_record_block_pre(pd->dst.buf, pd->dst.stride, plane, pixel_c, |
6545 | | pixel_r, bw, bh, |
6546 | | xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH); |
6547 | | } |
6548 | | } |
6549 | | #endif |
6550 | | |
6551 | 2.29M | vp9_encode_sb(x, VPXMAX(bsize, BLOCK_8X8), mi_row, mi_col, output_enabled); |
6552 | 2.29M | vp9_tokenize_sb(cpi, td, t, !output_enabled, seg_skip, |
6553 | 2.29M | VPXMAX(bsize, BLOCK_8X8)); |
6554 | 2.29M | } |
6555 | | |
6556 | 7.54M | if (seg_skip) { |
6557 | 0 | assert(mi->skip); |
6558 | 0 | } |
6559 | | |
6560 | 7.54M | if (output_enabled) { |
6561 | 1.89M | if (cm->tx_mode == TX_MODE_SELECT && mi->sb_type >= BLOCK_8X8 && |
6562 | 1.89M | !(is_inter_block(mi) && mi->skip)) { |
6563 | 437k | ++get_tx_counts(max_txsize_lookup[bsize], get_tx_size_context(xd), |
6564 | 437k | &td->counts->tx)[mi->tx_size]; |
6565 | 1.46M | } else { |
6566 | | // The new intra coding scheme requires no change of transform size |
6567 | 1.46M | if (is_inter_block(mi)) { |
6568 | 425k | mi->tx_size = VPXMIN(tx_mode_to_biggest_tx_size[cm->tx_mode], |
6569 | 425k | max_txsize_lookup[bsize]); |
6570 | 1.03M | } else { |
6571 | 1.03M | mi->tx_size = (bsize >= BLOCK_8X8) ? mi->tx_size : TX_4X4; |
6572 | 1.03M | } |
6573 | 1.46M | } |
6574 | | |
6575 | 1.89M | ++td->counts->tx.tx_totals[mi->tx_size]; |
6576 | 1.89M | ++td->counts->tx.tx_totals[get_uv_tx_size(mi, &xd->plane[1])]; |
6577 | 1.89M | if (cm->seg.enabled && cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && |
6578 | 1.89M | cpi->cyclic_refresh->content_mode) |
6579 | 0 | vp9_cyclic_refresh_update_sb_postencode(cpi, mi, mi_row, mi_col, bsize); |
6580 | 1.89M | if (cpi->oxcf.pass == 0 && cpi->svc.temporal_layer_id == 0 && |
6581 | 1.89M | (!cpi->use_svc || |
6582 | 1.89M | (cpi->use_svc && |
6583 | 0 | !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame && |
6584 | 0 | cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1))) |
6585 | 1.89M | update_zeromv_cnt(cpi, mi, mi_row, mi_col, bsize); |
6586 | 1.89M | } |
6587 | 7.54M | } |