/src/libvpx/vp9/encoder/vp9_encodeframe.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 | | * |
4 | | * Use of this source code is governed by a BSD-style license |
5 | | * that can be found in the LICENSE file in the root of the source |
6 | | * tree. An additional intellectual property rights grant can be found |
7 | | * in the file PATENTS. All contributing project authors may |
8 | | * be found in the AUTHORS file in the root of the source tree. |
9 | | */ |
10 | | |
11 | | #include <float.h> |
12 | | #include <limits.h> |
13 | | #include <math.h> |
14 | | #include <stdio.h> |
15 | | |
16 | | #include "./vp9_rtcd.h" |
17 | | #include "./vpx_dsp_rtcd.h" |
18 | | #include "./vpx_config.h" |
19 | | |
20 | | #include "vpx_dsp/vpx_dsp_common.h" |
21 | | #include "vpx_ports/mem.h" |
22 | | #include "vpx_ports/vpx_timer.h" |
23 | | #include "vpx_ports/system_state.h" |
24 | | #include "vpx_util/vpx_pthread.h" |
25 | | #if CONFIG_MISMATCH_DEBUG |
26 | | #include "vpx_util/vpx_debug_util.h" |
27 | | #endif // CONFIG_MISMATCH_DEBUG |
28 | | |
29 | | #include "vp9/common/vp9_common.h" |
30 | | #include "vp9/common/vp9_entropy.h" |
31 | | #include "vp9/common/vp9_entropymode.h" |
32 | | #include "vp9/common/vp9_idct.h" |
33 | | #include "vp9/common/vp9_mvref_common.h" |
34 | | #include "vp9/common/vp9_pred_common.h" |
35 | | #include "vp9/common/vp9_quant_common.h" |
36 | | #include "vp9/common/vp9_reconintra.h" |
37 | | #include "vp9/common/vp9_reconinter.h" |
38 | | #include "vp9/common/vp9_seg_common.h" |
39 | | #include "vp9/common/vp9_tile_common.h" |
40 | | #if !CONFIG_REALTIME_ONLY |
41 | | #include "vp9/encoder/vp9_aq_360.h" |
42 | | #include "vp9/encoder/vp9_aq_complexity.h" |
43 | | #endif |
44 | | #include "vp9/encoder/vp9_aq_cyclicrefresh.h" |
45 | | #if !CONFIG_REALTIME_ONLY |
46 | | #include "vp9/encoder/vp9_aq_variance.h" |
47 | | #endif |
48 | | #include "vp9/encoder/vp9_encodeframe.h" |
49 | | #include "vp9/encoder/vp9_encodemb.h" |
50 | | #include "vp9/encoder/vp9_encodemv.h" |
51 | | #include "vp9/encoder/vp9_encoder.h" |
52 | | #include "vp9/encoder/vp9_ethread.h" |
53 | | #include "vp9/encoder/vp9_extend.h" |
54 | | #include "vp9/encoder/vp9_multi_thread.h" |
55 | | #include "vp9/encoder/vp9_partition_models.h" |
56 | | #include "vp9/encoder/vp9_pickmode.h" |
57 | | #include "vp9/encoder/vp9_rd.h" |
58 | | #include "vp9/encoder/vp9_rdopt.h" |
59 | | #include "vp9/encoder/vp9_segmentation.h" |
60 | | #include "vp9/encoder/vp9_tokenize.h" |
61 | | |
62 | | static void encode_superblock(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t, |
63 | | int output_enabled, int mi_row, int mi_col, |
64 | | BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx); |
65 | | |
66 | | // This is used as a reference when computing the source variance for the |
67 | | // purpose of activity masking. |
68 | | // Eventually this should be replaced by custom no-reference routines, |
69 | | // which will be faster. |
70 | | static const uint8_t VP9_VAR_OFFS[64] = { |
71 | | 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, |
72 | | 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, |
73 | | 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, |
74 | | 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, |
75 | | 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 |
76 | | }; |
77 | | |
78 | | #if CONFIG_VP9_HIGHBITDEPTH |
79 | | static const uint16_t VP9_HIGH_VAR_OFFS_8[64] = { |
80 | | 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, |
81 | | 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, |
82 | | 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, |
83 | | 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, |
84 | | 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 |
85 | | }; |
86 | | |
87 | | static const uint16_t VP9_HIGH_VAR_OFFS_10[64] = { |
88 | | 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, |
89 | | 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, |
90 | | 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, |
91 | | 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, |
92 | | 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, |
93 | | 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, |
94 | | 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, |
95 | | 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4 |
96 | | }; |
97 | | |
98 | | static const uint16_t VP9_HIGH_VAR_OFFS_12[64] = { |
99 | | 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, |
100 | | 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, |
101 | | 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, |
102 | | 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, |
103 | | 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, |
104 | | 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, |
105 | | 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, |
106 | | 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, |
107 | | 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, |
108 | | 128 * 16 |
109 | | }; |
110 | | #endif // CONFIG_VP9_HIGHBITDEPTH |
111 | | |
112 | | unsigned int vp9_get_sby_variance(VP9_COMP *cpi, const struct buf_2d *ref, |
113 | 10.8M | BLOCK_SIZE bs) { |
114 | 10.8M | unsigned int sse; |
115 | 10.8M | const unsigned int var = |
116 | 10.8M | cpi->fn_ptr[bs].vf(ref->buf, ref->stride, VP9_VAR_OFFS, 0, &sse); |
117 | 10.8M | return var; |
118 | 10.8M | } |
119 | | |
120 | | #if CONFIG_VP9_HIGHBITDEPTH |
121 | | unsigned int vp9_high_get_sby_variance(VP9_COMP *cpi, const struct buf_2d *ref, |
122 | 0 | BLOCK_SIZE bs, int bd) { |
123 | 0 | unsigned int var, sse; |
124 | 0 | switch (bd) { |
125 | 0 | case 10: |
126 | 0 | var = |
127 | 0 | cpi->fn_ptr[bs].vf(ref->buf, ref->stride, |
128 | 0 | CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_10), 0, &sse); |
129 | 0 | break; |
130 | 0 | case 12: |
131 | 0 | var = |
132 | 0 | cpi->fn_ptr[bs].vf(ref->buf, ref->stride, |
133 | 0 | CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_12), 0, &sse); |
134 | 0 | break; |
135 | 0 | case 8: |
136 | 0 | default: |
137 | 0 | var = |
138 | 0 | cpi->fn_ptr[bs].vf(ref->buf, ref->stride, |
139 | 0 | CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_8), 0, &sse); |
140 | 0 | break; |
141 | 0 | } |
142 | 0 | return var; |
143 | 0 | } |
144 | | #endif // CONFIG_VP9_HIGHBITDEPTH |
145 | | |
146 | | unsigned int vp9_get_sby_perpixel_variance(VP9_COMP *cpi, |
147 | | const struct buf_2d *ref, |
148 | 10.8M | BLOCK_SIZE bs) { |
149 | 10.8M | return ROUND_POWER_OF_TWO(vp9_get_sby_variance(cpi, ref, bs), |
150 | 10.8M | num_pels_log2_lookup[bs]); |
151 | 10.8M | } |
152 | | |
153 | | #if CONFIG_VP9_HIGHBITDEPTH |
154 | | unsigned int vp9_high_get_sby_perpixel_variance(VP9_COMP *cpi, |
155 | | const struct buf_2d *ref, |
156 | 0 | BLOCK_SIZE bs, int bd) { |
157 | 0 | return (unsigned int)ROUND64_POWER_OF_TWO( |
158 | 0 | (int64_t)vp9_high_get_sby_variance(cpi, ref, bs, bd), |
159 | 0 | num_pels_log2_lookup[bs]); |
160 | 0 | } |
161 | | #endif // CONFIG_VP9_HIGHBITDEPTH |
162 | | |
163 | | static void set_segment_index(VP9_COMP *cpi, MACROBLOCK *const x, int mi_row, |
164 | 10.8M | int mi_col, BLOCK_SIZE bsize, int segment_index) { |
165 | 10.8M | VP9_COMMON *const cm = &cpi->common; |
166 | 10.8M | const struct segmentation *const seg = &cm->seg; |
167 | 10.8M | MACROBLOCKD *const xd = &x->e_mbd; |
168 | 10.8M | MODE_INFO *mi = xd->mi[0]; |
169 | | |
170 | 10.8M | const AQ_MODE aq_mode = cpi->oxcf.aq_mode; |
171 | 10.8M | const uint8_t *const map = |
172 | 10.8M | seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map; |
173 | | |
174 | | // Initialize the segmentation index as 0. |
175 | 10.8M | mi->segment_id = 0; |
176 | | |
177 | | // Skip the rest if AQ mode is disabled. |
178 | 10.8M | if (!seg->enabled) return; |
179 | | |
180 | 0 | switch (aq_mode) { |
181 | 0 | case CYCLIC_REFRESH_AQ: |
182 | 0 | mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); |
183 | 0 | break; |
184 | 0 | #if !CONFIG_REALTIME_ONLY |
185 | 0 | case VARIANCE_AQ: |
186 | 0 | if (cm->frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame || |
187 | 0 | cpi->force_update_segmentation || |
188 | 0 | (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) { |
189 | 0 | int min_energy; |
190 | 0 | int max_energy; |
191 | | // Get sub block energy range |
192 | 0 | if (bsize >= BLOCK_32X32) { |
193 | 0 | vp9_get_sub_block_energy(cpi, x, mi_row, mi_col, bsize, &min_energy, |
194 | 0 | &max_energy); |
195 | 0 | } else { |
196 | 0 | min_energy = bsize <= BLOCK_16X16 ? x->mb_energy |
197 | 0 | : vp9_block_energy(cpi, x, bsize); |
198 | 0 | } |
199 | 0 | mi->segment_id = vp9_vaq_segment_id(min_energy); |
200 | 0 | } else { |
201 | 0 | mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); |
202 | 0 | } |
203 | 0 | break; |
204 | 0 | case EQUATOR360_AQ: |
205 | 0 | if (cm->frame_type == KEY_FRAME || cpi->force_update_segmentation) |
206 | 0 | mi->segment_id = vp9_360aq_segment_id(mi_row, cm->mi_rows); |
207 | 0 | else |
208 | 0 | mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); |
209 | 0 | break; |
210 | 0 | #endif |
211 | 0 | case LOOKAHEAD_AQ: |
212 | 0 | mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); |
213 | 0 | break; |
214 | 0 | case PSNR_AQ: mi->segment_id = segment_index; break; |
215 | 0 | case PERCEPTUAL_AQ: mi->segment_id = x->segment_id; break; |
216 | 0 | default: |
217 | | // NO_AQ or PSNR_AQ |
218 | 0 | break; |
219 | 0 | } |
220 | | |
221 | | // Set segment index if ROI map or active_map is enabled. |
222 | 0 | if (cpi->roi.enabled || cpi->active_map.enabled) |
223 | 0 | mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); |
224 | |
|
225 | 0 | vp9_init_plane_quantizers(cpi, x); |
226 | 0 | } |
227 | | |
228 | | // Lighter version of set_offsets that only sets the mode info |
229 | | // pointers. |
230 | | static INLINE void set_mode_info_offsets(VP9_COMMON *const cm, |
231 | | MACROBLOCK *const x, |
232 | | MACROBLOCKD *const xd, int mi_row, |
233 | 22.4M | int mi_col) { |
234 | 22.4M | const int idx_str = xd->mi_stride * mi_row + mi_col; |
235 | 22.4M | xd->mi = cm->mi_grid_visible + idx_str; |
236 | 22.4M | xd->mi[0] = cm->mi + idx_str; |
237 | 22.4M | x->mbmi_ext = x->mbmi_ext_base + (mi_row * cm->mi_cols + mi_col); |
238 | 22.4M | } |
239 | | |
240 | | static void set_ssim_rdmult(VP9_COMP *const cpi, MACROBLOCK *const x, |
241 | | const BLOCK_SIZE bsize, const int mi_row, |
242 | 0 | const int mi_col, int *const rdmult) { |
243 | 0 | const VP9_COMMON *const cm = &cpi->common; |
244 | |
|
245 | 0 | const int bsize_base = BLOCK_16X16; |
246 | 0 | const int num_8x8_w = num_8x8_blocks_wide_lookup[bsize_base]; |
247 | 0 | const int num_8x8_h = num_8x8_blocks_high_lookup[bsize_base]; |
248 | 0 | const int num_cols = (cm->mi_cols + num_8x8_w - 1) / num_8x8_w; |
249 | 0 | const int num_rows = (cm->mi_rows + num_8x8_h - 1) / num_8x8_h; |
250 | 0 | const int num_bcols = |
251 | 0 | (num_8x8_blocks_wide_lookup[bsize] + num_8x8_w - 1) / num_8x8_w; |
252 | 0 | const int num_brows = |
253 | 0 | (num_8x8_blocks_high_lookup[bsize] + num_8x8_h - 1) / num_8x8_h; |
254 | 0 | int row, col; |
255 | 0 | double num_of_mi = 0.0; |
256 | 0 | double geom_mean_of_scale = 0.0; |
257 | |
|
258 | 0 | assert(cpi->oxcf.tuning == VP8_TUNE_SSIM); |
259 | |
|
260 | 0 | for (row = mi_row / num_8x8_w; |
261 | 0 | row < num_rows && row < mi_row / num_8x8_w + num_brows; ++row) { |
262 | 0 | for (col = mi_col / num_8x8_h; |
263 | 0 | col < num_cols && col < mi_col / num_8x8_h + num_bcols; ++col) { |
264 | 0 | const int index = row * num_cols + col; |
265 | 0 | geom_mean_of_scale += log(cpi->mi_ssim_rdmult_scaling_factors[index]); |
266 | 0 | num_of_mi += 1.0; |
267 | 0 | } |
268 | 0 | } |
269 | 0 | geom_mean_of_scale = exp(geom_mean_of_scale / num_of_mi); |
270 | |
|
271 | 0 | *rdmult = (int)((double)(*rdmult) * geom_mean_of_scale); |
272 | 0 | *rdmult = VPXMAX(*rdmult, 0); |
273 | 0 | set_error_per_bit(x, *rdmult); |
274 | 0 | vpx_clear_system_state(); |
275 | 0 | } |
276 | | |
277 | | static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile, |
278 | | MACROBLOCK *const x, int mi_row, int mi_col, |
279 | 22.4M | BLOCK_SIZE bsize) { |
280 | 22.4M | VP9_COMMON *const cm = &cpi->common; |
281 | 22.4M | const VP9EncoderConfig *const oxcf = &cpi->oxcf; |
282 | 22.4M | MACROBLOCKD *const xd = &x->e_mbd; |
283 | 22.4M | const int mi_width = num_8x8_blocks_wide_lookup[bsize]; |
284 | 22.4M | const int mi_height = num_8x8_blocks_high_lookup[bsize]; |
285 | 22.4M | MvLimits *const mv_limits = &x->mv_limits; |
286 | | |
287 | 22.4M | set_skip_context(xd, mi_row, mi_col); |
288 | | |
289 | 22.4M | set_mode_info_offsets(cm, x, xd, mi_row, mi_col); |
290 | | |
291 | | // Set up destination pointers. |
292 | 22.4M | vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col); |
293 | | |
294 | | // Set up limit values for MV components. |
295 | | // Mv beyond the range do not produce new/different prediction block. |
296 | 22.4M | mv_limits->row_min = -(((mi_row + mi_height) * MI_SIZE) + VP9_INTERP_EXTEND); |
297 | 22.4M | mv_limits->col_min = -(((mi_col + mi_width) * MI_SIZE) + VP9_INTERP_EXTEND); |
298 | 22.4M | mv_limits->row_max = (cm->mi_rows - mi_row) * MI_SIZE + VP9_INTERP_EXTEND; |
299 | 22.4M | mv_limits->col_max = (cm->mi_cols - mi_col) * MI_SIZE + VP9_INTERP_EXTEND; |
300 | | |
301 | | // Set up distance of MB to edge of frame in 1/8th pel units. |
302 | 22.4M | assert(!(mi_col & (mi_width - 1)) && !(mi_row & (mi_height - 1))); |
303 | 22.4M | set_mi_row_col(xd, tile, mi_row, mi_height, mi_col, mi_width, cm->mi_rows, |
304 | 22.4M | cm->mi_cols); |
305 | | |
306 | | // Set up source buffers. |
307 | 22.4M | vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col); |
308 | | |
309 | | // R/D setup. |
310 | 22.4M | x->rddiv = cpi->rd.RDDIV; |
311 | 22.4M | x->rdmult = cpi->rd.RDMULT; |
312 | 22.4M | if (oxcf->tuning == VP8_TUNE_SSIM) { |
313 | 0 | set_ssim_rdmult(cpi, x, bsize, mi_row, mi_col, &x->rdmult); |
314 | 0 | } |
315 | | |
316 | | // required by vp9_append_sub8x8_mvs_for_idx() and vp9_find_best_ref_mvs() |
317 | 22.4M | xd->tile = *tile; |
318 | 22.4M | } |
319 | | |
320 | | static void duplicate_mode_info_in_sb(VP9_COMMON *cm, MACROBLOCKD *xd, |
321 | | int mi_row, int mi_col, |
322 | 0 | BLOCK_SIZE bsize) { |
323 | 0 | const int block_width = |
324 | 0 | VPXMIN(num_8x8_blocks_wide_lookup[bsize], cm->mi_cols - mi_col); |
325 | 0 | const int block_height = |
326 | 0 | VPXMIN(num_8x8_blocks_high_lookup[bsize], cm->mi_rows - mi_row); |
327 | 0 | const int mi_stride = xd->mi_stride; |
328 | 0 | MODE_INFO *const src_mi = xd->mi[0]; |
329 | 0 | int i, j; |
330 | |
|
331 | 0 | for (j = 0; j < block_height; ++j) |
332 | 0 | for (i = 0; i < block_width; ++i) xd->mi[j * mi_stride + i] = src_mi; |
333 | 0 | } |
334 | | |
335 | | static void set_block_size(VP9_COMP *const cpi, MACROBLOCK *const x, |
336 | | MACROBLOCKD *const xd, int mi_row, int mi_col, |
337 | 0 | BLOCK_SIZE bsize) { |
338 | 0 | if (cpi->common.mi_cols > mi_col && cpi->common.mi_rows > mi_row) { |
339 | 0 | set_mode_info_offsets(&cpi->common, x, xd, mi_row, mi_col); |
340 | 0 | xd->mi[0]->sb_type = bsize; |
341 | 0 | } |
342 | 0 | } |
343 | | |
344 | | typedef struct { |
345 | | // This struct is used for computing variance in choose_partitioning(), where |
346 | | // the max number of samples within a superblock is 16x16 (with 4x4 avg). Even |
347 | | // in high bitdepth, uint32_t is enough for sum_square_error (2^12 * 2^12 * 16 |
348 | | // * 16 = 2^32). |
349 | | uint32_t sum_square_error; |
350 | | int32_t sum_error; |
351 | | int log2_count; |
352 | | int variance; |
353 | | } Var; |
354 | | |
355 | | typedef struct { |
356 | | Var none; |
357 | | Var horz[2]; |
358 | | Var vert[2]; |
359 | | } partition_variance; |
360 | | |
361 | | typedef struct { |
362 | | partition_variance part_variances; |
363 | | Var split[4]; |
364 | | } v4x4; |
365 | | |
366 | | typedef struct { |
367 | | partition_variance part_variances; |
368 | | v4x4 split[4]; |
369 | | } v8x8; |
370 | | |
371 | | typedef struct { |
372 | | partition_variance part_variances; |
373 | | v8x8 split[4]; |
374 | | } v16x16; |
375 | | |
376 | | typedef struct { |
377 | | partition_variance part_variances; |
378 | | v16x16 split[4]; |
379 | | } v32x32; |
380 | | |
381 | | typedef struct { |
382 | | partition_variance part_variances; |
383 | | v32x32 split[4]; |
384 | | } v64x64; |
385 | | |
386 | | typedef struct { |
387 | | partition_variance *part_variances; |
388 | | Var *split[4]; |
389 | | } variance_node; |
390 | | |
391 | | typedef enum { |
392 | | V16X16, |
393 | | V32X32, |
394 | | V64X64, |
395 | | } TREE_LEVEL; |
396 | | |
397 | 0 | static void tree_to_node(void *data, BLOCK_SIZE bsize, variance_node *node) { |
398 | 0 | int i; |
399 | 0 | node->part_variances = NULL; |
400 | 0 | switch (bsize) { |
401 | 0 | case BLOCK_64X64: { |
402 | 0 | v64x64 *vt = (v64x64 *)data; |
403 | 0 | node->part_variances = &vt->part_variances; |
404 | 0 | for (i = 0; i < 4; i++) |
405 | 0 | node->split[i] = &vt->split[i].part_variances.none; |
406 | 0 | break; |
407 | 0 | } |
408 | 0 | case BLOCK_32X32: { |
409 | 0 | v32x32 *vt = (v32x32 *)data; |
410 | 0 | node->part_variances = &vt->part_variances; |
411 | 0 | for (i = 0; i < 4; i++) |
412 | 0 | node->split[i] = &vt->split[i].part_variances.none; |
413 | 0 | break; |
414 | 0 | } |
415 | 0 | case BLOCK_16X16: { |
416 | 0 | v16x16 *vt = (v16x16 *)data; |
417 | 0 | node->part_variances = &vt->part_variances; |
418 | 0 | for (i = 0; i < 4; i++) |
419 | 0 | node->split[i] = &vt->split[i].part_variances.none; |
420 | 0 | break; |
421 | 0 | } |
422 | 0 | case BLOCK_8X8: { |
423 | 0 | v8x8 *vt = (v8x8 *)data; |
424 | 0 | node->part_variances = &vt->part_variances; |
425 | 0 | for (i = 0; i < 4; i++) |
426 | 0 | node->split[i] = &vt->split[i].part_variances.none; |
427 | 0 | break; |
428 | 0 | } |
429 | 0 | default: { |
430 | 0 | v4x4 *vt = (v4x4 *)data; |
431 | 0 | assert(bsize == BLOCK_4X4); |
432 | 0 | node->part_variances = &vt->part_variances; |
433 | 0 | for (i = 0; i < 4; i++) node->split[i] = &vt->split[i]; |
434 | 0 | break; |
435 | 0 | } |
436 | 0 | } |
437 | 0 | } |
438 | | |
439 | | // Set variance values given sum square error, sum error, count. |
440 | 0 | static void fill_variance(uint32_t s2, int32_t s, int c, Var *v) { |
441 | 0 | v->sum_square_error = s2; |
442 | 0 | v->sum_error = s; |
443 | 0 | v->log2_count = c; |
444 | 0 | } |
445 | | |
446 | 0 | static void get_variance(Var *v) { |
447 | 0 | v->variance = |
448 | 0 | (int)(256 * (v->sum_square_error - |
449 | 0 | (uint32_t)(((int64_t)v->sum_error * v->sum_error) >> |
450 | 0 | v->log2_count)) >> |
451 | 0 | v->log2_count); |
452 | 0 | } |
453 | | |
454 | 0 | static void sum_2_variances(const Var *a, const Var *b, Var *r) { |
455 | 0 | assert(a->log2_count == b->log2_count); |
456 | 0 | fill_variance(a->sum_square_error + b->sum_square_error, |
457 | 0 | a->sum_error + b->sum_error, a->log2_count + 1, r); |
458 | 0 | } |
459 | | |
460 | 0 | static void fill_variance_tree(void *data, BLOCK_SIZE bsize) { |
461 | 0 | variance_node node; |
462 | 0 | memset(&node, 0, sizeof(node)); |
463 | 0 | tree_to_node(data, bsize, &node); |
464 | 0 | sum_2_variances(node.split[0], node.split[1], &node.part_variances->horz[0]); |
465 | 0 | sum_2_variances(node.split[2], node.split[3], &node.part_variances->horz[1]); |
466 | 0 | sum_2_variances(node.split[0], node.split[2], &node.part_variances->vert[0]); |
467 | 0 | sum_2_variances(node.split[1], node.split[3], &node.part_variances->vert[1]); |
468 | 0 | sum_2_variances(&node.part_variances->vert[0], &node.part_variances->vert[1], |
469 | 0 | &node.part_variances->none); |
470 | 0 | } |
471 | | |
472 | | static int set_vt_partitioning(VP9_COMP *cpi, MACROBLOCK *const x, |
473 | | MACROBLOCKD *const xd, void *data, |
474 | | BLOCK_SIZE bsize, int mi_row, int mi_col, |
475 | | int64_t threshold, BLOCK_SIZE bsize_min, |
476 | 0 | int force_split) { |
477 | 0 | VP9_COMMON *const cm = &cpi->common; |
478 | 0 | variance_node vt; |
479 | 0 | const int block_width = num_8x8_blocks_wide_lookup[bsize]; |
480 | 0 | const int block_height = num_8x8_blocks_high_lookup[bsize]; |
481 | |
|
482 | 0 | assert(block_height == block_width); |
483 | 0 | tree_to_node(data, bsize, &vt); |
484 | |
|
485 | 0 | if (force_split == 1) return 0; |
486 | | |
487 | | // For bsize=bsize_min (16x16/8x8 for 8x8/4x4 downsampling), select if |
488 | | // variance is below threshold, otherwise split will be selected. |
489 | | // No check for vert/horiz split as too few samples for variance. |
490 | 0 | if (bsize == bsize_min) { |
491 | | // Variance already computed to set the force_split. |
492 | 0 | if (frame_is_intra_only(cm)) get_variance(&vt.part_variances->none); |
493 | 0 | if (mi_col + block_width / 2 < cm->mi_cols && |
494 | 0 | mi_row + block_height / 2 < cm->mi_rows && |
495 | 0 | vt.part_variances->none.variance < threshold) { |
496 | 0 | set_block_size(cpi, x, xd, mi_row, mi_col, bsize); |
497 | 0 | return 1; |
498 | 0 | } |
499 | 0 | return 0; |
500 | 0 | } else if (bsize > bsize_min) { |
501 | | // Variance already computed to set the force_split. |
502 | 0 | if (frame_is_intra_only(cm)) get_variance(&vt.part_variances->none); |
503 | | // For key frame: take split for bsize above 32X32 or very high variance. |
504 | 0 | if (frame_is_intra_only(cm) && |
505 | 0 | (bsize > BLOCK_32X32 || |
506 | 0 | vt.part_variances->none.variance > (threshold << 4))) { |
507 | 0 | return 0; |
508 | 0 | } |
509 | | // If variance is low, take the bsize (no split). |
510 | 0 | if (mi_col + block_width / 2 < cm->mi_cols && |
511 | 0 | mi_row + block_height / 2 < cm->mi_rows && |
512 | 0 | vt.part_variances->none.variance < threshold) { |
513 | 0 | set_block_size(cpi, x, xd, mi_row, mi_col, bsize); |
514 | 0 | return 1; |
515 | 0 | } |
516 | | |
517 | | // Check vertical split. |
518 | 0 | if (mi_row + block_height / 2 < cm->mi_rows) { |
519 | 0 | BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_VERT); |
520 | 0 | get_variance(&vt.part_variances->vert[0]); |
521 | 0 | get_variance(&vt.part_variances->vert[1]); |
522 | 0 | if (vt.part_variances->vert[0].variance < threshold && |
523 | 0 | vt.part_variances->vert[1].variance < threshold && |
524 | 0 | get_plane_block_size(subsize, &xd->plane[1]) < BLOCK_INVALID) { |
525 | 0 | set_block_size(cpi, x, xd, mi_row, mi_col, subsize); |
526 | 0 | set_block_size(cpi, x, xd, mi_row, mi_col + block_width / 2, subsize); |
527 | 0 | return 1; |
528 | 0 | } |
529 | 0 | } |
530 | | // Check horizontal split. |
531 | 0 | if (mi_col + block_width / 2 < cm->mi_cols) { |
532 | 0 | BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_HORZ); |
533 | 0 | get_variance(&vt.part_variances->horz[0]); |
534 | 0 | get_variance(&vt.part_variances->horz[1]); |
535 | 0 | if (vt.part_variances->horz[0].variance < threshold && |
536 | 0 | vt.part_variances->horz[1].variance < threshold && |
537 | 0 | get_plane_block_size(subsize, &xd->plane[1]) < BLOCK_INVALID) { |
538 | 0 | set_block_size(cpi, x, xd, mi_row, mi_col, subsize); |
539 | 0 | set_block_size(cpi, x, xd, mi_row + block_height / 2, mi_col, subsize); |
540 | 0 | return 1; |
541 | 0 | } |
542 | 0 | } |
543 | | |
544 | 0 | return 0; |
545 | 0 | } |
546 | 0 | return 0; |
547 | 0 | } |
548 | | |
549 | | static int64_t scale_part_thresh_sumdiff(int64_t threshold_base, int speed, |
550 | | int width, int height, |
551 | 0 | int content_state) { |
552 | 0 | if (speed >= 8) { |
553 | 0 | if (width <= 640 && height <= 480) |
554 | 0 | return (5 * threshold_base) >> 2; |
555 | 0 | else if ((content_state == kLowSadLowSumdiff) || |
556 | 0 | (content_state == kHighSadLowSumdiff) || |
557 | 0 | (content_state == kLowVarHighSumdiff)) |
558 | 0 | return (5 * threshold_base) >> 2; |
559 | 0 | } else if (speed == 7) { |
560 | 0 | if ((content_state == kLowSadLowSumdiff) || |
561 | 0 | (content_state == kHighSadLowSumdiff) || |
562 | 0 | (content_state == kLowVarHighSumdiff)) { |
563 | 0 | return (5 * threshold_base) >> 2; |
564 | 0 | } |
565 | 0 | } |
566 | 0 | return threshold_base; |
567 | 0 | } |
568 | | |
569 | | // Set the variance split thresholds for following the block sizes: |
570 | | // 0 - threshold_64x64, 1 - threshold_32x32, 2 - threshold_16x16, |
571 | | // 3 - vbp_threshold_8x8. vbp_threshold_8x8 (to split to 4x4 partition) is |
572 | | // currently only used on key frame. |
573 | | static void set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q, |
574 | 0 | int content_state) { |
575 | 0 | VP9_COMMON *const cm = &cpi->common; |
576 | 0 | const int is_key_frame = frame_is_intra_only(cm); |
577 | 0 | const int threshold_multiplier = |
578 | 0 | is_key_frame ? 20 : cpi->sf.variance_part_thresh_mult; |
579 | 0 | int64_t threshold_base = |
580 | 0 | (int64_t)(threshold_multiplier * cpi->y_dequant[q][1]); |
581 | |
|
582 | 0 | if (is_key_frame) { |
583 | 0 | thresholds[0] = threshold_base; |
584 | 0 | thresholds[1] = threshold_base >> 2; |
585 | 0 | thresholds[2] = threshold_base >> 2; |
586 | 0 | thresholds[3] = threshold_base << 2; |
587 | 0 | } else { |
588 | | // Increase base variance threshold based on estimated noise level. |
589 | 0 | if (cpi->noise_estimate.enabled && cm->width >= 640 && cm->height >= 480) { |
590 | 0 | NOISE_LEVEL noise_level = |
591 | 0 | vp9_noise_estimate_extract_level(&cpi->noise_estimate); |
592 | 0 | if (noise_level == kHigh) |
593 | 0 | threshold_base = 3 * threshold_base; |
594 | 0 | else if (noise_level == kMedium) |
595 | 0 | threshold_base = threshold_base << 1; |
596 | 0 | else if (noise_level < kLow) |
597 | 0 | threshold_base = (7 * threshold_base) >> 3; |
598 | 0 | } |
599 | | #if CONFIG_VP9_TEMPORAL_DENOISING |
600 | | if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) && |
601 | | cpi->oxcf.speed > 5 && cpi->denoiser.denoising_level >= kDenLow) |
602 | | threshold_base = |
603 | | vp9_scale_part_thresh(threshold_base, cpi->denoiser.denoising_level, |
604 | | content_state, cpi->svc.temporal_layer_id); |
605 | | else |
606 | | threshold_base = |
607 | | scale_part_thresh_sumdiff(threshold_base, cpi->oxcf.speed, cm->width, |
608 | | cm->height, content_state); |
609 | | #else |
610 | | // Increase base variance threshold based on content_state/sum_diff level. |
611 | 0 | threshold_base = scale_part_thresh_sumdiff( |
612 | 0 | threshold_base, cpi->oxcf.speed, cm->width, cm->height, content_state); |
613 | 0 | #endif |
614 | 0 | thresholds[0] = threshold_base; |
615 | 0 | thresholds[2] = threshold_base << cpi->oxcf.speed; |
616 | 0 | if (cm->width >= 1280 && cm->height >= 720 && cpi->oxcf.speed < 7) |
617 | 0 | thresholds[2] = thresholds[2] << 1; |
618 | 0 | if (cm->width <= 352 && cm->height <= 288) { |
619 | 0 | thresholds[0] = threshold_base >> 3; |
620 | 0 | thresholds[1] = threshold_base >> 1; |
621 | 0 | thresholds[2] = threshold_base << 3; |
622 | 0 | if (cpi->rc.avg_frame_qindex[INTER_FRAME] > 220) |
623 | 0 | thresholds[2] = thresholds[2] << 2; |
624 | 0 | else if (cpi->rc.avg_frame_qindex[INTER_FRAME] > 200) |
625 | 0 | thresholds[2] = thresholds[2] << 1; |
626 | 0 | } else if (cm->width < 1280 && cm->height < 720) { |
627 | 0 | thresholds[1] = (5 * threshold_base) >> 2; |
628 | 0 | } else if (cm->width < 1920 && cm->height < 1080) { |
629 | 0 | thresholds[1] = threshold_base << 1; |
630 | 0 | } else { |
631 | 0 | thresholds[1] = (5 * threshold_base) >> 1; |
632 | 0 | } |
633 | 0 | if (cpi->sf.disable_16x16part_nonkey) thresholds[2] = INT64_MAX; |
634 | 0 | } |
635 | 0 | } |
636 | | |
637 | | void vp9_set_variance_partition_thresholds(VP9_COMP *cpi, int q, |
638 | 54.1k | int content_state) { |
639 | 54.1k | VP9_COMMON *const cm = &cpi->common; |
640 | 54.1k | SPEED_FEATURES *const sf = &cpi->sf; |
641 | 54.1k | const int is_key_frame = frame_is_intra_only(cm); |
642 | 54.1k | if (sf->partition_search_type != VAR_BASED_PARTITION && |
643 | 54.1k | sf->partition_search_type != REFERENCE_PARTITION) { |
644 | 54.1k | return; |
645 | 54.1k | } else { |
646 | 0 | set_vbp_thresholds(cpi, cpi->vbp_thresholds, q, content_state); |
647 | | // The thresholds below are not changed locally. |
648 | 0 | if (is_key_frame) { |
649 | 0 | cpi->vbp_threshold_sad = 0; |
650 | 0 | cpi->vbp_threshold_copy = 0; |
651 | 0 | cpi->vbp_bsize_min = BLOCK_8X8; |
652 | 0 | } else { |
653 | 0 | if (cm->width <= 352 && cm->height <= 288) |
654 | 0 | cpi->vbp_threshold_sad = 10; |
655 | 0 | else |
656 | 0 | cpi->vbp_threshold_sad = (cpi->y_dequant[q][1] << 1) > 1000 |
657 | 0 | ? (cpi->y_dequant[q][1] << 1) |
658 | 0 | : 1000; |
659 | 0 | cpi->vbp_bsize_min = BLOCK_16X16; |
660 | 0 | if (cm->width <= 352 && cm->height <= 288) |
661 | 0 | cpi->vbp_threshold_copy = 4000; |
662 | 0 | else if (cm->width <= 640 && cm->height <= 360) |
663 | 0 | cpi->vbp_threshold_copy = 8000; |
664 | 0 | else |
665 | 0 | cpi->vbp_threshold_copy = (cpi->y_dequant[q][1] << 3) > 8000 |
666 | 0 | ? (cpi->y_dequant[q][1] << 3) |
667 | 0 | : 8000; |
668 | 0 | if (cpi->rc.high_source_sad || |
669 | 0 | (cpi->use_svc && cpi->svc.high_source_sad_superframe)) { |
670 | 0 | cpi->vbp_threshold_sad = 0; |
671 | 0 | cpi->vbp_threshold_copy = 0; |
672 | 0 | } |
673 | 0 | } |
674 | 0 | cpi->vbp_threshold_minmax = 15 + (q >> 3); |
675 | 0 | } |
676 | 54.1k | } |
677 | | |
678 | | // Compute the minmax over the 8x8 subblocks. |
679 | | static int compute_minmax_8x8(const uint8_t *s, int sp, const uint8_t *d, |
680 | | int dp, int x16_idx, int y16_idx, |
681 | | #if CONFIG_VP9_HIGHBITDEPTH |
682 | | int highbd_flag, |
683 | | #endif |
684 | 0 | int pixels_wide, int pixels_high) { |
685 | 0 | int k; |
686 | 0 | int minmax_max = 0; |
687 | 0 | int minmax_min = 255; |
688 | | // Loop over the 4 8x8 subblocks. |
689 | 0 | for (k = 0; k < 4; k++) { |
690 | 0 | int x8_idx = x16_idx + ((k & 1) << 3); |
691 | 0 | int y8_idx = y16_idx + ((k >> 1) << 3); |
692 | 0 | int min = 0; |
693 | 0 | int max = 0; |
694 | 0 | if (x8_idx < pixels_wide && y8_idx < pixels_high) { |
695 | 0 | #if CONFIG_VP9_HIGHBITDEPTH |
696 | 0 | if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) { |
697 | 0 | vpx_highbd_minmax_8x8(s + y8_idx * sp + x8_idx, sp, |
698 | 0 | d + y8_idx * dp + x8_idx, dp, &min, &max); |
699 | 0 | } else { |
700 | 0 | vpx_minmax_8x8(s + y8_idx * sp + x8_idx, sp, d + y8_idx * dp + x8_idx, |
701 | 0 | dp, &min, &max); |
702 | 0 | } |
703 | | #else |
704 | | vpx_minmax_8x8(s + y8_idx * sp + x8_idx, sp, d + y8_idx * dp + x8_idx, dp, |
705 | | &min, &max); |
706 | | #endif |
707 | 0 | if ((max - min) > minmax_max) minmax_max = (max - min); |
708 | 0 | if ((max - min) < minmax_min) minmax_min = (max - min); |
709 | 0 | } |
710 | 0 | } |
711 | 0 | return (minmax_max - minmax_min); |
712 | 0 | } |
713 | | |
714 | | static void fill_variance_4x4avg(const uint8_t *s, int sp, const uint8_t *d, |
715 | | int dp, int x8_idx, int y8_idx, v8x8 *vst, |
716 | | #if CONFIG_VP9_HIGHBITDEPTH |
717 | | int highbd_flag, |
718 | | #endif |
719 | | int pixels_wide, int pixels_high, |
720 | 0 | int is_key_frame) { |
721 | 0 | int k; |
722 | 0 | for (k = 0; k < 4; k++) { |
723 | 0 | int x4_idx = x8_idx + ((k & 1) << 2); |
724 | 0 | int y4_idx = y8_idx + ((k >> 1) << 2); |
725 | 0 | unsigned int sse = 0; |
726 | 0 | int sum = 0; |
727 | 0 | if (x4_idx < pixels_wide && y4_idx < pixels_high) { |
728 | 0 | int s_avg; |
729 | 0 | int d_avg = 128; |
730 | 0 | #if CONFIG_VP9_HIGHBITDEPTH |
731 | 0 | if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) { |
732 | 0 | s_avg = vpx_highbd_avg_4x4(s + y4_idx * sp + x4_idx, sp); |
733 | 0 | if (!is_key_frame) |
734 | 0 | d_avg = vpx_highbd_avg_4x4(d + y4_idx * dp + x4_idx, dp); |
735 | 0 | } else { |
736 | 0 | s_avg = vpx_avg_4x4(s + y4_idx * sp + x4_idx, sp); |
737 | 0 | if (!is_key_frame) d_avg = vpx_avg_4x4(d + y4_idx * dp + x4_idx, dp); |
738 | 0 | } |
739 | | #else |
740 | | s_avg = vpx_avg_4x4(s + y4_idx * sp + x4_idx, sp); |
741 | | if (!is_key_frame) d_avg = vpx_avg_4x4(d + y4_idx * dp + x4_idx, dp); |
742 | | #endif |
743 | 0 | sum = s_avg - d_avg; |
744 | 0 | sse = sum * sum; |
745 | 0 | } |
746 | 0 | fill_variance(sse, sum, 0, &vst->split[k].part_variances.none); |
747 | 0 | } |
748 | 0 | } |
749 | | |
750 | | static void fill_variance_8x8avg(const uint8_t *s, int sp, const uint8_t *d, |
751 | | int dp, int x16_idx, int y16_idx, v16x16 *vst, |
752 | | #if CONFIG_VP9_HIGHBITDEPTH |
753 | | int highbd_flag, |
754 | | #endif |
755 | | int pixels_wide, int pixels_high, |
756 | 0 | int is_key_frame) { |
757 | 0 | int k; |
758 | 0 | for (k = 0; k < 4; k++) { |
759 | 0 | int x8_idx = x16_idx + ((k & 1) << 3); |
760 | 0 | int y8_idx = y16_idx + ((k >> 1) << 3); |
761 | 0 | unsigned int sse = 0; |
762 | 0 | int sum = 0; |
763 | 0 | if (x8_idx < pixels_wide && y8_idx < pixels_high) { |
764 | 0 | int s_avg; |
765 | 0 | int d_avg = 128; |
766 | 0 | #if CONFIG_VP9_HIGHBITDEPTH |
767 | 0 | if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) { |
768 | 0 | s_avg = vpx_highbd_avg_8x8(s + y8_idx * sp + x8_idx, sp); |
769 | 0 | if (!is_key_frame) |
770 | 0 | d_avg = vpx_highbd_avg_8x8(d + y8_idx * dp + x8_idx, dp); |
771 | 0 | } else { |
772 | 0 | s_avg = vpx_avg_8x8(s + y8_idx * sp + x8_idx, sp); |
773 | 0 | if (!is_key_frame) d_avg = vpx_avg_8x8(d + y8_idx * dp + x8_idx, dp); |
774 | 0 | } |
775 | | #else |
776 | | s_avg = vpx_avg_8x8(s + y8_idx * sp + x8_idx, sp); |
777 | | if (!is_key_frame) d_avg = vpx_avg_8x8(d + y8_idx * dp + x8_idx, dp); |
778 | | #endif |
779 | 0 | sum = s_avg - d_avg; |
780 | 0 | sse = sum * sum; |
781 | 0 | } |
782 | 0 | fill_variance(sse, sum, 0, &vst->split[k].part_variances.none); |
783 | 0 | } |
784 | 0 | } |
785 | | |
786 | | // Check if most of the superblock is skin content, and if so, force split to |
787 | | // 32x32, and set x->sb_is_skin for use in mode selection. |
788 | | static int skin_sb_split(VP9_COMP *cpi, const int low_res, int mi_row, |
789 | 0 | int mi_col, int *force_split) { |
790 | 0 | VP9_COMMON *const cm = &cpi->common; |
791 | 0 | #if CONFIG_VP9_HIGHBITDEPTH |
792 | 0 | if (cm->use_highbitdepth) return 0; |
793 | 0 | #endif |
794 | | // Avoid checking superblocks on/near boundary and avoid low resolutions. |
795 | | // Note superblock may still pick 64X64 if y_sad is very small |
796 | | // (i.e., y_sad < cpi->vbp_threshold_sad) below. For now leave this as is. |
797 | 0 | if (!low_res && (mi_col >= 8 && mi_col + 8 < cm->mi_cols && mi_row >= 8 && |
798 | 0 | mi_row + 8 < cm->mi_rows)) { |
799 | 0 | int num_16x16_skin = 0; |
800 | 0 | int num_16x16_nonskin = 0; |
801 | 0 | const int block_index = mi_row * cm->mi_cols + mi_col; |
802 | 0 | const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64]; |
803 | 0 | const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64]; |
804 | 0 | const int xmis = VPXMIN(cm->mi_cols - mi_col, bw); |
805 | 0 | const int ymis = VPXMIN(cm->mi_rows - mi_row, bh); |
806 | | // Loop through the 16x16 sub-blocks. |
807 | 0 | int i, j; |
808 | 0 | for (i = 0; i < ymis; i += 2) { |
809 | 0 | for (j = 0; j < xmis; j += 2) { |
810 | 0 | int bl_index = block_index + i * cm->mi_cols + j; |
811 | 0 | int is_skin = cpi->skin_map[bl_index]; |
812 | 0 | num_16x16_skin += is_skin; |
813 | 0 | num_16x16_nonskin += (1 - is_skin); |
814 | 0 | if (num_16x16_nonskin > 3) { |
815 | | // Exit loop if at least 4 of the 16x16 blocks are not skin. |
816 | 0 | i = ymis; |
817 | 0 | break; |
818 | 0 | } |
819 | 0 | } |
820 | 0 | } |
821 | 0 | if (num_16x16_skin > 12) { |
822 | 0 | *force_split = 1; |
823 | 0 | return 1; |
824 | 0 | } |
825 | 0 | } |
826 | 0 | return 0; |
827 | 0 | } |
828 | | |
829 | | static void set_low_temp_var_flag(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, |
830 | | v64x64 *vt, int64_t thresholds[], |
831 | | MV_REFERENCE_FRAME ref_frame_partition, |
832 | 0 | int mi_col, int mi_row) { |
833 | 0 | int i, j; |
834 | 0 | VP9_COMMON *const cm = &cpi->common; |
835 | 0 | const int mv_thr = cm->width > 640 ? 8 : 4; |
836 | | // Check temporal variance for bsize >= 16x16, if LAST_FRAME was selected and |
837 | | // int_pro mv is small. If the temporal variance is small set the flag |
838 | | // variance_low for the block. The variance threshold can be adjusted, the |
839 | | // higher the more aggressive. |
840 | 0 | if (ref_frame_partition == LAST_FRAME && |
841 | 0 | (cpi->sf.short_circuit_low_temp_var == 1 || |
842 | 0 | (xd->mi[0]->mv[0].as_mv.col < mv_thr && |
843 | 0 | xd->mi[0]->mv[0].as_mv.col > -mv_thr && |
844 | 0 | xd->mi[0]->mv[0].as_mv.row < mv_thr && |
845 | 0 | xd->mi[0]->mv[0].as_mv.row > -mv_thr))) { |
846 | 0 | if (xd->mi[0]->sb_type == BLOCK_64X64) { |
847 | 0 | if ((vt->part_variances).none.variance < (thresholds[0] >> 1)) |
848 | 0 | x->variance_low[0] = 1; |
849 | 0 | } else if (xd->mi[0]->sb_type == BLOCK_64X32) { |
850 | 0 | for (i = 0; i < 2; i++) { |
851 | 0 | if (vt->part_variances.horz[i].variance < (thresholds[0] >> 2)) |
852 | 0 | x->variance_low[i + 1] = 1; |
853 | 0 | } |
854 | 0 | } else if (xd->mi[0]->sb_type == BLOCK_32X64) { |
855 | 0 | for (i = 0; i < 2; i++) { |
856 | 0 | if (vt->part_variances.vert[i].variance < (thresholds[0] >> 2)) |
857 | 0 | x->variance_low[i + 3] = 1; |
858 | 0 | } |
859 | 0 | } else { |
860 | 0 | for (i = 0; i < 4; i++) { |
861 | 0 | const int idx[4][2] = { { 0, 0 }, { 0, 4 }, { 4, 0 }, { 4, 4 } }; |
862 | 0 | const int idx_str = |
863 | 0 | cm->mi_stride * (mi_row + idx[i][0]) + mi_col + idx[i][1]; |
864 | 0 | MODE_INFO **this_mi = cm->mi_grid_visible + idx_str; |
865 | |
|
866 | 0 | if (cm->mi_cols <= mi_col + idx[i][1] || |
867 | 0 | cm->mi_rows <= mi_row + idx[i][0]) |
868 | 0 | continue; |
869 | | |
870 | 0 | if ((*this_mi)->sb_type == BLOCK_32X32) { |
871 | 0 | int64_t threshold_32x32 = (cpi->sf.short_circuit_low_temp_var == 1 || |
872 | 0 | cpi->sf.short_circuit_low_temp_var == 3) |
873 | 0 | ? ((5 * thresholds[1]) >> 3) |
874 | 0 | : (thresholds[1] >> 1); |
875 | 0 | if (vt->split[i].part_variances.none.variance < threshold_32x32) |
876 | 0 | x->variance_low[i + 5] = 1; |
877 | 0 | } else if (cpi->sf.short_circuit_low_temp_var >= 2) { |
878 | | // For 32x16 and 16x32 blocks, the flag is set on each 16x16 block |
879 | | // inside. |
880 | 0 | if ((*this_mi)->sb_type == BLOCK_16X16 || |
881 | 0 | (*this_mi)->sb_type == BLOCK_32X16 || |
882 | 0 | (*this_mi)->sb_type == BLOCK_16X32) { |
883 | 0 | for (j = 0; j < 4; j++) { |
884 | 0 | if (vt->split[i].split[j].part_variances.none.variance < |
885 | 0 | (thresholds[2] >> 8)) |
886 | 0 | x->variance_low[(i << 2) + j + 9] = 1; |
887 | 0 | } |
888 | 0 | } |
889 | 0 | } |
890 | 0 | } |
891 | 0 | } |
892 | 0 | } |
893 | 0 | } |
894 | | |
895 | | static void copy_partitioning_helper(VP9_COMP *cpi, MACROBLOCK *x, |
896 | | MACROBLOCKD *xd, BLOCK_SIZE bsize, |
897 | 0 | int mi_row, int mi_col) { |
898 | 0 | VP9_COMMON *const cm = &cpi->common; |
899 | 0 | BLOCK_SIZE *prev_part = cpi->prev_partition; |
900 | 0 | int start_pos = mi_row * cm->mi_stride + mi_col; |
901 | |
|
902 | 0 | const int bsl = b_width_log2_lookup[bsize]; |
903 | 0 | const int bs = (1 << bsl) >> 2; |
904 | 0 | BLOCK_SIZE subsize; |
905 | 0 | PARTITION_TYPE partition; |
906 | |
|
907 | 0 | if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; |
908 | | |
909 | 0 | partition = partition_lookup[bsl][prev_part[start_pos]]; |
910 | 0 | subsize = get_subsize(bsize, partition); |
911 | |
|
912 | 0 | if (subsize < BLOCK_8X8) { |
913 | 0 | set_block_size(cpi, x, xd, mi_row, mi_col, bsize); |
914 | 0 | } else { |
915 | 0 | switch (partition) { |
916 | 0 | case PARTITION_NONE: |
917 | 0 | set_block_size(cpi, x, xd, mi_row, mi_col, bsize); |
918 | 0 | break; |
919 | 0 | case PARTITION_HORZ: |
920 | 0 | set_block_size(cpi, x, xd, mi_row, mi_col, subsize); |
921 | 0 | set_block_size(cpi, x, xd, mi_row + bs, mi_col, subsize); |
922 | 0 | break; |
923 | 0 | case PARTITION_VERT: |
924 | 0 | set_block_size(cpi, x, xd, mi_row, mi_col, subsize); |
925 | 0 | set_block_size(cpi, x, xd, mi_row, mi_col + bs, subsize); |
926 | 0 | break; |
927 | 0 | default: |
928 | 0 | assert(partition == PARTITION_SPLIT); |
929 | 0 | copy_partitioning_helper(cpi, x, xd, subsize, mi_row, mi_col); |
930 | 0 | copy_partitioning_helper(cpi, x, xd, subsize, mi_row + bs, mi_col); |
931 | 0 | copy_partitioning_helper(cpi, x, xd, subsize, mi_row, mi_col + bs); |
932 | 0 | copy_partitioning_helper(cpi, x, xd, subsize, mi_row + bs, mi_col + bs); |
933 | 0 | break; |
934 | 0 | } |
935 | 0 | } |
936 | 0 | } |
937 | | |
938 | | static int copy_partitioning(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, |
939 | | int mi_row, int mi_col, int segment_id, |
940 | 0 | int sb_offset) { |
941 | 0 | int svc_copy_allowed = 1; |
942 | 0 | int frames_since_key_thresh = 1; |
943 | 0 | if (cpi->use_svc) { |
944 | | // For SVC, don't allow copy if base spatial layer is key frame, or if |
945 | | // frame is not a temporal enhancement layer frame. |
946 | 0 | int layer = LAYER_IDS_TO_IDX(0, cpi->svc.temporal_layer_id, |
947 | 0 | cpi->svc.number_temporal_layers); |
948 | 0 | const LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer]; |
949 | 0 | if (lc->is_key_frame || !cpi->svc.non_reference_frame) svc_copy_allowed = 0; |
950 | 0 | frames_since_key_thresh = cpi->svc.number_spatial_layers << 1; |
951 | 0 | } |
952 | 0 | if (cpi->rc.frames_since_key > frames_since_key_thresh && svc_copy_allowed && |
953 | 0 | !cpi->resize_pending && segment_id == CR_SEGMENT_ID_BASE && |
954 | 0 | cpi->prev_segment_id[sb_offset] == CR_SEGMENT_ID_BASE && |
955 | 0 | cpi->copied_frame_cnt[sb_offset] < cpi->max_copied_frame) { |
956 | 0 | if (cpi->prev_partition != NULL) { |
957 | 0 | copy_partitioning_helper(cpi, x, xd, BLOCK_64X64, mi_row, mi_col); |
958 | 0 | cpi->copied_frame_cnt[sb_offset] += 1; |
959 | 0 | memcpy(x->variance_low, &(cpi->prev_variance_low[sb_offset * 25]), |
960 | 0 | sizeof(x->variance_low)); |
961 | 0 | return 1; |
962 | 0 | } |
963 | 0 | } |
964 | | |
965 | 0 | return 0; |
966 | 0 | } |
967 | | |
968 | | // Set the partition for mi_col/row_high (current resolution) based on |
969 | | // the previous spatial layer (mi_col/row). Returns 0 if partition is set, |
970 | | // returns 1 if no scale partitioning is done. Return 1 means the variance |
971 | | // partitioning will be used. |
972 | | static int scale_partitioning_svc(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, |
973 | | BLOCK_SIZE bsize, int mi_row, int mi_col, |
974 | 0 | int mi_row_high, int mi_col_high) { |
975 | 0 | VP9_COMMON *const cm = &cpi->common; |
976 | 0 | SVC *const svc = &cpi->svc; |
977 | 0 | BLOCK_SIZE *prev_part = svc->prev_partition_svc; |
978 | | // Variables with _high are for higher resolution. |
979 | 0 | int bsize_high = 0; |
980 | 0 | int subsize_high = 0; |
981 | 0 | const int bsl = b_width_log2_lookup[bsize]; |
982 | 0 | const int bs = (1 << bsl) >> 2; |
983 | 0 | const int has_rows = (mi_row_high + bs) < cm->mi_rows; |
984 | 0 | const int has_cols = (mi_col_high + bs) < cm->mi_cols; |
985 | |
|
986 | 0 | int start_pos; |
987 | 0 | BLOCK_SIZE bsize_low; |
988 | 0 | PARTITION_TYPE partition_high; |
989 | | |
990 | | // If the lower layer frame is outside the boundary (this can happen for |
991 | | // odd size resolutions) then do not scale partitioning from the lower |
992 | | // layer. Do variance based partitioning instead (return 1). |
993 | 0 | if (mi_row >= svc->mi_rows[svc->spatial_layer_id - 1] || |
994 | 0 | mi_col >= svc->mi_cols[svc->spatial_layer_id - 1]) |
995 | 0 | return 1; |
996 | | |
997 | | // Do not scale partitioning from lower layers on the boundary. Do |
998 | | // variance based partitioning instead (return 1). |
999 | 0 | if (!has_rows || !has_cols) return 1; |
1000 | | |
1001 | | // Find corresponding (mi_col/mi_row) block down-scaled by 2x2. |
1002 | 0 | start_pos = mi_row * (svc->mi_stride[svc->spatial_layer_id - 1]) + mi_col; |
1003 | 0 | bsize_low = prev_part[start_pos]; |
1004 | | |
1005 | | // For reference frames: return 1 (do variance-based partitioning) if the |
1006 | | // superblock is not low source sad and lower-resoln bsize is below 32x32. |
1007 | 0 | if (!cpi->svc.non_reference_frame && !x->skip_low_source_sad && |
1008 | 0 | bsize_low < BLOCK_32X32) |
1009 | 0 | return 1; |
1010 | | |
1011 | | // Scale up block size by 2x2. Force 64x64 for size larger than 32x32. |
1012 | 0 | if (bsize_low < BLOCK_32X32) { |
1013 | 0 | bsize_high = bsize_low + 3; |
1014 | 0 | } else if (bsize_low >= BLOCK_32X32) { |
1015 | 0 | bsize_high = BLOCK_64X64; |
1016 | 0 | } |
1017 | |
|
1018 | 0 | partition_high = partition_lookup[bsl][bsize_high]; |
1019 | 0 | subsize_high = get_subsize(bsize, partition_high); |
1020 | |
|
1021 | 0 | if (subsize_high < BLOCK_8X8) { |
1022 | 0 | set_block_size(cpi, x, xd, mi_row_high, mi_col_high, bsize_high); |
1023 | 0 | } else { |
1024 | 0 | switch (partition_high) { |
1025 | 0 | case PARTITION_NONE: |
1026 | 0 | set_block_size(cpi, x, xd, mi_row_high, mi_col_high, bsize_high); |
1027 | 0 | break; |
1028 | 0 | case PARTITION_HORZ: |
1029 | 0 | set_block_size(cpi, x, xd, mi_row_high, mi_col_high, subsize_high); |
1030 | 0 | if (subsize_high < BLOCK_64X64) |
1031 | 0 | set_block_size(cpi, x, xd, mi_row_high + bs, mi_col_high, |
1032 | 0 | subsize_high); |
1033 | 0 | break; |
1034 | 0 | case PARTITION_VERT: |
1035 | 0 | set_block_size(cpi, x, xd, mi_row_high, mi_col_high, subsize_high); |
1036 | 0 | if (subsize_high < BLOCK_64X64) |
1037 | 0 | set_block_size(cpi, x, xd, mi_row_high, mi_col_high + bs, |
1038 | 0 | subsize_high); |
1039 | 0 | break; |
1040 | 0 | default: |
1041 | 0 | assert(partition_high == PARTITION_SPLIT); |
1042 | 0 | if (scale_partitioning_svc(cpi, x, xd, subsize_high, mi_row, mi_col, |
1043 | 0 | mi_row_high, mi_col_high)) |
1044 | 0 | return 1; |
1045 | 0 | if (scale_partitioning_svc(cpi, x, xd, subsize_high, mi_row + (bs >> 1), |
1046 | 0 | mi_col, mi_row_high + bs, mi_col_high)) |
1047 | 0 | return 1; |
1048 | 0 | if (scale_partitioning_svc(cpi, x, xd, subsize_high, mi_row, |
1049 | 0 | mi_col + (bs >> 1), mi_row_high, |
1050 | 0 | mi_col_high + bs)) |
1051 | 0 | return 1; |
1052 | 0 | if (scale_partitioning_svc(cpi, x, xd, subsize_high, mi_row + (bs >> 1), |
1053 | 0 | mi_col + (bs >> 1), mi_row_high + bs, |
1054 | 0 | mi_col_high + bs)) |
1055 | 0 | return 1; |
1056 | 0 | break; |
1057 | 0 | } |
1058 | 0 | } |
1059 | | |
1060 | 0 | return 0; |
1061 | 0 | } |
1062 | | |
1063 | | static void update_partition_svc(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row, |
1064 | 0 | int mi_col) { |
1065 | 0 | VP9_COMMON *const cm = &cpi->common; |
1066 | 0 | BLOCK_SIZE *prev_part = cpi->svc.prev_partition_svc; |
1067 | 0 | int start_pos = mi_row * cm->mi_stride + mi_col; |
1068 | 0 | const int bsl = b_width_log2_lookup[bsize]; |
1069 | 0 | const int bs = (1 << bsl) >> 2; |
1070 | 0 | BLOCK_SIZE subsize; |
1071 | 0 | PARTITION_TYPE partition; |
1072 | 0 | const MODE_INFO *mi = NULL; |
1073 | 0 | int xx, yy; |
1074 | |
|
1075 | 0 | if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; |
1076 | | |
1077 | 0 | mi = cm->mi_grid_visible[start_pos]; |
1078 | 0 | partition = partition_lookup[bsl][mi->sb_type]; |
1079 | 0 | subsize = get_subsize(bsize, partition); |
1080 | 0 | if (subsize < BLOCK_8X8) { |
1081 | 0 | prev_part[start_pos] = bsize; |
1082 | 0 | } else { |
1083 | 0 | switch (partition) { |
1084 | 0 | case PARTITION_NONE: |
1085 | 0 | prev_part[start_pos] = bsize; |
1086 | 0 | if (bsize == BLOCK_64X64) { |
1087 | 0 | for (xx = 0; xx < 8; xx += 4) |
1088 | 0 | for (yy = 0; yy < 8; yy += 4) { |
1089 | 0 | if ((mi_row + xx < cm->mi_rows) && (mi_col + yy < cm->mi_cols)) |
1090 | 0 | prev_part[start_pos + xx * cm->mi_stride + yy] = bsize; |
1091 | 0 | } |
1092 | 0 | } |
1093 | 0 | break; |
1094 | 0 | case PARTITION_HORZ: |
1095 | 0 | prev_part[start_pos] = subsize; |
1096 | 0 | if (mi_row + bs < cm->mi_rows) |
1097 | 0 | prev_part[start_pos + bs * cm->mi_stride] = subsize; |
1098 | 0 | break; |
1099 | 0 | case PARTITION_VERT: |
1100 | 0 | prev_part[start_pos] = subsize; |
1101 | 0 | if (mi_col + bs < cm->mi_cols) prev_part[start_pos + bs] = subsize; |
1102 | 0 | break; |
1103 | 0 | default: |
1104 | 0 | assert(partition == PARTITION_SPLIT); |
1105 | 0 | update_partition_svc(cpi, subsize, mi_row, mi_col); |
1106 | 0 | update_partition_svc(cpi, subsize, mi_row + bs, mi_col); |
1107 | 0 | update_partition_svc(cpi, subsize, mi_row, mi_col + bs); |
1108 | 0 | update_partition_svc(cpi, subsize, mi_row + bs, mi_col + bs); |
1109 | 0 | break; |
1110 | 0 | } |
1111 | 0 | } |
1112 | 0 | } |
1113 | | |
1114 | | static void update_prev_partition_helper(VP9_COMP *cpi, BLOCK_SIZE bsize, |
1115 | 0 | int mi_row, int mi_col) { |
1116 | 0 | VP9_COMMON *const cm = &cpi->common; |
1117 | 0 | BLOCK_SIZE *prev_part = cpi->prev_partition; |
1118 | 0 | int start_pos = mi_row * cm->mi_stride + mi_col; |
1119 | 0 | const int bsl = b_width_log2_lookup[bsize]; |
1120 | 0 | const int bs = (1 << bsl) >> 2; |
1121 | 0 | BLOCK_SIZE subsize; |
1122 | 0 | PARTITION_TYPE partition; |
1123 | 0 | const MODE_INFO *mi = NULL; |
1124 | |
|
1125 | 0 | if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; |
1126 | | |
1127 | 0 | mi = cm->mi_grid_visible[start_pos]; |
1128 | 0 | partition = partition_lookup[bsl][mi->sb_type]; |
1129 | 0 | subsize = get_subsize(bsize, partition); |
1130 | 0 | if (subsize < BLOCK_8X8) { |
1131 | 0 | prev_part[start_pos] = bsize; |
1132 | 0 | } else { |
1133 | 0 | switch (partition) { |
1134 | 0 | case PARTITION_NONE: prev_part[start_pos] = bsize; break; |
1135 | 0 | case PARTITION_HORZ: |
1136 | 0 | prev_part[start_pos] = subsize; |
1137 | 0 | if (mi_row + bs < cm->mi_rows) |
1138 | 0 | prev_part[start_pos + bs * cm->mi_stride] = subsize; |
1139 | 0 | break; |
1140 | 0 | case PARTITION_VERT: |
1141 | 0 | prev_part[start_pos] = subsize; |
1142 | 0 | if (mi_col + bs < cm->mi_cols) prev_part[start_pos + bs] = subsize; |
1143 | 0 | break; |
1144 | 0 | default: |
1145 | 0 | assert(partition == PARTITION_SPLIT); |
1146 | 0 | update_prev_partition_helper(cpi, subsize, mi_row, mi_col); |
1147 | 0 | update_prev_partition_helper(cpi, subsize, mi_row + bs, mi_col); |
1148 | 0 | update_prev_partition_helper(cpi, subsize, mi_row, mi_col + bs); |
1149 | 0 | update_prev_partition_helper(cpi, subsize, mi_row + bs, mi_col + bs); |
1150 | 0 | break; |
1151 | 0 | } |
1152 | 0 | } |
1153 | 0 | } |
1154 | | |
1155 | | static void update_prev_partition(VP9_COMP *cpi, MACROBLOCK *x, int segment_id, |
1156 | 0 | int mi_row, int mi_col, int sb_offset) { |
1157 | 0 | update_prev_partition_helper(cpi, BLOCK_64X64, mi_row, mi_col); |
1158 | 0 | cpi->prev_segment_id[sb_offset] = segment_id; |
1159 | 0 | memcpy(&(cpi->prev_variance_low[sb_offset * 25]), x->variance_low, |
1160 | 0 | sizeof(x->variance_low)); |
1161 | | // Reset the counter for copy partitioning |
1162 | 0 | cpi->copied_frame_cnt[sb_offset] = 0; |
1163 | 0 | } |
1164 | | |
1165 | | static void chroma_check(VP9_COMP *cpi, MACROBLOCK *x, int bsize, |
1166 | | unsigned int y_sad, int is_key_frame, |
1167 | 0 | int scene_change_detected) { |
1168 | 0 | int i; |
1169 | 0 | MACROBLOCKD *xd = &x->e_mbd; |
1170 | 0 | int shift = 2; |
1171 | |
|
1172 | 0 | if (is_key_frame) return; |
1173 | | |
1174 | | // For speed > 8, avoid the chroma check if y_sad is above threshold. |
1175 | 0 | if (cpi->oxcf.speed > 8) { |
1176 | 0 | if (y_sad > cpi->vbp_thresholds[1] && |
1177 | 0 | (!cpi->noise_estimate.enabled || |
1178 | 0 | vp9_noise_estimate_extract_level(&cpi->noise_estimate) < kMedium)) |
1179 | 0 | return; |
1180 | 0 | } |
1181 | | |
1182 | 0 | if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && scene_change_detected) |
1183 | 0 | shift = 5; |
1184 | |
|
1185 | 0 | for (i = 1; i <= 2; ++i) { |
1186 | 0 | unsigned int uv_sad = UINT_MAX; |
1187 | 0 | struct macroblock_plane *p = &x->plane[i]; |
1188 | 0 | struct macroblockd_plane *pd = &xd->plane[i]; |
1189 | 0 | const BLOCK_SIZE bs = get_plane_block_size(bsize, pd); |
1190 | |
|
1191 | 0 | if (bs != BLOCK_INVALID) |
1192 | 0 | uv_sad = cpi->fn_ptr[bs].sdf(p->src.buf, p->src.stride, pd->dst.buf, |
1193 | 0 | pd->dst.stride); |
1194 | | |
1195 | | // TODO(marpan): Investigate if we should lower this threshold if |
1196 | | // superblock is detected as skin. |
1197 | 0 | x->color_sensitivity[i - 1] = uv_sad > (y_sad >> shift); |
1198 | 0 | } |
1199 | 0 | } |
1200 | | |
1201 | | static uint64_t avg_source_sad(VP9_COMP *cpi, MACROBLOCK *x, int shift, |
1202 | 0 | int sb_offset) { |
1203 | 0 | unsigned int tmp_sse; |
1204 | 0 | uint64_t tmp_sad; |
1205 | 0 | unsigned int tmp_variance; |
1206 | 0 | const BLOCK_SIZE bsize = BLOCK_64X64; |
1207 | 0 | uint8_t *src_y = cpi->Source->y_buffer; |
1208 | 0 | int src_ystride = cpi->Source->y_stride; |
1209 | 0 | uint8_t *last_src_y = cpi->Last_Source->y_buffer; |
1210 | 0 | int last_src_ystride = cpi->Last_Source->y_stride; |
1211 | 0 | uint64_t avg_source_sad_threshold = 10000; |
1212 | 0 | uint64_t avg_source_sad_threshold2 = 12000; |
1213 | 0 | #if CONFIG_VP9_HIGHBITDEPTH |
1214 | 0 | if (cpi->common.use_highbitdepth) return 0; |
1215 | 0 | #endif |
1216 | 0 | src_y += shift; |
1217 | 0 | last_src_y += shift; |
1218 | 0 | tmp_sad = |
1219 | 0 | cpi->fn_ptr[bsize].sdf(src_y, src_ystride, last_src_y, last_src_ystride); |
1220 | 0 | tmp_variance = vpx_variance64x64(src_y, src_ystride, last_src_y, |
1221 | 0 | last_src_ystride, &tmp_sse); |
1222 | | // Note: tmp_sse - tmp_variance = ((sum * sum) >> 12) |
1223 | 0 | if (tmp_sad < avg_source_sad_threshold) |
1224 | 0 | x->content_state_sb = ((tmp_sse - tmp_variance) < 25) ? kLowSadLowSumdiff |
1225 | 0 | : kLowSadHighSumdiff; |
1226 | 0 | else |
1227 | 0 | x->content_state_sb = ((tmp_sse - tmp_variance) < 25) ? kHighSadLowSumdiff |
1228 | 0 | : kHighSadHighSumdiff; |
1229 | | |
1230 | | // Detect large lighting change. |
1231 | 0 | if (cpi->oxcf.content != VP9E_CONTENT_SCREEN && |
1232 | 0 | cpi->oxcf.rc_mode == VPX_CBR && tmp_variance < (tmp_sse >> 3) && |
1233 | 0 | (tmp_sse - tmp_variance) > 10000) |
1234 | 0 | x->content_state_sb = kLowVarHighSumdiff; |
1235 | 0 | else if (tmp_sad > (avg_source_sad_threshold << 1)) |
1236 | 0 | x->content_state_sb = kVeryHighSad; |
1237 | |
|
1238 | 0 | if (cpi->content_state_sb_fd != NULL) { |
1239 | 0 | if (tmp_sad < avg_source_sad_threshold2) { |
1240 | | // Cap the increment to 255. |
1241 | 0 | if (cpi->content_state_sb_fd[sb_offset] < 255) |
1242 | 0 | cpi->content_state_sb_fd[sb_offset]++; |
1243 | 0 | } else { |
1244 | 0 | cpi->content_state_sb_fd[sb_offset] = 0; |
1245 | 0 | } |
1246 | 0 | } |
1247 | 0 | if (tmp_sad == 0) x->zero_temp_sad_source = 1; |
1248 | 0 | return tmp_sad; |
1249 | 0 | } |
1250 | | |
1251 | | // This function chooses partitioning based on the variance between source and |
1252 | | // reconstructed last, where variance is computed for down-sampled inputs. |
1253 | | static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile, |
1254 | 0 | MACROBLOCK *x, int mi_row, int mi_col) { |
1255 | 0 | VP9_COMMON *const cm = &cpi->common; |
1256 | 0 | MACROBLOCKD *xd = &x->e_mbd; |
1257 | 0 | int i, j, k, m; |
1258 | 0 | v64x64 vt; |
1259 | 0 | v16x16 *vt2 = NULL; |
1260 | 0 | int force_split[21]; |
1261 | 0 | int avg_32x32; |
1262 | 0 | int max_var_32x32 = 0; |
1263 | 0 | int min_var_32x32 = INT_MAX; |
1264 | 0 | int var_32x32; |
1265 | 0 | int avg_16x16[4]; |
1266 | 0 | int maxvar_16x16[4]; |
1267 | 0 | int minvar_16x16[4]; |
1268 | 0 | int64_t threshold_4x4avg; |
1269 | 0 | NOISE_LEVEL noise_level = kLow; |
1270 | 0 | int content_state = 0; |
1271 | 0 | uint8_t *s; |
1272 | 0 | const uint8_t *d; |
1273 | 0 | int sp; |
1274 | 0 | int dp; |
1275 | 0 | int compute_minmax_variance = 1; |
1276 | 0 | unsigned int y_sad = UINT_MAX; |
1277 | 0 | BLOCK_SIZE bsize = BLOCK_64X64; |
1278 | | // Ref frame used in partitioning. |
1279 | 0 | MV_REFERENCE_FRAME ref_frame_partition = LAST_FRAME; |
1280 | 0 | int pixels_wide = 64, pixels_high = 64; |
1281 | 0 | int64_t thresholds[4] = { cpi->vbp_thresholds[0], cpi->vbp_thresholds[1], |
1282 | 0 | cpi->vbp_thresholds[2], cpi->vbp_thresholds[3] }; |
1283 | 0 | int scene_change_detected = |
1284 | 0 | cpi->rc.high_source_sad || |
1285 | 0 | (cpi->use_svc && cpi->svc.high_source_sad_superframe); |
1286 | 0 | int force_64_split = scene_change_detected || |
1287 | 0 | (cpi->oxcf.content == VP9E_CONTENT_SCREEN && |
1288 | 0 | cpi->compute_source_sad_onepass && |
1289 | 0 | cpi->sf.use_source_sad && !x->zero_temp_sad_source); |
1290 | | |
1291 | | // For the variance computation under SVC mode, we treat the frame as key if |
1292 | | // the reference (base layer frame) is key frame (i.e., is_key_frame == 1). |
1293 | 0 | int is_key_frame = |
1294 | 0 | (frame_is_intra_only(cm) || |
1295 | 0 | (is_one_pass_svc(cpi) && |
1296 | 0 | cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)); |
1297 | |
|
1298 | 0 | if (!is_key_frame) { |
1299 | 0 | if (cm->frame_refs[LAST_FRAME - 1].sf.x_scale_fp == REF_INVALID_SCALE || |
1300 | 0 | cm->frame_refs[LAST_FRAME - 1].sf.y_scale_fp == REF_INVALID_SCALE) |
1301 | 0 | is_key_frame = 1; |
1302 | 0 | } |
1303 | | |
1304 | | // Allow for sub8x8 (4x4) partition on key frames, but only for hybrid mode |
1305 | | // (i.e., sf->nonrd_keyframe = 0), where for small blocks rd intra pickmode |
1306 | | // (vp9_rd_pick_intra_mode_sb) is used. The nonrd intra pickmode |
1307 | | // (vp9_pick_intra_mode) does not currently support sub8x8 blocks. This causes |
1308 | | // the issue: 44166813. Assert is added in vp9_pick_intra_mode to check this. |
1309 | 0 | const int use_4x4_partition = |
1310 | 0 | frame_is_intra_only(cm) && !cpi->sf.nonrd_keyframe; |
1311 | 0 | const int low_res = (cm->width <= 352 && cm->height <= 288); |
1312 | 0 | int variance4x4downsample[16]; |
1313 | 0 | int segment_id; |
1314 | 0 | int sb_offset = (cm->mi_stride >> 3) * (mi_row >> 3) + (mi_col >> 3); |
1315 | | |
1316 | | // For SVC: check if LAST frame is NULL or if the resolution of LAST is |
1317 | | // different than the current frame resolution, and if so, treat this frame |
1318 | | // as a key frame, for the purpose of the superblock partitioning. |
1319 | | // LAST == NULL can happen in some cases where enhancement spatial layers are |
1320 | | // enabled dyanmically in the stream and the only reference is the spatial |
1321 | | // reference (GOLDEN). |
1322 | 0 | if (cpi->use_svc) { |
1323 | 0 | const YV12_BUFFER_CONFIG *const ref = get_ref_frame_buffer(cpi, LAST_FRAME); |
1324 | 0 | if (ref == NULL || ref->y_crop_height != cm->height || |
1325 | 0 | ref->y_crop_width != cm->width) |
1326 | 0 | is_key_frame = 1; |
1327 | 0 | } |
1328 | |
|
1329 | 0 | set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64); |
1330 | 0 | set_segment_index(cpi, x, mi_row, mi_col, BLOCK_64X64, 0); |
1331 | 0 | segment_id = xd->mi[0]->segment_id; |
1332 | |
|
1333 | 0 | if (cpi->oxcf.speed >= 8 || (cpi->use_svc && cpi->svc.non_reference_frame)) |
1334 | 0 | compute_minmax_variance = 0; |
1335 | |
|
1336 | 0 | memset(x->variance_low, 0, sizeof(x->variance_low)); |
1337 | |
|
1338 | 0 | if (cpi->sf.use_source_sad && !is_key_frame) { |
1339 | 0 | int sb_offset2 = ((cm->mi_cols + 7) >> 3) * (mi_row >> 3) + (mi_col >> 3); |
1340 | 0 | content_state = x->content_state_sb; |
1341 | 0 | x->skip_low_source_sad = (content_state == kLowSadLowSumdiff || |
1342 | 0 | content_state == kLowSadHighSumdiff) |
1343 | 0 | ? 1 |
1344 | 0 | : 0; |
1345 | 0 | x->lowvar_highsumdiff = (content_state == kLowVarHighSumdiff) ? 1 : 0; |
1346 | 0 | if (cpi->content_state_sb_fd != NULL) |
1347 | 0 | x->last_sb_high_content = cpi->content_state_sb_fd[sb_offset2]; |
1348 | | |
1349 | | // For SVC on top spatial layer: use/scale the partition from |
1350 | | // the lower spatial resolution if svc_use_lowres_part is enabled. |
1351 | 0 | if (cpi->sf.svc_use_lowres_part && |
1352 | 0 | cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1 && |
1353 | 0 | cpi->svc.prev_partition_svc != NULL && content_state != kVeryHighSad) { |
1354 | 0 | if (!scale_partitioning_svc(cpi, x, xd, BLOCK_64X64, mi_row >> 1, |
1355 | 0 | mi_col >> 1, mi_row, mi_col)) { |
1356 | 0 | if (cpi->sf.copy_partition_flag) { |
1357 | 0 | update_prev_partition(cpi, x, segment_id, mi_row, mi_col, sb_offset); |
1358 | 0 | } |
1359 | 0 | return 0; |
1360 | 0 | } |
1361 | 0 | } |
1362 | | // If source_sad is low copy the partition without computing the y_sad. |
1363 | 0 | if (x->skip_low_source_sad && cpi->sf.copy_partition_flag && |
1364 | 0 | !force_64_split && |
1365 | 0 | copy_partitioning(cpi, x, xd, mi_row, mi_col, segment_id, sb_offset)) { |
1366 | 0 | x->sb_use_mv_part = 1; |
1367 | 0 | if (cpi->sf.svc_use_lowres_part && |
1368 | 0 | cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 2) |
1369 | 0 | update_partition_svc(cpi, BLOCK_64X64, mi_row, mi_col); |
1370 | 0 | return 0; |
1371 | 0 | } |
1372 | 0 | } |
1373 | | |
1374 | 0 | if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled && |
1375 | 0 | cyclic_refresh_segment_id_boosted(segment_id)) { |
1376 | 0 | int q = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex); |
1377 | 0 | set_vbp_thresholds(cpi, thresholds, q, content_state); |
1378 | 0 | } else { |
1379 | 0 | set_vbp_thresholds(cpi, thresholds, cm->base_qindex, content_state); |
1380 | 0 | } |
1381 | | // Decrease 32x32 split threshold for screen on base layer, for scene |
1382 | | // change/high motion frames. |
1383 | 0 | if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && |
1384 | 0 | cpi->svc.spatial_layer_id == 0 && force_64_split) |
1385 | 0 | thresholds[1] = 3 * thresholds[1] >> 2; |
1386 | | |
1387 | | // For non keyframes, disable 4x4 average for low resolution when speed = 8 |
1388 | 0 | threshold_4x4avg = (cpi->oxcf.speed < 8) ? thresholds[1] << 1 : INT64_MAX; |
1389 | |
|
1390 | 0 | if (xd->mb_to_right_edge < 0) pixels_wide += (xd->mb_to_right_edge >> 3); |
1391 | 0 | if (xd->mb_to_bottom_edge < 0) pixels_high += (xd->mb_to_bottom_edge >> 3); |
1392 | |
|
1393 | 0 | s = x->plane[0].src.buf; |
1394 | 0 | sp = x->plane[0].src.stride; |
1395 | | |
1396 | | // Index for force_split: 0 for 64x64, 1-4 for 32x32 blocks, |
1397 | | // 5-20 for the 16x16 blocks. |
1398 | 0 | force_split[0] = force_64_split; |
1399 | |
|
1400 | 0 | if (!is_key_frame) { |
1401 | | // In the case of spatial/temporal scalable coding, the assumption here is |
1402 | | // that the temporal reference frame will always be of type LAST_FRAME. |
1403 | | // TODO(marpan): If that assumption is broken, we need to revisit this code. |
1404 | 0 | MODE_INFO *mi = xd->mi[0]; |
1405 | 0 | YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME); |
1406 | |
|
1407 | 0 | const YV12_BUFFER_CONFIG *yv12_g = NULL; |
1408 | 0 | unsigned int y_sad_g, y_sad_thr, y_sad_last; |
1409 | 0 | bsize = BLOCK_32X32 + (mi_col + 4 < cm->mi_cols) * 2 + |
1410 | 0 | (mi_row + 4 < cm->mi_rows); |
1411 | |
|
1412 | 0 | assert(yv12 != NULL); |
1413 | |
|
1414 | 0 | if (!(is_one_pass_svc(cpi) && cpi->svc.spatial_layer_id) || |
1415 | 0 | cpi->svc.use_gf_temporal_ref_current_layer) { |
1416 | | // For now, GOLDEN will not be used for non-zero spatial layers, since |
1417 | | // it may not be a temporal reference. |
1418 | 0 | yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME); |
1419 | 0 | } |
1420 | | |
1421 | | // Only compute y_sad_g (sad for golden reference) for speed < 8. |
1422 | 0 | if (cpi->oxcf.speed < 8 && yv12_g && yv12_g != yv12 && |
1423 | 0 | (cpi->ref_frame_flags & VP9_GOLD_FLAG)) { |
1424 | 0 | vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col, |
1425 | 0 | &cm->frame_refs[GOLDEN_FRAME - 1].sf); |
1426 | 0 | y_sad_g = cpi->fn_ptr[bsize].sdf( |
1427 | 0 | x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf, |
1428 | 0 | xd->plane[0].pre[0].stride); |
1429 | 0 | } else { |
1430 | 0 | y_sad_g = UINT_MAX; |
1431 | 0 | } |
1432 | |
|
1433 | 0 | if (cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR && |
1434 | 0 | cpi->rc.is_src_frame_alt_ref) { |
1435 | 0 | yv12 = get_ref_frame_buffer(cpi, ALTREF_FRAME); |
1436 | 0 | vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col, |
1437 | 0 | &cm->frame_refs[ALTREF_FRAME - 1].sf); |
1438 | 0 | mi->ref_frame[0] = ALTREF_FRAME; |
1439 | 0 | y_sad_g = UINT_MAX; |
1440 | 0 | } else { |
1441 | 0 | vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col, |
1442 | 0 | &cm->frame_refs[LAST_FRAME - 1].sf); |
1443 | 0 | mi->ref_frame[0] = LAST_FRAME; |
1444 | 0 | } |
1445 | 0 | mi->ref_frame[1] = NO_REF_FRAME; |
1446 | 0 | mi->sb_type = BLOCK_64X64; |
1447 | 0 | mi->mv[0].as_int = 0; |
1448 | 0 | mi->interp_filter = BILINEAR; |
1449 | |
|
1450 | 0 | if (cpi->oxcf.speed >= 8 && !low_res && |
1451 | 0 | x->content_state_sb != kVeryHighSad) { |
1452 | 0 | y_sad = cpi->fn_ptr[bsize].sdf( |
1453 | 0 | x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf, |
1454 | 0 | xd->plane[0].pre[0].stride); |
1455 | 0 | } else { |
1456 | 0 | const MV dummy_mv = { 0, 0 }; |
1457 | 0 | y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col, |
1458 | 0 | &dummy_mv); |
1459 | 0 | x->sb_use_mv_part = 1; |
1460 | 0 | x->sb_mvcol_part = mi->mv[0].as_mv.col; |
1461 | 0 | x->sb_mvrow_part = mi->mv[0].as_mv.row; |
1462 | 0 | if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && |
1463 | 0 | cpi->svc.spatial_layer_id == cpi->svc.first_spatial_layer_to_encode && |
1464 | 0 | cpi->svc.high_num_blocks_with_motion && !x->zero_temp_sad_source && |
1465 | 0 | cm->width > 640 && cm->height > 480) { |
1466 | | // Disable split below 16x16 block size when scroll motion (horz or |
1467 | | // vert) is detected. |
1468 | | // TODO(marpan/jianj): Improve this condition: issue is that search |
1469 | | // range is hard-coded/limited in vp9_int_pro_motion_estimation() so |
1470 | | // scroll motion may not be detected here. |
1471 | 0 | if (((abs(x->sb_mvrow_part) >= 48 && abs(x->sb_mvcol_part) <= 8) || |
1472 | 0 | (abs(x->sb_mvcol_part) >= 48 && abs(x->sb_mvrow_part) <= 8)) && |
1473 | 0 | y_sad < 100000) { |
1474 | 0 | compute_minmax_variance = 0; |
1475 | 0 | thresholds[2] = INT64_MAX; |
1476 | 0 | } |
1477 | 0 | } |
1478 | 0 | } |
1479 | |
|
1480 | 0 | y_sad_last = y_sad; |
1481 | | // Pick ref frame for partitioning, bias last frame when y_sad_g and y_sad |
1482 | | // are close if short_circuit_low_temp_var is on. |
1483 | 0 | y_sad_thr = cpi->sf.short_circuit_low_temp_var ? (y_sad * 7) >> 3 : y_sad; |
1484 | 0 | if (y_sad_g < y_sad_thr) { |
1485 | 0 | vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col, |
1486 | 0 | &cm->frame_refs[GOLDEN_FRAME - 1].sf); |
1487 | 0 | mi->ref_frame[0] = GOLDEN_FRAME; |
1488 | 0 | mi->mv[0].as_int = 0; |
1489 | 0 | y_sad = y_sad_g; |
1490 | 0 | ref_frame_partition = GOLDEN_FRAME; |
1491 | 0 | } else { |
1492 | 0 | x->pred_mv[LAST_FRAME] = mi->mv[0].as_mv; |
1493 | 0 | ref_frame_partition = LAST_FRAME; |
1494 | 0 | } |
1495 | |
|
1496 | 0 | set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]); |
1497 | 0 | vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_64X64); |
1498 | |
|
1499 | 0 | if (cpi->use_skin_detection) |
1500 | 0 | x->sb_is_skin = skin_sb_split(cpi, low_res, mi_row, mi_col, force_split); |
1501 | |
|
1502 | 0 | d = xd->plane[0].dst.buf; |
1503 | 0 | dp = xd->plane[0].dst.stride; |
1504 | | |
1505 | | // If the y_sad is very small, take 64x64 as partition and exit. |
1506 | | // Don't check on boosted segment for now, as 64x64 is suppressed there. |
1507 | 0 | if (segment_id == CR_SEGMENT_ID_BASE && y_sad < cpi->vbp_threshold_sad) { |
1508 | 0 | const int block_width = num_8x8_blocks_wide_lookup[BLOCK_64X64]; |
1509 | 0 | const int block_height = num_8x8_blocks_high_lookup[BLOCK_64X64]; |
1510 | 0 | if (mi_col + block_width / 2 < cm->mi_cols && |
1511 | 0 | mi_row + block_height / 2 < cm->mi_rows) { |
1512 | 0 | set_block_size(cpi, x, xd, mi_row, mi_col, BLOCK_64X64); |
1513 | 0 | x->variance_low[0] = 1; |
1514 | 0 | chroma_check(cpi, x, bsize, y_sad, is_key_frame, scene_change_detected); |
1515 | 0 | if (cpi->sf.svc_use_lowres_part && |
1516 | 0 | cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 2) |
1517 | 0 | update_partition_svc(cpi, BLOCK_64X64, mi_row, mi_col); |
1518 | 0 | if (cpi->sf.copy_partition_flag) { |
1519 | 0 | update_prev_partition(cpi, x, segment_id, mi_row, mi_col, sb_offset); |
1520 | 0 | } |
1521 | 0 | return 0; |
1522 | 0 | } |
1523 | 0 | } |
1524 | | |
1525 | | // If the y_sad is small enough, copy the partition of the superblock in the |
1526 | | // last frame to current frame only if the last frame is not a keyframe. |
1527 | | // Stop the copy every cpi->max_copied_frame to refresh the partition. |
1528 | | // TODO(jianj) : tune the threshold. |
1529 | 0 | if (cpi->sf.copy_partition_flag && y_sad_last < cpi->vbp_threshold_copy && |
1530 | 0 | copy_partitioning(cpi, x, xd, mi_row, mi_col, segment_id, sb_offset)) { |
1531 | 0 | chroma_check(cpi, x, bsize, y_sad, is_key_frame, scene_change_detected); |
1532 | 0 | if (cpi->sf.svc_use_lowres_part && |
1533 | 0 | cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 2) |
1534 | 0 | update_partition_svc(cpi, BLOCK_64X64, mi_row, mi_col); |
1535 | 0 | return 0; |
1536 | 0 | } |
1537 | 0 | } else { |
1538 | 0 | d = VP9_VAR_OFFS; |
1539 | 0 | dp = 0; |
1540 | 0 | #if CONFIG_VP9_HIGHBITDEPTH |
1541 | 0 | if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { |
1542 | 0 | switch (xd->bd) { |
1543 | 0 | case 10: d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_10); break; |
1544 | 0 | case 12: d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_12); break; |
1545 | 0 | case 8: |
1546 | 0 | default: d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_8); break; |
1547 | 0 | } |
1548 | 0 | } |
1549 | 0 | #endif // CONFIG_VP9_HIGHBITDEPTH |
1550 | 0 | } |
1551 | | |
1552 | 0 | if (low_res && threshold_4x4avg < INT64_MAX) |
1553 | 0 | CHECK_MEM_ERROR(&cm->error, vt2, vpx_calloc(16, sizeof(*vt2))); |
1554 | | // Fill in the entire tree of 8x8 (or 4x4 under some conditions) variances |
1555 | | // for splits. |
1556 | 0 | for (i = 0; i < 4; i++) { |
1557 | 0 | const int x32_idx = ((i & 1) << 5); |
1558 | 0 | const int y32_idx = ((i >> 1) << 5); |
1559 | 0 | const int i2 = i << 2; |
1560 | 0 | force_split[i + 1] = 0; |
1561 | 0 | avg_16x16[i] = 0; |
1562 | 0 | maxvar_16x16[i] = 0; |
1563 | 0 | minvar_16x16[i] = INT_MAX; |
1564 | 0 | for (j = 0; j < 4; j++) { |
1565 | 0 | const int x16_idx = x32_idx + ((j & 1) << 4); |
1566 | 0 | const int y16_idx = y32_idx + ((j >> 1) << 4); |
1567 | 0 | const int split_index = 5 + i2 + j; |
1568 | 0 | v16x16 *vst = &vt.split[i].split[j]; |
1569 | 0 | force_split[split_index] = 0; |
1570 | 0 | variance4x4downsample[i2 + j] = 0; |
1571 | 0 | if (!is_key_frame) { |
1572 | 0 | fill_variance_8x8avg(s, sp, d, dp, x16_idx, y16_idx, vst, |
1573 | 0 | #if CONFIG_VP9_HIGHBITDEPTH |
1574 | 0 | xd->cur_buf->flags, |
1575 | 0 | #endif |
1576 | 0 | pixels_wide, pixels_high, is_key_frame); |
1577 | 0 | fill_variance_tree(&vt.split[i].split[j], BLOCK_16X16); |
1578 | 0 | get_variance(&vt.split[i].split[j].part_variances.none); |
1579 | 0 | avg_16x16[i] += vt.split[i].split[j].part_variances.none.variance; |
1580 | 0 | if (vt.split[i].split[j].part_variances.none.variance < minvar_16x16[i]) |
1581 | 0 | minvar_16x16[i] = vt.split[i].split[j].part_variances.none.variance; |
1582 | 0 | if (vt.split[i].split[j].part_variances.none.variance > maxvar_16x16[i]) |
1583 | 0 | maxvar_16x16[i] = vt.split[i].split[j].part_variances.none.variance; |
1584 | 0 | if (vt.split[i].split[j].part_variances.none.variance > thresholds[2]) { |
1585 | | // 16X16 variance is above threshold for split, so force split to 8x8 |
1586 | | // for this 16x16 block (this also forces splits for upper levels). |
1587 | 0 | force_split[split_index] = 1; |
1588 | 0 | force_split[i + 1] = 1; |
1589 | 0 | force_split[0] = 1; |
1590 | 0 | } else if (compute_minmax_variance && |
1591 | 0 | vt.split[i].split[j].part_variances.none.variance > |
1592 | 0 | thresholds[1] && |
1593 | 0 | !cyclic_refresh_segment_id_boosted(segment_id)) { |
1594 | | // We have some nominal amount of 16x16 variance (based on average), |
1595 | | // compute the minmax over the 8x8 sub-blocks, and if above threshold, |
1596 | | // force split to 8x8 block for this 16x16 block. |
1597 | 0 | int minmax = compute_minmax_8x8(s, sp, d, dp, x16_idx, y16_idx, |
1598 | 0 | #if CONFIG_VP9_HIGHBITDEPTH |
1599 | 0 | xd->cur_buf->flags, |
1600 | 0 | #endif |
1601 | 0 | pixels_wide, pixels_high); |
1602 | 0 | int thresh_minmax = (int)cpi->vbp_threshold_minmax; |
1603 | 0 | if (x->content_state_sb == kVeryHighSad) |
1604 | 0 | thresh_minmax = thresh_minmax << 1; |
1605 | 0 | if (minmax > thresh_minmax) { |
1606 | 0 | force_split[split_index] = 1; |
1607 | 0 | force_split[i + 1] = 1; |
1608 | 0 | force_split[0] = 1; |
1609 | 0 | } |
1610 | 0 | } |
1611 | 0 | } |
1612 | 0 | if (is_key_frame || |
1613 | 0 | (low_res && vt.split[i].split[j].part_variances.none.variance > |
1614 | 0 | threshold_4x4avg)) { |
1615 | 0 | force_split[split_index] = 0; |
1616 | | // Go down to 4x4 down-sampling for variance. |
1617 | 0 | variance4x4downsample[i2 + j] = 1; |
1618 | 0 | for (k = 0; k < 4; k++) { |
1619 | 0 | int x8_idx = x16_idx + ((k & 1) << 3); |
1620 | 0 | int y8_idx = y16_idx + ((k >> 1) << 3); |
1621 | 0 | v8x8 *vst2 = is_key_frame ? &vst->split[k] : &vt2[i2 + j].split[k]; |
1622 | 0 | fill_variance_4x4avg(s, sp, d, dp, x8_idx, y8_idx, vst2, |
1623 | 0 | #if CONFIG_VP9_HIGHBITDEPTH |
1624 | 0 | xd->cur_buf->flags, |
1625 | 0 | #endif |
1626 | 0 | pixels_wide, pixels_high, is_key_frame); |
1627 | 0 | } |
1628 | 0 | } |
1629 | 0 | } |
1630 | 0 | } |
1631 | 0 | if (cpi->noise_estimate.enabled) |
1632 | 0 | noise_level = vp9_noise_estimate_extract_level(&cpi->noise_estimate); |
1633 | | // Fill the rest of the variance tree by summing split partition values. |
1634 | 0 | avg_32x32 = 0; |
1635 | 0 | for (i = 0; i < 4; i++) { |
1636 | 0 | const int i2 = i << 2; |
1637 | 0 | for (j = 0; j < 4; j++) { |
1638 | 0 | if (variance4x4downsample[i2 + j] == 1) { |
1639 | 0 | v16x16 *vtemp = (!is_key_frame) ? &vt2[i2 + j] : &vt.split[i].split[j]; |
1640 | 0 | for (m = 0; m < 4; m++) fill_variance_tree(&vtemp->split[m], BLOCK_8X8); |
1641 | 0 | fill_variance_tree(vtemp, BLOCK_16X16); |
1642 | | // If variance of this 16x16 block is above the threshold, force block |
1643 | | // to split. This also forces a split on the upper levels. |
1644 | 0 | get_variance(&vtemp->part_variances.none); |
1645 | 0 | if (vtemp->part_variances.none.variance > thresholds[2]) { |
1646 | 0 | force_split[5 + i2 + j] = 1; |
1647 | 0 | force_split[i + 1] = 1; |
1648 | 0 | force_split[0] = 1; |
1649 | 0 | } |
1650 | 0 | } |
1651 | 0 | } |
1652 | 0 | fill_variance_tree(&vt.split[i], BLOCK_32X32); |
1653 | | // If variance of this 32x32 block is above the threshold, or if its above |
1654 | | // (some threshold of) the average variance over the sub-16x16 blocks, then |
1655 | | // force this block to split. This also forces a split on the upper |
1656 | | // (64x64) level. |
1657 | 0 | if (!force_split[i + 1]) { |
1658 | 0 | get_variance(&vt.split[i].part_variances.none); |
1659 | 0 | var_32x32 = vt.split[i].part_variances.none.variance; |
1660 | 0 | max_var_32x32 = VPXMAX(var_32x32, max_var_32x32); |
1661 | 0 | min_var_32x32 = VPXMIN(var_32x32, min_var_32x32); |
1662 | 0 | if (vt.split[i].part_variances.none.variance > thresholds[1] || |
1663 | 0 | (!is_key_frame && |
1664 | 0 | vt.split[i].part_variances.none.variance > (thresholds[1] >> 1) && |
1665 | 0 | vt.split[i].part_variances.none.variance > (avg_16x16[i] >> 1))) { |
1666 | 0 | force_split[i + 1] = 1; |
1667 | 0 | force_split[0] = 1; |
1668 | 0 | } else if (!is_key_frame && noise_level < kLow && cm->height <= 360 && |
1669 | 0 | (maxvar_16x16[i] - minvar_16x16[i]) > (thresholds[1] >> 1) && |
1670 | 0 | maxvar_16x16[i] > thresholds[1]) { |
1671 | 0 | force_split[i + 1] = 1; |
1672 | 0 | force_split[0] = 1; |
1673 | 0 | } |
1674 | 0 | avg_32x32 += var_32x32; |
1675 | 0 | } |
1676 | 0 | } |
1677 | 0 | if (!force_split[0]) { |
1678 | 0 | fill_variance_tree(&vt, BLOCK_64X64); |
1679 | 0 | get_variance(&vt.part_variances.none); |
1680 | | // If variance of this 64x64 block is above (some threshold of) the average |
1681 | | // variance over the sub-32x32 blocks, then force this block to split. |
1682 | | // Only checking this for noise level >= medium for now. |
1683 | 0 | if (!is_key_frame && noise_level >= kMedium && |
1684 | 0 | vt.part_variances.none.variance > (9 * avg_32x32) >> 5) |
1685 | 0 | force_split[0] = 1; |
1686 | | // Else if the maximum 32x32 variance minus the miniumum 32x32 variance in |
1687 | | // a 64x64 block is greater than threshold and the maximum 32x32 variance is |
1688 | | // above a miniumum threshold, then force the split of a 64x64 block |
1689 | | // Only check this for low noise. |
1690 | 0 | else if (!is_key_frame && noise_level < kMedium && |
1691 | 0 | (max_var_32x32 - min_var_32x32) > 3 * (thresholds[0] >> 3) && |
1692 | 0 | max_var_32x32 > thresholds[0] >> 1) |
1693 | 0 | force_split[0] = 1; |
1694 | 0 | } |
1695 | | |
1696 | | // Now go through the entire structure, splitting every block size until |
1697 | | // we get to one that's got a variance lower than our threshold. |
1698 | 0 | if (mi_col + 8 > cm->mi_cols || mi_row + 8 > cm->mi_rows || |
1699 | 0 | !set_vt_partitioning(cpi, x, xd, &vt, BLOCK_64X64, mi_row, mi_col, |
1700 | 0 | thresholds[0], BLOCK_16X16, force_split[0])) { |
1701 | 0 | for (i = 0; i < 4; ++i) { |
1702 | 0 | const int x32_idx = ((i & 1) << 2); |
1703 | 0 | const int y32_idx = ((i >> 1) << 2); |
1704 | 0 | const int i2 = i << 2; |
1705 | 0 | if (!set_vt_partitioning(cpi, x, xd, &vt.split[i], BLOCK_32X32, |
1706 | 0 | (mi_row + y32_idx), (mi_col + x32_idx), |
1707 | 0 | thresholds[1], BLOCK_16X16, |
1708 | 0 | force_split[i + 1])) { |
1709 | 0 | for (j = 0; j < 4; ++j) { |
1710 | 0 | const int x16_idx = ((j & 1) << 1); |
1711 | 0 | const int y16_idx = ((j >> 1) << 1); |
1712 | | // For inter frames: if variance4x4downsample[] == 1 for this 16x16 |
1713 | | // block, then the variance is based on 4x4 down-sampling, so use vt2 |
1714 | | // in set_vt_partitioning(), otherwise use vt. |
1715 | 0 | v16x16 *vtemp = (!is_key_frame && variance4x4downsample[i2 + j] == 1) |
1716 | 0 | ? &vt2[i2 + j] |
1717 | 0 | : &vt.split[i].split[j]; |
1718 | 0 | if (!set_vt_partitioning( |
1719 | 0 | cpi, x, xd, vtemp, BLOCK_16X16, mi_row + y32_idx + y16_idx, |
1720 | 0 | mi_col + x32_idx + x16_idx, thresholds[2], cpi->vbp_bsize_min, |
1721 | 0 | force_split[5 + i2 + j])) { |
1722 | 0 | for (k = 0; k < 4; ++k) { |
1723 | 0 | const int x8_idx = (k & 1); |
1724 | 0 | const int y8_idx = (k >> 1); |
1725 | 0 | if (use_4x4_partition) { |
1726 | 0 | if (!set_vt_partitioning(cpi, x, xd, &vtemp->split[k], |
1727 | 0 | BLOCK_8X8, |
1728 | 0 | mi_row + y32_idx + y16_idx + y8_idx, |
1729 | 0 | mi_col + x32_idx + x16_idx + x8_idx, |
1730 | 0 | thresholds[3], BLOCK_8X8, 0)) { |
1731 | 0 | set_block_size( |
1732 | 0 | cpi, x, xd, (mi_row + y32_idx + y16_idx + y8_idx), |
1733 | 0 | (mi_col + x32_idx + x16_idx + x8_idx), BLOCK_4X4); |
1734 | 0 | } |
1735 | 0 | } else { |
1736 | 0 | set_block_size( |
1737 | 0 | cpi, x, xd, (mi_row + y32_idx + y16_idx + y8_idx), |
1738 | 0 | (mi_col + x32_idx + x16_idx + x8_idx), BLOCK_8X8); |
1739 | 0 | } |
1740 | 0 | } |
1741 | 0 | } |
1742 | 0 | } |
1743 | 0 | } |
1744 | 0 | } |
1745 | 0 | } |
1746 | |
|
1747 | 0 | if (!frame_is_intra_only(cm) && cpi->sf.copy_partition_flag) { |
1748 | 0 | update_prev_partition(cpi, x, segment_id, mi_row, mi_col, sb_offset); |
1749 | 0 | } |
1750 | |
|
1751 | 0 | if (!frame_is_intra_only(cm) && cpi->sf.svc_use_lowres_part && |
1752 | 0 | cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 2) |
1753 | 0 | update_partition_svc(cpi, BLOCK_64X64, mi_row, mi_col); |
1754 | |
|
1755 | 0 | if (cpi->sf.short_circuit_low_temp_var) { |
1756 | 0 | set_low_temp_var_flag(cpi, x, xd, &vt, thresholds, ref_frame_partition, |
1757 | 0 | mi_col, mi_row); |
1758 | 0 | } |
1759 | |
|
1760 | 0 | chroma_check(cpi, x, bsize, y_sad, is_key_frame, scene_change_detected); |
1761 | 0 | if (vt2) vpx_free(vt2); |
1762 | 0 | return 0; |
1763 | 0 | } |
1764 | | |
1765 | | #if !CONFIG_REALTIME_ONLY |
1766 | | static void update_state(VP9_COMP *cpi, ThreadData *td, PICK_MODE_CONTEXT *ctx, |
1767 | | int mi_row, int mi_col, BLOCK_SIZE bsize, |
1768 | 8.50M | int output_enabled) { |
1769 | 8.50M | int i, x_idx, y; |
1770 | 8.50M | VP9_COMMON *const cm = &cpi->common; |
1771 | 8.50M | RD_COUNTS *const rdc = &td->rd_counts; |
1772 | 8.50M | MACROBLOCK *const x = &td->mb; |
1773 | 8.50M | MACROBLOCKD *const xd = &x->e_mbd; |
1774 | 8.50M | struct macroblock_plane *const p = x->plane; |
1775 | 8.50M | struct macroblockd_plane *const pd = xd->plane; |
1776 | 8.50M | MODE_INFO *mi = &ctx->mic; |
1777 | 8.50M | MODE_INFO *const xdmi = xd->mi[0]; |
1778 | 8.50M | MODE_INFO *mi_addr = xd->mi[0]; |
1779 | 8.50M | const struct segmentation *const seg = &cm->seg; |
1780 | 8.50M | const int bw = num_8x8_blocks_wide_lookup[mi->sb_type]; |
1781 | 8.50M | const int bh = num_8x8_blocks_high_lookup[mi->sb_type]; |
1782 | 8.50M | const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col); |
1783 | 8.50M | const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row); |
1784 | 8.50M | MV_REF *const frame_mvs = cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col; |
1785 | 8.50M | int w, h; |
1786 | | |
1787 | 8.50M | const int mis = cm->mi_stride; |
1788 | 8.50M | const int mi_width = num_8x8_blocks_wide_lookup[bsize]; |
1789 | 8.50M | const int mi_height = num_8x8_blocks_high_lookup[bsize]; |
1790 | 8.50M | int max_plane; |
1791 | | |
1792 | 8.50M | assert(mi->sb_type == bsize); |
1793 | | |
1794 | 8.50M | *mi_addr = *mi; |
1795 | 8.50M | *x->mbmi_ext = ctx->mbmi_ext; |
1796 | | |
1797 | | // If segmentation in use |
1798 | 8.50M | if (seg->enabled) { |
1799 | | // For in frame complexity AQ copy the segment id from the segment map. |
1800 | 0 | if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) { |
1801 | 0 | const uint8_t *const map = |
1802 | 0 | seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map; |
1803 | 0 | mi_addr->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); |
1804 | 0 | } |
1805 | | // Else for cyclic refresh mode update the segment map, set the segment id |
1806 | | // and then update the quantizer. |
1807 | 0 | if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && |
1808 | 0 | cpi->cyclic_refresh->content_mode) { |
1809 | 0 | vp9_cyclic_refresh_update_segment(cpi, xd->mi[0], mi_row, mi_col, bsize, |
1810 | 0 | ctx->rate, ctx->dist, x->skip, p); |
1811 | 0 | } |
1812 | 0 | } |
1813 | | |
1814 | 8.50M | max_plane = is_inter_block(xdmi) ? MAX_MB_PLANE : 1; |
1815 | 21.2M | for (i = 0; i < max_plane; ++i) { |
1816 | 12.7M | p[i].coeff = ctx->coeff_pbuf[i][1]; |
1817 | 12.7M | p[i].qcoeff = ctx->qcoeff_pbuf[i][1]; |
1818 | 12.7M | pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1]; |
1819 | 12.7M | p[i].eobs = ctx->eobs_pbuf[i][1]; |
1820 | 12.7M | } |
1821 | | |
1822 | 21.2M | for (i = max_plane; i < MAX_MB_PLANE; ++i) { |
1823 | 12.7M | p[i].coeff = ctx->coeff_pbuf[i][2]; |
1824 | 12.7M | p[i].qcoeff = ctx->qcoeff_pbuf[i][2]; |
1825 | 12.7M | pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2]; |
1826 | 12.7M | p[i].eobs = ctx->eobs_pbuf[i][2]; |
1827 | 12.7M | } |
1828 | | |
1829 | | // Restore the coding context of the MB to that that was in place |
1830 | | // when the mode was picked for it |
1831 | 18.8M | for (y = 0; y < mi_height; y++) |
1832 | 26.0M | for (x_idx = 0; x_idx < mi_width; x_idx++) |
1833 | 15.7M | if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width > x_idx && |
1834 | 15.5M | (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height > y) { |
1835 | 15.2M | xd->mi[x_idx + y * mis] = mi_addr; |
1836 | 15.2M | } |
1837 | | |
1838 | 8.50M | if (cpi->oxcf.aq_mode != NO_AQ) vp9_init_plane_quantizers(cpi, x); |
1839 | | |
1840 | 8.50M | if (is_inter_block(xdmi) && xdmi->sb_type < BLOCK_8X8) { |
1841 | 788k | xdmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int; |
1842 | 788k | xdmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int; |
1843 | 788k | } |
1844 | | |
1845 | 8.50M | x->skip = ctx->skip; |
1846 | 8.50M | memcpy(x->zcoeff_blk[xdmi->tx_size], ctx->zcoeff_blk, |
1847 | 8.50M | sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk); |
1848 | | |
1849 | 8.50M | if (!output_enabled) return; |
1850 | | |
1851 | | #if CONFIG_INTERNAL_STATS |
1852 | | if (frame_is_intra_only(cm)) { |
1853 | | static const int kf_mode_index[] = { |
1854 | | THR_DC /*DC_PRED*/, THR_V_PRED /*V_PRED*/, |
1855 | | THR_H_PRED /*H_PRED*/, THR_D45_PRED /*D45_PRED*/, |
1856 | | THR_D135_PRED /*D135_PRED*/, THR_D117_PRED /*D117_PRED*/, |
1857 | | THR_D153_PRED /*D153_PRED*/, THR_D207_PRED /*D207_PRED*/, |
1858 | | THR_D63_PRED /*D63_PRED*/, THR_TM /*TM_PRED*/, |
1859 | | }; |
1860 | | ++cpi->mode_chosen_counts[kf_mode_index[xdmi->mode]]; |
1861 | | } else { |
1862 | | // Note how often each mode chosen as best |
1863 | | ++cpi->mode_chosen_counts[ctx->best_mode_index]; |
1864 | | } |
1865 | | #endif |
1866 | 2.04M | if (!frame_is_intra_only(cm)) { |
1867 | 1.36M | if (is_inter_block(xdmi)) { |
1868 | 516k | vp9_update_mv_count(td); |
1869 | | |
1870 | 516k | if (cm->interp_filter == SWITCHABLE) { |
1871 | 263k | const int ctx_interp = get_pred_context_switchable_interp(xd); |
1872 | 263k | ++td->counts->switchable_interp[ctx_interp][xdmi->interp_filter]; |
1873 | 263k | } |
1874 | 516k | } |
1875 | | |
1876 | 1.36M | rdc->comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff; |
1877 | 1.36M | rdc->comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff; |
1878 | 1.36M | rdc->comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff; |
1879 | | |
1880 | 6.80M | for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) |
1881 | 5.44M | rdc->filter_diff[i] += ctx->best_filter_diff[i]; |
1882 | 1.36M | } |
1883 | | |
1884 | 4.51M | for (h = 0; h < y_mis; ++h) { |
1885 | 2.46M | MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols; |
1886 | 6.59M | for (w = 0; w < x_mis; ++w) { |
1887 | 4.13M | MV_REF *const mv = frame_mv + w; |
1888 | 4.13M | mv->ref_frame[0] = mi->ref_frame[0]; |
1889 | 4.13M | mv->ref_frame[1] = mi->ref_frame[1]; |
1890 | 4.13M | mv->mv[0].as_int = mi->mv[0].as_int; |
1891 | 4.13M | mv->mv[1].as_int = mi->mv[1].as_int; |
1892 | 4.13M | } |
1893 | 2.46M | } |
1894 | 2.04M | } |
1895 | | #endif // !CONFIG_REALTIME_ONLY |
1896 | | |
1897 | | void vp9_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src, |
1898 | 33.9M | int mi_row, int mi_col) { |
1899 | 33.9M | uint8_t *const buffers[3] = { src->y_buffer, src->u_buffer, src->v_buffer }; |
1900 | 33.9M | const int strides[3] = { src->y_stride, src->uv_stride, src->uv_stride }; |
1901 | 33.9M | int i; |
1902 | | |
1903 | | // Set current frame pointer. |
1904 | 33.9M | x->e_mbd.cur_buf = src; |
1905 | | |
1906 | 135M | for (i = 0; i < MAX_MB_PLANE; i++) |
1907 | 101M | setup_pred_plane(&x->plane[i].src, buffers[i], strides[i], mi_row, mi_col, |
1908 | 101M | NULL, x->e_mbd.plane[i].subsampling_x, |
1909 | 101M | x->e_mbd.plane[i].subsampling_y); |
1910 | 33.9M | } |
1911 | | |
1912 | | static void set_mode_info_seg_skip(MACROBLOCK *x, TX_MODE tx_mode, |
1913 | | INTERP_FILTER interp_filter, |
1914 | 0 | RD_COST *rd_cost, BLOCK_SIZE bsize) { |
1915 | 0 | MACROBLOCKD *const xd = &x->e_mbd; |
1916 | 0 | MODE_INFO *const mi = xd->mi[0]; |
1917 | 0 | INTERP_FILTER filter_ref; |
1918 | |
|
1919 | 0 | filter_ref = get_pred_context_switchable_interp(xd); |
1920 | 0 | if (interp_filter == BILINEAR) |
1921 | 0 | filter_ref = BILINEAR; |
1922 | 0 | else if (filter_ref == SWITCHABLE_FILTERS) |
1923 | 0 | filter_ref = EIGHTTAP; |
1924 | |
|
1925 | 0 | mi->sb_type = bsize; |
1926 | 0 | mi->mode = ZEROMV; |
1927 | 0 | mi->tx_size = |
1928 | 0 | VPXMIN(max_txsize_lookup[bsize], tx_mode_to_biggest_tx_size[tx_mode]); |
1929 | 0 | mi->skip = 1; |
1930 | 0 | mi->uv_mode = DC_PRED; |
1931 | 0 | mi->ref_frame[0] = LAST_FRAME; |
1932 | 0 | mi->ref_frame[1] = NO_REF_FRAME; |
1933 | 0 | mi->mv[0].as_int = 0; |
1934 | 0 | mi->interp_filter = filter_ref; |
1935 | |
|
1936 | 0 | xd->mi[0]->bmi[0].as_mv[0].as_int = 0; |
1937 | 0 | x->skip = 1; |
1938 | |
|
1939 | 0 | vp9_rd_cost_init(rd_cost); |
1940 | 0 | } |
1941 | | |
1942 | | #if !CONFIG_REALTIME_ONLY |
1943 | | static void set_segment_rdmult(VP9_COMP *const cpi, MACROBLOCK *const x, |
1944 | | int mi_row, int mi_col, BLOCK_SIZE bsize, |
1945 | 10.8M | AQ_MODE aq_mode) { |
1946 | 10.8M | VP9_COMMON *const cm = &cpi->common; |
1947 | 10.8M | const VP9EncoderConfig *const oxcf = &cpi->oxcf; |
1948 | 10.8M | const uint8_t *const map = |
1949 | 10.8M | cm->seg.update_map ? cpi->segmentation_map : cm->last_frame_seg_map; |
1950 | | |
1951 | 10.8M | vp9_init_plane_quantizers(cpi, x); |
1952 | 10.8M | vpx_clear_system_state(); |
1953 | | |
1954 | 10.8M | if (aq_mode == NO_AQ || aq_mode == PSNR_AQ) { |
1955 | 10.8M | if (cpi->sf.enable_tpl_model) x->rdmult = x->cb_rdmult; |
1956 | 10.8M | } else if (aq_mode == PERCEPTUAL_AQ) { |
1957 | 0 | x->rdmult = x->cb_rdmult; |
1958 | 0 | } else if (aq_mode == CYCLIC_REFRESH_AQ) { |
1959 | | // If segment is boosted, use rdmult for that segment. |
1960 | 0 | if (cyclic_refresh_segment_id_boosted( |
1961 | 0 | get_segment_id(cm, map, bsize, mi_row, mi_col))) |
1962 | 0 | x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh); |
1963 | 0 | } else { |
1964 | 0 | x->rdmult = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q); |
1965 | 0 | } |
1966 | | |
1967 | 10.8M | if (oxcf->tuning == VP8_TUNE_SSIM) { |
1968 | 0 | set_ssim_rdmult(cpi, x, bsize, mi_row, mi_col, &x->rdmult); |
1969 | 0 | } |
1970 | 10.8M | } |
1971 | | |
1972 | | static void rd_pick_sb_modes(VP9_COMP *cpi, TileDataEnc *tile_data, |
1973 | | MACROBLOCK *const x, int mi_row, int mi_col, |
1974 | | RD_COST *rd_cost, BLOCK_SIZE bsize, |
1975 | | PICK_MODE_CONTEXT *ctx, int rate_in_best_rd, |
1976 | 10.8M | int64_t dist_in_best_rd) { |
1977 | 10.8M | VP9_COMMON *const cm = &cpi->common; |
1978 | 10.8M | TileInfo *const tile_info = &tile_data->tile_info; |
1979 | 10.8M | MACROBLOCKD *const xd = &x->e_mbd; |
1980 | 10.8M | MODE_INFO *mi; |
1981 | 10.8M | struct macroblock_plane *const p = x->plane; |
1982 | 10.8M | struct macroblockd_plane *const pd = xd->plane; |
1983 | 10.8M | const AQ_MODE aq_mode = cpi->oxcf.aq_mode; |
1984 | 10.8M | int i, orig_rdmult; |
1985 | 10.8M | int64_t best_rd = INT64_MAX; |
1986 | | |
1987 | 10.8M | vpx_clear_system_state(); |
1988 | | #if CONFIG_COLLECT_COMPONENT_TIMING |
1989 | | start_timing(cpi, rd_pick_sb_modes_time); |
1990 | | #endif |
1991 | | |
1992 | | // Use the lower precision, but faster, 32x32 fdct for mode selection. |
1993 | 10.8M | x->use_lp32x32fdct = 1; |
1994 | | |
1995 | 10.8M | set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); |
1996 | 10.8M | mi = xd->mi[0]; |
1997 | 10.8M | mi->sb_type = bsize; |
1998 | | |
1999 | 43.2M | for (i = 0; i < MAX_MB_PLANE; ++i) { |
2000 | 32.4M | p[i].coeff = ctx->coeff_pbuf[i][0]; |
2001 | 32.4M | p[i].qcoeff = ctx->qcoeff_pbuf[i][0]; |
2002 | 32.4M | pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][0]; |
2003 | 32.4M | p[i].eobs = ctx->eobs_pbuf[i][0]; |
2004 | 32.4M | } |
2005 | 10.8M | ctx->is_coded = 0; |
2006 | 10.8M | ctx->skippable = 0; |
2007 | 10.8M | ctx->pred_pixel_ready = 0; |
2008 | 10.8M | x->skip_recode = 0; |
2009 | | |
2010 | | // Set to zero to make sure we do not use the previous encoded frame stats |
2011 | 10.8M | mi->skip = 0; |
2012 | | |
2013 | 10.8M | #if CONFIG_VP9_HIGHBITDEPTH |
2014 | 10.8M | if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { |
2015 | 0 | x->source_variance = vp9_high_get_sby_perpixel_variance( |
2016 | 0 | cpi, &x->plane[0].src, bsize, xd->bd); |
2017 | 10.8M | } else { |
2018 | 10.8M | x->source_variance = |
2019 | 10.8M | vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize); |
2020 | 10.8M | } |
2021 | | #else |
2022 | | x->source_variance = |
2023 | | vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize); |
2024 | | #endif // CONFIG_VP9_HIGHBITDEPTH |
2025 | | |
2026 | | // Save rdmult before it might be changed, so it can be restored later. |
2027 | 10.8M | orig_rdmult = x->rdmult; |
2028 | | |
2029 | 10.8M | if ((cpi->sf.tx_domain_thresh > 0.0) || |
2030 | 7.89M | (cpi->sf.trellis_opt_tx_rd.thresh > 0.0)) { |
2031 | 2.91M | double logvar = vp9_log_block_var(cpi, x, bsize); |
2032 | | // Check block complexity as part of decision on using pixel or transform |
2033 | | // domain distortion in rd tests. |
2034 | 2.91M | x->block_tx_domain = cpi->sf.allow_txfm_domain_distortion && |
2035 | 2.91M | (logvar >= cpi->sf.tx_domain_thresh); |
2036 | | |
2037 | | // Store block complexity to decide on using quantized coefficient |
2038 | | // optimization inside the rd loop. |
2039 | 2.91M | x->log_block_src_var = logvar; |
2040 | 7.89M | } else { |
2041 | 7.89M | x->block_tx_domain = cpi->sf.allow_txfm_domain_distortion; |
2042 | 7.89M | x->log_block_src_var = 0.0; |
2043 | 7.89M | } |
2044 | | |
2045 | 10.8M | set_segment_index(cpi, x, mi_row, mi_col, bsize, 0); |
2046 | 10.8M | set_segment_rdmult(cpi, x, mi_row, mi_col, bsize, aq_mode); |
2047 | 10.8M | if (rate_in_best_rd < INT_MAX && dist_in_best_rd < INT64_MAX) { |
2048 | 10.3M | best_rd = vp9_calculate_rd_cost(x->rdmult, x->rddiv, rate_in_best_rd, |
2049 | 10.3M | dist_in_best_rd); |
2050 | 10.3M | } |
2051 | | |
2052 | | // Find best coding mode & reconstruct the MB so it is available |
2053 | | // as a predictor for MBs that follow in the SB |
2054 | 10.8M | if (frame_is_intra_only(cm)) { |
2055 | 4.11M | vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, best_rd); |
2056 | 6.70M | } else { |
2057 | 6.70M | if (bsize >= BLOCK_8X8) { |
2058 | | #if CONFIG_COLLECT_COMPONENT_TIMING |
2059 | | start_timing(cpi, vp9_rd_pick_inter_mode_sb_time); |
2060 | | #endif |
2061 | 3.65M | if (segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP)) |
2062 | 0 | vp9_rd_pick_inter_mode_sb_seg_skip(cpi, tile_data, x, rd_cost, bsize, |
2063 | 0 | ctx, best_rd); |
2064 | 3.65M | else |
2065 | 3.65M | vp9_rd_pick_inter_mode_sb(cpi, tile_data, x, mi_row, mi_col, rd_cost, |
2066 | 3.65M | bsize, ctx, best_rd); |
2067 | | #if CONFIG_COLLECT_COMPONENT_TIMING |
2068 | | end_timing(cpi, vp9_rd_pick_inter_mode_sb_time); |
2069 | | #endif |
2070 | 3.65M | } else { |
2071 | | #if CONFIG_COLLECT_COMPONENT_TIMING |
2072 | | start_timing(cpi, vp9_rd_pick_inter_mode_sub8x8_time); |
2073 | | #endif |
2074 | 3.04M | vp9_rd_pick_inter_mode_sub8x8(cpi, tile_data, x, mi_row, mi_col, rd_cost, |
2075 | 3.04M | bsize, ctx, best_rd); |
2076 | | #if CONFIG_COLLECT_COMPONENT_TIMING |
2077 | | end_timing(cpi, vp9_rd_pick_inter_mode_sub8x8_time); |
2078 | | #endif |
2079 | 3.04M | } |
2080 | 6.70M | } |
2081 | | |
2082 | | // Examine the resulting rate and for AQ mode 2 make a segment choice. |
2083 | 10.8M | if ((rd_cost->rate != INT_MAX) && (aq_mode == COMPLEXITY_AQ) && |
2084 | 0 | (bsize >= BLOCK_16X16) && |
2085 | 0 | (cm->frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame || |
2086 | 0 | (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref))) { |
2087 | 0 | vp9_caq_select_segment(cpi, x, bsize, mi_row, mi_col, rd_cost->rate); |
2088 | 0 | } |
2089 | | |
2090 | | // TODO(jingning) The rate-distortion optimization flow needs to be |
2091 | | // refactored to provide proper exit/return handle. |
2092 | 10.8M | if (rd_cost->rate == INT_MAX || rd_cost->dist == INT64_MAX) |
2093 | 3.64M | rd_cost->rdcost = INT64_MAX; |
2094 | 7.16M | else |
2095 | 7.16M | rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist); |
2096 | | |
2097 | 10.8M | x->rdmult = orig_rdmult; |
2098 | | |
2099 | 10.8M | ctx->rate = rd_cost->rate; |
2100 | 10.8M | ctx->dist = rd_cost->dist; |
2101 | | #if CONFIG_COLLECT_COMPONENT_TIMING |
2102 | | end_timing(cpi, rd_pick_sb_modes_time); |
2103 | | #endif |
2104 | 10.8M | } |
2105 | | #endif // !CONFIG_REALTIME_ONLY |
2106 | | |
2107 | 2.04M | static void update_stats(VP9_COMMON *cm, ThreadData *td) { |
2108 | 2.04M | const MACROBLOCK *x = &td->mb; |
2109 | 2.04M | const MACROBLOCKD *const xd = &x->e_mbd; |
2110 | 2.04M | const MODE_INFO *const mi = xd->mi[0]; |
2111 | 2.04M | const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; |
2112 | 2.04M | const BLOCK_SIZE bsize = mi->sb_type; |
2113 | | |
2114 | 2.04M | if (!frame_is_intra_only(cm)) { |
2115 | 1.36M | FRAME_COUNTS *const counts = td->counts; |
2116 | 1.36M | const int inter_block = is_inter_block(mi); |
2117 | 1.36M | const int seg_ref_active = |
2118 | 1.36M | segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_REF_FRAME); |
2119 | 1.36M | if (!seg_ref_active) { |
2120 | 1.36M | counts->intra_inter[get_intra_inter_context(xd)][inter_block]++; |
2121 | | // If the segment reference feature is enabled we have only a single |
2122 | | // reference frame allowed for the segment so exclude it from |
2123 | | // the reference frame counts used to work out probabilities. |
2124 | 1.36M | if (inter_block) { |
2125 | 516k | const MV_REFERENCE_FRAME ref0 = mi->ref_frame[0]; |
2126 | 516k | if (cm->reference_mode == REFERENCE_MODE_SELECT) |
2127 | 0 | counts->comp_inter[vp9_get_reference_mode_context(cm, xd)] |
2128 | 0 | [has_second_ref(mi)]++; |
2129 | | |
2130 | 516k | if (has_second_ref(mi)) { |
2131 | 0 | const int idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref]; |
2132 | 0 | const int ctx = vp9_get_pred_context_comp_ref_p(cm, xd); |
2133 | 0 | const int bit = mi->ref_frame[!idx] == cm->comp_var_ref[1]; |
2134 | 0 | counts->comp_ref[ctx][bit]++; |
2135 | 516k | } else { |
2136 | 516k | counts->single_ref[vp9_get_pred_context_single_ref_p1(xd)][0] |
2137 | 516k | [ref0 != LAST_FRAME]++; |
2138 | 516k | if (ref0 != LAST_FRAME) |
2139 | 183k | counts->single_ref[vp9_get_pred_context_single_ref_p2(xd)][1] |
2140 | 183k | [ref0 != GOLDEN_FRAME]++; |
2141 | 516k | } |
2142 | 516k | } |
2143 | 1.36M | } |
2144 | 1.36M | if (inter_block && |
2145 | 516k | !segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP)) { |
2146 | 516k | const int mode_ctx = mbmi_ext->mode_context[mi->ref_frame[0]]; |
2147 | 516k | if (bsize >= BLOCK_8X8) { |
2148 | 291k | const PREDICTION_MODE mode = mi->mode; |
2149 | 291k | ++counts->inter_mode[mode_ctx][INTER_OFFSET(mode)]; |
2150 | 291k | } else { |
2151 | 224k | const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; |
2152 | 224k | const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; |
2153 | 224k | int idx, idy; |
2154 | 656k | for (idy = 0; idy < 2; idy += num_4x4_h) { |
2155 | 1.20M | for (idx = 0; idx < 2; idx += num_4x4_w) { |
2156 | 768k | const int j = idy * 2 + idx; |
2157 | 768k | const PREDICTION_MODE b_mode = mi->bmi[j].as_mode; |
2158 | 768k | ++counts->inter_mode[mode_ctx][INTER_OFFSET(b_mode)]; |
2159 | 768k | } |
2160 | 431k | } |
2161 | 224k | } |
2162 | 516k | } |
2163 | 1.36M | } |
2164 | 2.04M | } |
2165 | | |
2166 | | #if !CONFIG_REALTIME_ONLY |
2167 | | static void restore_context(MACROBLOCK *const x, int mi_row, int mi_col, |
2168 | | ENTROPY_CONTEXT a[16 * MAX_MB_PLANE], |
2169 | | ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], |
2170 | | PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8], |
2171 | 11.1M | BLOCK_SIZE bsize) { |
2172 | 11.1M | MACROBLOCKD *const xd = &x->e_mbd; |
2173 | 11.1M | int p; |
2174 | 11.1M | const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; |
2175 | 11.1M | const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; |
2176 | 11.1M | int mi_width = num_8x8_blocks_wide_lookup[bsize]; |
2177 | 11.1M | int mi_height = num_8x8_blocks_high_lookup[bsize]; |
2178 | 44.6M | for (p = 0; p < MAX_MB_PLANE; p++) { |
2179 | 33.4M | memcpy(xd->above_context[p] + ((mi_col * 2) >> xd->plane[p].subsampling_x), |
2180 | 33.4M | a + num_4x4_blocks_wide * p, |
2181 | 33.4M | (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >> |
2182 | 33.4M | xd->plane[p].subsampling_x); |
2183 | 33.4M | memcpy(xd->left_context[p] + |
2184 | 33.4M | ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y), |
2185 | 33.4M | l + num_4x4_blocks_high * p, |
2186 | 33.4M | (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >> |
2187 | 33.4M | xd->plane[p].subsampling_y); |
2188 | 33.4M | } |
2189 | 11.1M | memcpy(xd->above_seg_context + mi_col, sa, |
2190 | 11.1M | sizeof(*xd->above_seg_context) * mi_width); |
2191 | 11.1M | memcpy(xd->left_seg_context + (mi_row & MI_MASK), sl, |
2192 | 11.1M | sizeof(xd->left_seg_context[0]) * mi_height); |
2193 | 11.1M | } |
2194 | | |
2195 | | static void save_context(MACROBLOCK *const x, int mi_row, int mi_col, |
2196 | | ENTROPY_CONTEXT a[16 * MAX_MB_PLANE], |
2197 | | ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], |
2198 | | PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8], |
2199 | 4.05M | BLOCK_SIZE bsize) { |
2200 | 4.05M | const MACROBLOCKD *const xd = &x->e_mbd; |
2201 | 4.05M | int p; |
2202 | 4.05M | const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; |
2203 | 4.05M | const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; |
2204 | 4.05M | int mi_width = num_8x8_blocks_wide_lookup[bsize]; |
2205 | 4.05M | int mi_height = num_8x8_blocks_high_lookup[bsize]; |
2206 | | |
2207 | | // buffer the above/left context information of the block in search. |
2208 | 16.2M | for (p = 0; p < MAX_MB_PLANE; ++p) { |
2209 | 12.1M | memcpy(a + num_4x4_blocks_wide * p, |
2210 | 12.1M | xd->above_context[p] + (mi_col * 2 >> xd->plane[p].subsampling_x), |
2211 | 12.1M | (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >> |
2212 | 12.1M | xd->plane[p].subsampling_x); |
2213 | 12.1M | memcpy(l + num_4x4_blocks_high * p, |
2214 | 12.1M | xd->left_context[p] + |
2215 | 12.1M | ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y), |
2216 | 12.1M | (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >> |
2217 | 12.1M | xd->plane[p].subsampling_y); |
2218 | 12.1M | } |
2219 | 4.05M | memcpy(sa, xd->above_seg_context + mi_col, |
2220 | 4.05M | sizeof(*xd->above_seg_context) * mi_width); |
2221 | 4.05M | memcpy(sl, xd->left_seg_context + (mi_row & MI_MASK), |
2222 | 4.05M | sizeof(xd->left_seg_context[0]) * mi_height); |
2223 | 4.05M | } |
2224 | | |
2225 | | static void encode_b(VP9_COMP *cpi, const TileInfo *const tile, ThreadData *td, |
2226 | | TOKENEXTRA **tp, int mi_row, int mi_col, |
2227 | | int output_enabled, BLOCK_SIZE bsize, |
2228 | 7.61M | PICK_MODE_CONTEXT *ctx) { |
2229 | 7.61M | MACROBLOCK *const x = &td->mb; |
2230 | 7.61M | set_offsets(cpi, tile, x, mi_row, mi_col, bsize); |
2231 | | |
2232 | 7.61M | if (cpi->sf.enable_tpl_model && |
2233 | 2.80M | (cpi->oxcf.aq_mode == NO_AQ || cpi->oxcf.aq_mode == PERCEPTUAL_AQ)) { |
2234 | 2.80M | const VP9EncoderConfig *const oxcf = &cpi->oxcf; |
2235 | 2.80M | x->rdmult = x->cb_rdmult; |
2236 | 2.80M | if (oxcf->tuning == VP8_TUNE_SSIM) { |
2237 | 0 | set_ssim_rdmult(cpi, x, bsize, mi_row, mi_col, &x->rdmult); |
2238 | 0 | } |
2239 | 2.80M | } |
2240 | | |
2241 | 7.61M | update_state(cpi, td, ctx, mi_row, mi_col, bsize, output_enabled); |
2242 | 7.61M | encode_superblock(cpi, td, tp, output_enabled, mi_row, mi_col, bsize, ctx); |
2243 | | |
2244 | 7.61M | if (output_enabled) { |
2245 | 2.04M | update_stats(&cpi->common, td); |
2246 | | |
2247 | 2.04M | (*tp)->token = EOSB_TOKEN; |
2248 | 2.04M | (*tp)++; |
2249 | 2.04M | } |
2250 | 7.61M | } |
2251 | | |
2252 | | static void encode_sb(VP9_COMP *cpi, ThreadData *td, const TileInfo *const tile, |
2253 | | TOKENEXTRA **tp, int mi_row, int mi_col, |
2254 | 10.1M | int output_enabled, BLOCK_SIZE bsize, PC_TREE *pc_tree) { |
2255 | 10.1M | VP9_COMMON *const cm = &cpi->common; |
2256 | 10.1M | MACROBLOCK *const x = &td->mb; |
2257 | 10.1M | MACROBLOCKD *const xd = &x->e_mbd; |
2258 | | |
2259 | 10.1M | const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4; |
2260 | 10.1M | int ctx; |
2261 | 10.1M | PARTITION_TYPE partition; |
2262 | 10.1M | BLOCK_SIZE subsize = bsize; |
2263 | | |
2264 | 10.1M | if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; |
2265 | | |
2266 | 9.22M | if (bsize >= BLOCK_8X8) { |
2267 | 9.22M | ctx = partition_plane_context(xd, mi_row, mi_col, bsize); |
2268 | 9.22M | subsize = get_subsize(bsize, pc_tree->partitioning); |
2269 | 9.22M | } else { |
2270 | 0 | ctx = 0; |
2271 | 0 | subsize = BLOCK_4X4; |
2272 | 0 | } |
2273 | | |
2274 | 9.22M | partition = partition_lookup[bsl][subsize]; |
2275 | 9.22M | if (output_enabled && bsize != BLOCK_4X4) |
2276 | 2.75M | td->counts->partition[ctx][partition]++; |
2277 | | |
2278 | 9.22M | switch (partition) { |
2279 | 3.19M | case PARTITION_NONE: |
2280 | 3.19M | encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize, |
2281 | 3.19M | &pc_tree->none); |
2282 | 3.19M | break; |
2283 | 515k | case PARTITION_VERT: |
2284 | 515k | encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize, |
2285 | 515k | &pc_tree->vertical[0]); |
2286 | 515k | if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) { |
2287 | 52.9k | encode_b(cpi, tile, td, tp, mi_row, mi_col + hbs, output_enabled, |
2288 | 52.9k | subsize, &pc_tree->vertical[1]); |
2289 | 52.9k | } |
2290 | 515k | break; |
2291 | 901k | case PARTITION_HORZ: |
2292 | 901k | encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize, |
2293 | 901k | &pc_tree->horizontal[0]); |
2294 | 901k | if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) { |
2295 | 88.3k | encode_b(cpi, tile, td, tp, mi_row + hbs, mi_col, output_enabled, |
2296 | 88.3k | subsize, &pc_tree->horizontal[1]); |
2297 | 88.3k | } |
2298 | 901k | break; |
2299 | 4.61M | default: |
2300 | 4.61M | assert(partition == PARTITION_SPLIT); |
2301 | 4.61M | if (bsize == BLOCK_8X8) { |
2302 | 2.86M | encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize, |
2303 | 2.86M | pc_tree->u.leaf_split[0]); |
2304 | 2.86M | } else { |
2305 | 1.74M | encode_sb(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize, |
2306 | 1.74M | pc_tree->u.split[0]); |
2307 | 1.74M | encode_sb(cpi, td, tile, tp, mi_row, mi_col + hbs, output_enabled, |
2308 | 1.74M | subsize, pc_tree->u.split[1]); |
2309 | 1.74M | encode_sb(cpi, td, tile, tp, mi_row + hbs, mi_col, output_enabled, |
2310 | 1.74M | subsize, pc_tree->u.split[2]); |
2311 | 1.74M | encode_sb(cpi, td, tile, tp, mi_row + hbs, mi_col + hbs, output_enabled, |
2312 | 1.74M | subsize, pc_tree->u.split[3]); |
2313 | 1.74M | } |
2314 | 4.61M | break; |
2315 | 9.22M | } |
2316 | | |
2317 | 9.22M | if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8) |
2318 | 7.47M | update_partition_context(xd, mi_row, mi_col, subsize, bsize); |
2319 | 9.22M | } |
2320 | | #endif // !CONFIG_REALTIME_ONLY |
2321 | | |
2322 | | // Check to see if the given partition size is allowed for a specified number |
2323 | | // of 8x8 block rows and columns remaining in the image. |
2324 | | // If not then return the largest allowed partition size |
2325 | | static BLOCK_SIZE find_partition_size(BLOCK_SIZE bsize, int rows_left, |
2326 | 0 | int cols_left, int *bh, int *bw) { |
2327 | 0 | if (rows_left <= 0 || cols_left <= 0) { |
2328 | 0 | return VPXMIN(bsize, BLOCK_8X8); |
2329 | 0 | } else { |
2330 | 0 | for (; bsize > 0; bsize -= 3) { |
2331 | 0 | *bh = num_8x8_blocks_high_lookup[bsize]; |
2332 | 0 | *bw = num_8x8_blocks_wide_lookup[bsize]; |
2333 | 0 | if ((*bh <= rows_left) && (*bw <= cols_left)) { |
2334 | 0 | break; |
2335 | 0 | } |
2336 | 0 | } |
2337 | 0 | } |
2338 | 0 | return bsize; |
2339 | 0 | } |
2340 | | |
2341 | | static void set_partial_b64x64_partition(MODE_INFO *mi, int mis, int bh_in, |
2342 | | int bw_in, int row8x8_remaining, |
2343 | | int col8x8_remaining, BLOCK_SIZE bsize, |
2344 | 0 | MODE_INFO **mi_8x8) { |
2345 | 0 | int bh = bh_in; |
2346 | 0 | int r, c; |
2347 | 0 | for (r = 0; r < MI_BLOCK_SIZE; r += bh) { |
2348 | 0 | int bw = bw_in; |
2349 | 0 | for (c = 0; c < MI_BLOCK_SIZE; c += bw) { |
2350 | 0 | const int index = r * mis + c; |
2351 | 0 | mi_8x8[index] = mi + index; |
2352 | 0 | mi_8x8[index]->sb_type = find_partition_size( |
2353 | 0 | bsize, row8x8_remaining - r, col8x8_remaining - c, &bh, &bw); |
2354 | 0 | } |
2355 | 0 | } |
2356 | 0 | } |
2357 | | |
2358 | | // This function attempts to set all mode info entries in a given SB64 |
2359 | | // to the same block partition size. |
2360 | | // However, at the bottom and right borders of the image the requested size |
2361 | | // may not be allowed in which case this code attempts to choose the largest |
2362 | | // allowable partition. |
2363 | | static void set_fixed_partitioning(VP9_COMP *cpi, const TileInfo *const tile, |
2364 | | MODE_INFO **mi_8x8, int mi_row, int mi_col, |
2365 | 0 | BLOCK_SIZE bsize) { |
2366 | 0 | VP9_COMMON *const cm = &cpi->common; |
2367 | 0 | const int mis = cm->mi_stride; |
2368 | 0 | const int row8x8_remaining = tile->mi_row_end - mi_row; |
2369 | 0 | const int col8x8_remaining = tile->mi_col_end - mi_col; |
2370 | 0 | int block_row, block_col; |
2371 | 0 | MODE_INFO *mi_upper_left = cm->mi + mi_row * mis + mi_col; |
2372 | 0 | int bh = num_8x8_blocks_high_lookup[bsize]; |
2373 | 0 | int bw = num_8x8_blocks_wide_lookup[bsize]; |
2374 | |
|
2375 | 0 | assert((row8x8_remaining > 0) && (col8x8_remaining > 0)); |
2376 | | |
2377 | | // Apply the requested partition size to the SB64 if it is all "in image" |
2378 | 0 | if ((col8x8_remaining >= MI_BLOCK_SIZE) && |
2379 | 0 | (row8x8_remaining >= MI_BLOCK_SIZE)) { |
2380 | 0 | for (block_row = 0; block_row < MI_BLOCK_SIZE; block_row += bh) { |
2381 | 0 | for (block_col = 0; block_col < MI_BLOCK_SIZE; block_col += bw) { |
2382 | 0 | int index = block_row * mis + block_col; |
2383 | 0 | mi_8x8[index] = mi_upper_left + index; |
2384 | 0 | mi_8x8[index]->sb_type = bsize; |
2385 | 0 | } |
2386 | 0 | } |
2387 | 0 | } else { |
2388 | | // Else this is a partial SB64. |
2389 | 0 | set_partial_b64x64_partition(mi_upper_left, mis, bh, bw, row8x8_remaining, |
2390 | 0 | col8x8_remaining, bsize, mi_8x8); |
2391 | 0 | } |
2392 | 0 | } |
2393 | | |
2394 | | static void update_state_rt(VP9_COMP *cpi, ThreadData *td, |
2395 | | PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col, |
2396 | 0 | int bsize) { |
2397 | 0 | VP9_COMMON *const cm = &cpi->common; |
2398 | 0 | MACROBLOCK *const x = &td->mb; |
2399 | 0 | MACROBLOCKD *const xd = &x->e_mbd; |
2400 | 0 | MODE_INFO *const mi = xd->mi[0]; |
2401 | 0 | struct macroblock_plane *const p = x->plane; |
2402 | 0 | const struct segmentation *const seg = &cm->seg; |
2403 | 0 | const int bw = num_8x8_blocks_wide_lookup[mi->sb_type]; |
2404 | 0 | const int bh = num_8x8_blocks_high_lookup[mi->sb_type]; |
2405 | 0 | const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col); |
2406 | 0 | const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row); |
2407 | |
|
2408 | 0 | *(xd->mi[0]) = ctx->mic; |
2409 | 0 | *(x->mbmi_ext) = ctx->mbmi_ext; |
2410 | |
|
2411 | 0 | if (seg->enabled && (cpi->oxcf.aq_mode != NO_AQ || cpi->roi.enabled || |
2412 | 0 | cpi->active_map.enabled)) { |
2413 | | // Setting segmentation map for cyclic_refresh. |
2414 | 0 | if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && |
2415 | 0 | cpi->cyclic_refresh->content_mode) { |
2416 | 0 | vp9_cyclic_refresh_update_segment(cpi, mi, mi_row, mi_col, bsize, |
2417 | 0 | ctx->rate, ctx->dist, x->skip, p); |
2418 | 0 | } else { |
2419 | 0 | const uint8_t *const map = |
2420 | 0 | seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map; |
2421 | 0 | mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); |
2422 | 0 | } |
2423 | 0 | vp9_init_plane_quantizers(cpi, x); |
2424 | 0 | } |
2425 | |
|
2426 | 0 | if (is_inter_block(mi)) { |
2427 | 0 | vp9_update_mv_count(td); |
2428 | 0 | if (cm->interp_filter == SWITCHABLE) { |
2429 | 0 | const int pred_ctx = get_pred_context_switchable_interp(xd); |
2430 | 0 | ++td->counts->switchable_interp[pred_ctx][mi->interp_filter]; |
2431 | 0 | } |
2432 | |
|
2433 | 0 | if (mi->sb_type < BLOCK_8X8) { |
2434 | 0 | mi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int; |
2435 | 0 | mi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int; |
2436 | 0 | } |
2437 | 0 | } |
2438 | |
|
2439 | 0 | if (cm->use_prev_frame_mvs || !cm->error_resilient_mode || |
2440 | 0 | (cpi->svc.use_base_mv && cpi->svc.number_spatial_layers > 1 && |
2441 | 0 | cpi->svc.spatial_layer_id != cpi->svc.number_spatial_layers - 1)) { |
2442 | 0 | MV_REF *const frame_mvs = |
2443 | 0 | cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col; |
2444 | 0 | int w, h; |
2445 | |
|
2446 | 0 | for (h = 0; h < y_mis; ++h) { |
2447 | 0 | MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols; |
2448 | 0 | for (w = 0; w < x_mis; ++w) { |
2449 | 0 | MV_REF *const mv = frame_mv + w; |
2450 | 0 | mv->ref_frame[0] = mi->ref_frame[0]; |
2451 | 0 | mv->ref_frame[1] = mi->ref_frame[1]; |
2452 | 0 | mv->mv[0].as_int = mi->mv[0].as_int; |
2453 | 0 | mv->mv[1].as_int = mi->mv[1].as_int; |
2454 | 0 | } |
2455 | 0 | } |
2456 | 0 | } |
2457 | |
|
2458 | 0 | x->skip = ctx->skip; |
2459 | 0 | x->skip_txfm[0] = (mi->segment_id || xd->lossless) ? 0 : ctx->skip_txfm[0]; |
2460 | 0 | } |
2461 | | |
2462 | | static void encode_b_rt(VP9_COMP *cpi, ThreadData *td, |
2463 | | const TileInfo *const tile, TOKENEXTRA **tp, int mi_row, |
2464 | | int mi_col, int output_enabled, BLOCK_SIZE bsize, |
2465 | 0 | PICK_MODE_CONTEXT *ctx) { |
2466 | 0 | MACROBLOCK *const x = &td->mb; |
2467 | 0 | set_offsets(cpi, tile, x, mi_row, mi_col, bsize); |
2468 | 0 | update_state_rt(cpi, td, ctx, mi_row, mi_col, bsize); |
2469 | |
|
2470 | 0 | encode_superblock(cpi, td, tp, output_enabled, mi_row, mi_col, bsize, ctx); |
2471 | 0 | update_stats(&cpi->common, td); |
2472 | |
|
2473 | 0 | (*tp)->token = EOSB_TOKEN; |
2474 | 0 | (*tp)++; |
2475 | 0 | } |
2476 | | |
2477 | | static void encode_sb_rt(VP9_COMP *cpi, ThreadData *td, |
2478 | | const TileInfo *const tile, TOKENEXTRA **tp, |
2479 | | int mi_row, int mi_col, int output_enabled, |
2480 | 0 | BLOCK_SIZE bsize, PC_TREE *pc_tree) { |
2481 | 0 | VP9_COMMON *const cm = &cpi->common; |
2482 | 0 | MACROBLOCK *const x = &td->mb; |
2483 | 0 | MACROBLOCKD *const xd = &x->e_mbd; |
2484 | |
|
2485 | 0 | const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4; |
2486 | 0 | int ctx; |
2487 | 0 | PARTITION_TYPE partition; |
2488 | 0 | BLOCK_SIZE subsize; |
2489 | |
|
2490 | 0 | if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; |
2491 | | |
2492 | 0 | if (bsize >= BLOCK_8X8) { |
2493 | 0 | const int idx_str = xd->mi_stride * mi_row + mi_col; |
2494 | 0 | MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str; |
2495 | 0 | ctx = partition_plane_context(xd, mi_row, mi_col, bsize); |
2496 | 0 | subsize = mi_8x8[0]->sb_type; |
2497 | 0 | } else { |
2498 | 0 | ctx = 0; |
2499 | 0 | subsize = BLOCK_4X4; |
2500 | 0 | } |
2501 | |
|
2502 | 0 | partition = partition_lookup[bsl][subsize]; |
2503 | 0 | if (output_enabled && bsize != BLOCK_4X4) |
2504 | 0 | td->counts->partition[ctx][partition]++; |
2505 | |
|
2506 | 0 | switch (partition) { |
2507 | 0 | case PARTITION_NONE: |
2508 | 0 | encode_b_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize, |
2509 | 0 | &pc_tree->none); |
2510 | 0 | break; |
2511 | 0 | case PARTITION_VERT: |
2512 | 0 | encode_b_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize, |
2513 | 0 | &pc_tree->vertical[0]); |
2514 | 0 | if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) { |
2515 | 0 | encode_b_rt(cpi, td, tile, tp, mi_row, mi_col + hbs, output_enabled, |
2516 | 0 | subsize, &pc_tree->vertical[1]); |
2517 | 0 | } |
2518 | 0 | break; |
2519 | 0 | case PARTITION_HORZ: |
2520 | 0 | encode_b_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize, |
2521 | 0 | &pc_tree->horizontal[0]); |
2522 | 0 | if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) { |
2523 | 0 | encode_b_rt(cpi, td, tile, tp, mi_row + hbs, mi_col, output_enabled, |
2524 | 0 | subsize, &pc_tree->horizontal[1]); |
2525 | 0 | } |
2526 | 0 | break; |
2527 | 0 | default: |
2528 | 0 | assert(partition == PARTITION_SPLIT); |
2529 | 0 | subsize = get_subsize(bsize, PARTITION_SPLIT); |
2530 | 0 | encode_sb_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize, |
2531 | 0 | pc_tree->u.split[0]); |
2532 | 0 | encode_sb_rt(cpi, td, tile, tp, mi_row, mi_col + hbs, output_enabled, |
2533 | 0 | subsize, pc_tree->u.split[1]); |
2534 | 0 | encode_sb_rt(cpi, td, tile, tp, mi_row + hbs, mi_col, output_enabled, |
2535 | 0 | subsize, pc_tree->u.split[2]); |
2536 | 0 | encode_sb_rt(cpi, td, tile, tp, mi_row + hbs, mi_col + hbs, |
2537 | 0 | output_enabled, subsize, pc_tree->u.split[3]); |
2538 | 0 | break; |
2539 | 0 | } |
2540 | | |
2541 | 0 | if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8) |
2542 | 0 | update_partition_context(xd, mi_row, mi_col, subsize, bsize); |
2543 | 0 | } |
2544 | | |
2545 | | #if !CONFIG_REALTIME_ONLY |
2546 | | static void rd_use_partition(VP9_COMP *cpi, ThreadData *td, |
2547 | | TileDataEnc *tile_data, MODE_INFO **mi_8x8, |
2548 | | TOKENEXTRA **tp, int mi_row, int mi_col, |
2549 | | BLOCK_SIZE bsize, int *rate, int64_t *dist, |
2550 | 0 | int do_recon, PC_TREE *pc_tree) { |
2551 | 0 | VP9_COMMON *const cm = &cpi->common; |
2552 | 0 | TileInfo *const tile_info = &tile_data->tile_info; |
2553 | 0 | MACROBLOCK *const x = &td->mb; |
2554 | 0 | MACROBLOCKD *const xd = &x->e_mbd; |
2555 | 0 | const int mis = cm->mi_stride; |
2556 | 0 | const int bsl = b_width_log2_lookup[bsize]; |
2557 | 0 | const int mi_step = num_4x4_blocks_wide_lookup[bsize] / 2; |
2558 | 0 | const int bss = (1 << bsl) / 4; |
2559 | 0 | int i, pl; |
2560 | 0 | PARTITION_TYPE partition = PARTITION_NONE; |
2561 | 0 | BLOCK_SIZE subsize; |
2562 | 0 | ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; |
2563 | 0 | PARTITION_CONTEXT sl[8], sa[8]; |
2564 | 0 | RD_COST last_part_rdc, none_rdc, chosen_rdc; |
2565 | 0 | BLOCK_SIZE sub_subsize = BLOCK_4X4; |
2566 | 0 | int splits_below = 0; |
2567 | 0 | BLOCK_SIZE bs_type = mi_8x8[0]->sb_type; |
2568 | 0 | int do_partition_search = 1; |
2569 | 0 | PICK_MODE_CONTEXT *ctx = &pc_tree->none; |
2570 | |
|
2571 | 0 | if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; |
2572 | | |
2573 | 0 | assert(num_4x4_blocks_wide_lookup[bsize] == |
2574 | 0 | num_4x4_blocks_high_lookup[bsize]); |
2575 | |
|
2576 | 0 | vp9_rd_cost_reset(&last_part_rdc); |
2577 | 0 | vp9_rd_cost_reset(&none_rdc); |
2578 | 0 | vp9_rd_cost_reset(&chosen_rdc); |
2579 | |
|
2580 | 0 | partition = partition_lookup[bsl][bs_type]; |
2581 | 0 | subsize = get_subsize(bsize, partition); |
2582 | |
|
2583 | 0 | pc_tree->partitioning = partition; |
2584 | 0 | save_context(x, mi_row, mi_col, a, l, sa, sl, bsize); |
2585 | |
|
2586 | 0 | if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode != NO_AQ) { |
2587 | 0 | set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); |
2588 | 0 | x->mb_energy = vp9_block_energy(cpi, x, bsize); |
2589 | 0 | } |
2590 | |
|
2591 | 0 | if (do_partition_search && |
2592 | 0 | cpi->sf.partition_search_type == SEARCH_PARTITION && |
2593 | 0 | cpi->sf.adjust_partitioning_from_last_frame) { |
2594 | | // Check if any of the sub blocks are further split. |
2595 | 0 | if (partition == PARTITION_SPLIT && subsize > BLOCK_8X8) { |
2596 | 0 | sub_subsize = get_subsize(subsize, PARTITION_SPLIT); |
2597 | 0 | splits_below = 1; |
2598 | 0 | for (i = 0; i < 4; i++) { |
2599 | 0 | int jj = i >> 1, ii = i & 0x01; |
2600 | 0 | MODE_INFO *this_mi = mi_8x8[jj * bss * mis + ii * bss]; |
2601 | 0 | if (this_mi && this_mi->sb_type >= sub_subsize) { |
2602 | 0 | splits_below = 0; |
2603 | 0 | } |
2604 | 0 | } |
2605 | 0 | } |
2606 | | |
2607 | | // If partition is not none try none unless each of the 4 splits are split |
2608 | | // even further.. |
2609 | 0 | if (partition != PARTITION_NONE && !splits_below && |
2610 | 0 | mi_row + (mi_step >> 1) < cm->mi_rows && |
2611 | 0 | mi_col + (mi_step >> 1) < cm->mi_cols) { |
2612 | 0 | pc_tree->partitioning = PARTITION_NONE; |
2613 | 0 | rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &none_rdc, bsize, ctx, |
2614 | 0 | INT_MAX, INT64_MAX); |
2615 | |
|
2616 | 0 | pl = partition_plane_context(xd, mi_row, mi_col, bsize); |
2617 | |
|
2618 | 0 | if (none_rdc.rate < INT_MAX) { |
2619 | 0 | none_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE]; |
2620 | 0 | none_rdc.rdcost = |
2621 | 0 | RDCOST(x->rdmult, x->rddiv, none_rdc.rate, none_rdc.dist); |
2622 | 0 | } |
2623 | |
|
2624 | 0 | restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); |
2625 | 0 | mi_8x8[0]->sb_type = bs_type; |
2626 | 0 | pc_tree->partitioning = partition; |
2627 | 0 | } |
2628 | 0 | } |
2629 | |
|
2630 | 0 | switch (partition) { |
2631 | 0 | case PARTITION_NONE: |
2632 | 0 | rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, bsize, |
2633 | 0 | ctx, INT_MAX, INT64_MAX); |
2634 | 0 | break; |
2635 | 0 | case PARTITION_HORZ: |
2636 | 0 | pc_tree->horizontal[0].skip_ref_frame_mask = 0; |
2637 | 0 | rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, |
2638 | 0 | subsize, &pc_tree->horizontal[0], INT_MAX, INT64_MAX); |
2639 | 0 | if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 && |
2640 | 0 | mi_row + (mi_step >> 1) < cm->mi_rows) { |
2641 | 0 | RD_COST tmp_rdc; |
2642 | 0 | PICK_MODE_CONTEXT *hctx = &pc_tree->horizontal[0]; |
2643 | 0 | vp9_rd_cost_init(&tmp_rdc); |
2644 | 0 | update_state(cpi, td, hctx, mi_row, mi_col, subsize, 0); |
2645 | 0 | encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, hctx); |
2646 | 0 | pc_tree->horizontal[1].skip_ref_frame_mask = 0; |
2647 | 0 | rd_pick_sb_modes(cpi, tile_data, x, mi_row + (mi_step >> 1), mi_col, |
2648 | 0 | &tmp_rdc, subsize, &pc_tree->horizontal[1], INT_MAX, |
2649 | 0 | INT64_MAX); |
2650 | 0 | if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { |
2651 | 0 | vp9_rd_cost_reset(&last_part_rdc); |
2652 | 0 | break; |
2653 | 0 | } |
2654 | 0 | last_part_rdc.rate += tmp_rdc.rate; |
2655 | 0 | last_part_rdc.dist += tmp_rdc.dist; |
2656 | 0 | last_part_rdc.rdcost += tmp_rdc.rdcost; |
2657 | 0 | } |
2658 | 0 | break; |
2659 | 0 | case PARTITION_VERT: |
2660 | 0 | pc_tree->vertical[0].skip_ref_frame_mask = 0; |
2661 | 0 | rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, |
2662 | 0 | subsize, &pc_tree->vertical[0], INT_MAX, INT64_MAX); |
2663 | 0 | if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 && |
2664 | 0 | mi_col + (mi_step >> 1) < cm->mi_cols) { |
2665 | 0 | RD_COST tmp_rdc; |
2666 | 0 | PICK_MODE_CONTEXT *vctx = &pc_tree->vertical[0]; |
2667 | 0 | vp9_rd_cost_init(&tmp_rdc); |
2668 | 0 | update_state(cpi, td, vctx, mi_row, mi_col, subsize, 0); |
2669 | 0 | encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, vctx); |
2670 | 0 | pc_tree->vertical[bsize > BLOCK_8X8].skip_ref_frame_mask = 0; |
2671 | 0 | rd_pick_sb_modes( |
2672 | 0 | cpi, tile_data, x, mi_row, mi_col + (mi_step >> 1), &tmp_rdc, |
2673 | 0 | subsize, &pc_tree->vertical[bsize > BLOCK_8X8], INT_MAX, INT64_MAX); |
2674 | 0 | if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { |
2675 | 0 | vp9_rd_cost_reset(&last_part_rdc); |
2676 | 0 | break; |
2677 | 0 | } |
2678 | 0 | last_part_rdc.rate += tmp_rdc.rate; |
2679 | 0 | last_part_rdc.dist += tmp_rdc.dist; |
2680 | 0 | last_part_rdc.rdcost += tmp_rdc.rdcost; |
2681 | 0 | } |
2682 | 0 | break; |
2683 | 0 | default: |
2684 | 0 | assert(partition == PARTITION_SPLIT); |
2685 | 0 | if (bsize == BLOCK_8X8) { |
2686 | 0 | rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, |
2687 | 0 | subsize, pc_tree->u.leaf_split[0], INT_MAX, INT64_MAX); |
2688 | 0 | break; |
2689 | 0 | } |
2690 | 0 | last_part_rdc.rate = 0; |
2691 | 0 | last_part_rdc.dist = 0; |
2692 | 0 | last_part_rdc.rdcost = 0; |
2693 | 0 | for (i = 0; i < 4; i++) { |
2694 | 0 | int x_idx = (i & 1) * (mi_step >> 1); |
2695 | 0 | int y_idx = (i >> 1) * (mi_step >> 1); |
2696 | 0 | int jj = i >> 1, ii = i & 0x01; |
2697 | 0 | RD_COST tmp_rdc; |
2698 | 0 | if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols)) |
2699 | 0 | continue; |
2700 | | |
2701 | 0 | vp9_rd_cost_init(&tmp_rdc); |
2702 | 0 | rd_use_partition(cpi, td, tile_data, mi_8x8 + jj * bss * mis + ii * bss, |
2703 | 0 | tp, mi_row + y_idx, mi_col + x_idx, subsize, |
2704 | 0 | &tmp_rdc.rate, &tmp_rdc.dist, i != 3, |
2705 | 0 | pc_tree->u.split[i]); |
2706 | 0 | if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { |
2707 | 0 | vp9_rd_cost_reset(&last_part_rdc); |
2708 | 0 | break; |
2709 | 0 | } |
2710 | 0 | last_part_rdc.rate += tmp_rdc.rate; |
2711 | 0 | last_part_rdc.dist += tmp_rdc.dist; |
2712 | 0 | } |
2713 | 0 | break; |
2714 | 0 | } |
2715 | | |
2716 | 0 | pl = partition_plane_context(xd, mi_row, mi_col, bsize); |
2717 | 0 | if (last_part_rdc.rate < INT_MAX) { |
2718 | 0 | last_part_rdc.rate += cpi->partition_cost[pl][partition]; |
2719 | 0 | last_part_rdc.rdcost = |
2720 | 0 | RDCOST(x->rdmult, x->rddiv, last_part_rdc.rate, last_part_rdc.dist); |
2721 | 0 | } |
2722 | |
|
2723 | 0 | if (do_partition_search && cpi->sf.adjust_partitioning_from_last_frame && |
2724 | 0 | cpi->sf.partition_search_type == SEARCH_PARTITION && |
2725 | 0 | partition != PARTITION_SPLIT && bsize > BLOCK_8X8 && |
2726 | 0 | (mi_row + mi_step < cm->mi_rows || |
2727 | 0 | mi_row + (mi_step >> 1) == cm->mi_rows) && |
2728 | 0 | (mi_col + mi_step < cm->mi_cols || |
2729 | 0 | mi_col + (mi_step >> 1) == cm->mi_cols)) { |
2730 | 0 | BLOCK_SIZE split_subsize = get_subsize(bsize, PARTITION_SPLIT); |
2731 | 0 | chosen_rdc.rate = 0; |
2732 | 0 | chosen_rdc.dist = 0; |
2733 | 0 | restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); |
2734 | 0 | pc_tree->partitioning = PARTITION_SPLIT; |
2735 | | |
2736 | | // Split partition. |
2737 | 0 | for (i = 0; i < 4; i++) { |
2738 | 0 | int x_idx = (i & 1) * (mi_step >> 1); |
2739 | 0 | int y_idx = (i >> 1) * (mi_step >> 1); |
2740 | 0 | RD_COST tmp_rdc; |
2741 | |
|
2742 | 0 | if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols)) |
2743 | 0 | continue; |
2744 | | |
2745 | 0 | save_context(x, mi_row, mi_col, a, l, sa, sl, bsize); |
2746 | 0 | pc_tree->u.split[i]->partitioning = PARTITION_NONE; |
2747 | 0 | rd_pick_sb_modes(cpi, tile_data, x, mi_row + y_idx, mi_col + x_idx, |
2748 | 0 | &tmp_rdc, split_subsize, &pc_tree->u.split[i]->none, |
2749 | 0 | INT_MAX, INT64_MAX); |
2750 | |
|
2751 | 0 | restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); |
2752 | |
|
2753 | 0 | if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { |
2754 | 0 | vp9_rd_cost_reset(&chosen_rdc); |
2755 | 0 | break; |
2756 | 0 | } |
2757 | | |
2758 | 0 | chosen_rdc.rate += tmp_rdc.rate; |
2759 | 0 | chosen_rdc.dist += tmp_rdc.dist; |
2760 | |
|
2761 | 0 | if (i != 3) |
2762 | 0 | encode_sb(cpi, td, tile_info, tp, mi_row + y_idx, mi_col + x_idx, 0, |
2763 | 0 | split_subsize, pc_tree->u.split[i]); |
2764 | |
|
2765 | 0 | pl = partition_plane_context(xd, mi_row + y_idx, mi_col + x_idx, |
2766 | 0 | split_subsize); |
2767 | 0 | chosen_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE]; |
2768 | 0 | } |
2769 | 0 | pl = partition_plane_context(xd, mi_row, mi_col, bsize); |
2770 | 0 | if (chosen_rdc.rate < INT_MAX) { |
2771 | 0 | chosen_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT]; |
2772 | 0 | chosen_rdc.rdcost = |
2773 | 0 | RDCOST(x->rdmult, x->rddiv, chosen_rdc.rate, chosen_rdc.dist); |
2774 | 0 | } |
2775 | 0 | } |
2776 | | |
2777 | | // If last_part is better set the partitioning to that. |
2778 | 0 | if (last_part_rdc.rdcost < chosen_rdc.rdcost) { |
2779 | 0 | mi_8x8[0]->sb_type = bsize; |
2780 | 0 | if (bsize >= BLOCK_8X8) pc_tree->partitioning = partition; |
2781 | 0 | chosen_rdc = last_part_rdc; |
2782 | 0 | } |
2783 | | // If none was better set the partitioning to that. |
2784 | 0 | if (none_rdc.rdcost < chosen_rdc.rdcost) { |
2785 | 0 | if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE; |
2786 | 0 | chosen_rdc = none_rdc; |
2787 | 0 | } |
2788 | |
|
2789 | 0 | restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); |
2790 | | |
2791 | | // We must have chosen a partitioning and encoding or we'll fail later on. |
2792 | | // No other opportunities for success. |
2793 | 0 | if (bsize == BLOCK_64X64) |
2794 | 0 | assert(chosen_rdc.rate < INT_MAX && chosen_rdc.dist < INT64_MAX); |
2795 | |
|
2796 | 0 | if (do_recon) { |
2797 | 0 | int output_enabled = (bsize == BLOCK_64X64); |
2798 | 0 | encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize, |
2799 | 0 | pc_tree); |
2800 | 0 | } |
2801 | |
|
2802 | 0 | *rate = chosen_rdc.rate; |
2803 | 0 | *dist = chosen_rdc.dist; |
2804 | 0 | } |
2805 | | |
2806 | | static const BLOCK_SIZE min_partition_size[BLOCK_SIZES] = { |
2807 | | BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, |
2808 | | BLOCK_4X4, BLOCK_8X8, BLOCK_8X8, BLOCK_8X8, BLOCK_16X16, |
2809 | | BLOCK_16X16, BLOCK_16X16, BLOCK_16X16 |
2810 | | }; |
2811 | | |
2812 | | static const BLOCK_SIZE max_partition_size[BLOCK_SIZES] = { |
2813 | | BLOCK_8X8, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, BLOCK_32X32, |
2814 | | BLOCK_32X32, BLOCK_32X32, BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, |
2815 | | BLOCK_64X64, BLOCK_64X64, BLOCK_64X64 |
2816 | | }; |
2817 | | |
2818 | | // Look at all the mode_info entries for blocks that are part of this |
2819 | | // partition and find the min and max values for sb_type. |
2820 | | // At the moment this is designed to work on a 64x64 SB but could be |
2821 | | // adjusted to use a size parameter. |
2822 | | // |
2823 | | // The min and max are assumed to have been initialized prior to calling this |
2824 | | // function so repeat calls can accumulate a min and max of more than one sb64. |
2825 | | static void get_sb_partition_size_range(MACROBLOCKD *xd, MODE_INFO **mi_8x8, |
2826 | | BLOCK_SIZE *min_block_size, |
2827 | | BLOCK_SIZE *max_block_size, |
2828 | 0 | int bs_hist[BLOCK_SIZES]) { |
2829 | 0 | int sb_width_in_blocks = MI_BLOCK_SIZE; |
2830 | 0 | int sb_height_in_blocks = MI_BLOCK_SIZE; |
2831 | 0 | int i, j; |
2832 | 0 | int index = 0; |
2833 | | |
2834 | | // Check the sb_type for each block that belongs to this region. |
2835 | 0 | for (i = 0; i < sb_height_in_blocks; ++i) { |
2836 | 0 | for (j = 0; j < sb_width_in_blocks; ++j) { |
2837 | 0 | MODE_INFO *mi = mi_8x8[index + j]; |
2838 | 0 | BLOCK_SIZE sb_type = mi ? mi->sb_type : 0; |
2839 | 0 | bs_hist[sb_type]++; |
2840 | 0 | *min_block_size = VPXMIN(*min_block_size, sb_type); |
2841 | 0 | *max_block_size = VPXMAX(*max_block_size, sb_type); |
2842 | 0 | } |
2843 | 0 | index += xd->mi_stride; |
2844 | 0 | } |
2845 | 0 | } |
2846 | | |
2847 | | // Next square block size less or equal than current block size. |
2848 | | static const BLOCK_SIZE next_square_size[BLOCK_SIZES] = { |
2849 | | BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, BLOCK_8X8, BLOCK_8X8, |
2850 | | BLOCK_8X8, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, BLOCK_32X32, |
2851 | | BLOCK_32X32, BLOCK_32X32, BLOCK_64X64 |
2852 | | }; |
2853 | | |
2854 | | // Look at neighboring blocks and set a min and max partition size based on |
2855 | | // what they chose. |
2856 | | static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile, |
2857 | | MACROBLOCKD *const xd, int mi_row, |
2858 | | int mi_col, BLOCK_SIZE *min_block_size, |
2859 | 0 | BLOCK_SIZE *max_block_size) { |
2860 | 0 | VP9_COMMON *const cm = &cpi->common; |
2861 | 0 | MODE_INFO **mi = xd->mi; |
2862 | 0 | const int left_in_image = !!xd->left_mi; |
2863 | 0 | const int above_in_image = !!xd->above_mi; |
2864 | 0 | const int row8x8_remaining = tile->mi_row_end - mi_row; |
2865 | 0 | const int col8x8_remaining = tile->mi_col_end - mi_col; |
2866 | 0 | int bh, bw; |
2867 | 0 | BLOCK_SIZE min_size = BLOCK_4X4; |
2868 | 0 | BLOCK_SIZE max_size = BLOCK_64X64; |
2869 | 0 | int bs_hist[BLOCK_SIZES] = { 0 }; |
2870 | | |
2871 | | // Trap case where we do not have a prediction. |
2872 | 0 | if (left_in_image || above_in_image || cm->frame_type != KEY_FRAME) { |
2873 | | // Default "min to max" and "max to min" |
2874 | 0 | min_size = BLOCK_64X64; |
2875 | 0 | max_size = BLOCK_4X4; |
2876 | | |
2877 | | // NOTE: each call to get_sb_partition_size_range() uses the previous |
2878 | | // passed in values for min and max as a starting point. |
2879 | | // Find the min and max partition used in previous frame at this location |
2880 | 0 | if (cm->frame_type != KEY_FRAME) { |
2881 | 0 | MODE_INFO **prev_mi = |
2882 | 0 | &cm->prev_mi_grid_visible[mi_row * xd->mi_stride + mi_col]; |
2883 | 0 | get_sb_partition_size_range(xd, prev_mi, &min_size, &max_size, bs_hist); |
2884 | 0 | } |
2885 | | // Find the min and max partition sizes used in the left SB64 |
2886 | 0 | if (left_in_image) { |
2887 | 0 | MODE_INFO **left_sb64_mi = &mi[-MI_BLOCK_SIZE]; |
2888 | 0 | get_sb_partition_size_range(xd, left_sb64_mi, &min_size, &max_size, |
2889 | 0 | bs_hist); |
2890 | 0 | } |
2891 | | // Find the min and max partition sizes used in the above SB64. |
2892 | 0 | if (above_in_image) { |
2893 | 0 | MODE_INFO **above_sb64_mi = &mi[-xd->mi_stride * MI_BLOCK_SIZE]; |
2894 | 0 | get_sb_partition_size_range(xd, above_sb64_mi, &min_size, &max_size, |
2895 | 0 | bs_hist); |
2896 | 0 | } |
2897 | | |
2898 | | // Adjust observed min and max for "relaxed" auto partition case. |
2899 | 0 | if (cpi->sf.auto_min_max_partition_size == RELAXED_NEIGHBORING_MIN_MAX) { |
2900 | 0 | min_size = min_partition_size[min_size]; |
2901 | 0 | max_size = max_partition_size[max_size]; |
2902 | 0 | } |
2903 | 0 | } |
2904 | | |
2905 | | // Check border cases where max and min from neighbors may not be legal. |
2906 | 0 | max_size = find_partition_size(max_size, row8x8_remaining, col8x8_remaining, |
2907 | 0 | &bh, &bw); |
2908 | | // Test for blocks at the edge of the active image. |
2909 | | // This may be the actual edge of the image or where there are formatting |
2910 | | // bars. |
2911 | 0 | if (vp9_active_edge_sb(cpi, mi_row, mi_col)) { |
2912 | 0 | min_size = BLOCK_4X4; |
2913 | 0 | } else { |
2914 | 0 | min_size = |
2915 | 0 | VPXMIN(cpi->sf.rd_auto_partition_min_limit, VPXMIN(min_size, max_size)); |
2916 | 0 | } |
2917 | | |
2918 | | // When use_square_partition_only is true, make sure at least one square |
2919 | | // partition is allowed by selecting the next smaller square size as |
2920 | | // *min_block_size. |
2921 | 0 | if (cpi->sf.use_square_partition_only && |
2922 | 0 | next_square_size[max_size] < min_size) { |
2923 | 0 | min_size = next_square_size[max_size]; |
2924 | 0 | } |
2925 | |
|
2926 | 0 | *min_block_size = min_size; |
2927 | 0 | *max_block_size = max_size; |
2928 | 0 | } |
2929 | | |
2930 | | // TODO(jingning) refactor functions setting partition search range |
2931 | | static void set_partition_range(VP9_COMMON *cm, MACROBLOCKD *xd, int mi_row, |
2932 | | int mi_col, BLOCK_SIZE bsize, |
2933 | 0 | BLOCK_SIZE *min_bs, BLOCK_SIZE *max_bs) { |
2934 | 0 | int mi_width = num_8x8_blocks_wide_lookup[bsize]; |
2935 | 0 | int mi_height = num_8x8_blocks_high_lookup[bsize]; |
2936 | 0 | int idx, idy; |
2937 | |
|
2938 | 0 | MODE_INFO *mi; |
2939 | 0 | const int idx_str = cm->mi_stride * mi_row + mi_col; |
2940 | 0 | MODE_INFO **prev_mi = &cm->prev_mi_grid_visible[idx_str]; |
2941 | 0 | BLOCK_SIZE bs, min_size, max_size; |
2942 | |
|
2943 | 0 | min_size = BLOCK_64X64; |
2944 | 0 | max_size = BLOCK_4X4; |
2945 | |
|
2946 | 0 | for (idy = 0; idy < mi_height; ++idy) { |
2947 | 0 | for (idx = 0; idx < mi_width; ++idx) { |
2948 | 0 | mi = prev_mi[idy * cm->mi_stride + idx]; |
2949 | 0 | bs = mi ? mi->sb_type : bsize; |
2950 | 0 | min_size = VPXMIN(min_size, bs); |
2951 | 0 | max_size = VPXMAX(max_size, bs); |
2952 | 0 | } |
2953 | 0 | } |
2954 | |
|
2955 | 0 | if (xd->left_mi) { |
2956 | 0 | for (idy = 0; idy < mi_height; ++idy) { |
2957 | 0 | mi = xd->mi[idy * cm->mi_stride - 1]; |
2958 | 0 | bs = mi ? mi->sb_type : bsize; |
2959 | 0 | min_size = VPXMIN(min_size, bs); |
2960 | 0 | max_size = VPXMAX(max_size, bs); |
2961 | 0 | } |
2962 | 0 | } |
2963 | |
|
2964 | 0 | if (xd->above_mi) { |
2965 | 0 | for (idx = 0; idx < mi_width; ++idx) { |
2966 | 0 | mi = xd->mi[idx - cm->mi_stride]; |
2967 | 0 | bs = mi ? mi->sb_type : bsize; |
2968 | 0 | min_size = VPXMIN(min_size, bs); |
2969 | 0 | max_size = VPXMAX(max_size, bs); |
2970 | 0 | } |
2971 | 0 | } |
2972 | |
|
2973 | 0 | if (min_size == max_size) { |
2974 | 0 | min_size = min_partition_size[min_size]; |
2975 | 0 | max_size = max_partition_size[max_size]; |
2976 | 0 | } |
2977 | |
|
2978 | 0 | *min_bs = min_size; |
2979 | 0 | *max_bs = max_size; |
2980 | 0 | } |
2981 | | #endif // !CONFIG_REALTIME_ONLY |
2982 | | |
2983 | 4.05M | static INLINE void store_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) { |
2984 | 4.05M | memcpy(ctx->pred_mv, x->pred_mv, sizeof(x->pred_mv)); |
2985 | 4.05M | } |
2986 | | |
2987 | 7.43M | static INLINE void load_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) { |
2988 | 7.43M | memcpy(x->pred_mv, ctx->pred_mv, sizeof(x->pred_mv)); |
2989 | 7.43M | } |
2990 | | |
2991 | | // Calculate prediction based on the given input features and neural net config. |
2992 | | // Assume there are no more than NN_MAX_NODES_PER_LAYER nodes in each hidden |
2993 | | // layer. |
2994 | | static void nn_predict(const float *features, const NN_CONFIG *nn_config, |
2995 | 1.14M | float *output) { |
2996 | 1.14M | int num_input_nodes = nn_config->num_inputs; |
2997 | 1.14M | int buf_index = 0; |
2998 | 1.14M | float buf[2][NN_MAX_NODES_PER_LAYER]; |
2999 | 1.14M | const float *input_nodes = features; |
3000 | | |
3001 | | // Propagate hidden layers. |
3002 | 1.14M | const int num_layers = nn_config->num_hidden_layers; |
3003 | 1.14M | int layer, node, i; |
3004 | 1.14M | assert(num_layers <= NN_MAX_HIDDEN_LAYERS); |
3005 | 2.28M | for (layer = 0; layer < num_layers; ++layer) { |
3006 | 1.14M | const float *weights = nn_config->weights[layer]; |
3007 | 1.14M | const float *bias = nn_config->bias[layer]; |
3008 | 1.14M | float *output_nodes = buf[buf_index]; |
3009 | 1.14M | const int num_output_nodes = nn_config->num_hidden_nodes[layer]; |
3010 | 1.14M | assert(num_output_nodes < NN_MAX_NODES_PER_LAYER); |
3011 | 11.8M | for (node = 0; node < num_output_nodes; ++node) { |
3012 | 10.7M | float val = 0.0f; |
3013 | 126M | for (i = 0; i < num_input_nodes; ++i) val += weights[i] * input_nodes[i]; |
3014 | 10.7M | val += bias[node]; |
3015 | | // ReLU as activation function. |
3016 | 10.7M | val = VPXMAX(val, 0.0f); |
3017 | 10.7M | output_nodes[node] = val; |
3018 | 10.7M | weights += num_input_nodes; |
3019 | 10.7M | } |
3020 | 1.14M | num_input_nodes = num_output_nodes; |
3021 | 1.14M | input_nodes = output_nodes; |
3022 | 1.14M | buf_index = 1 - buf_index; |
3023 | 1.14M | } |
3024 | | |
3025 | | // Final output layer. |
3026 | 1.14M | { |
3027 | 1.14M | const float *weights = nn_config->weights[num_layers]; |
3028 | 2.87M | for (node = 0; node < nn_config->num_outputs; ++node) { |
3029 | 1.73M | const float *bias = nn_config->bias[num_layers]; |
3030 | 1.73M | float val = 0.0f; |
3031 | 21.9M | for (i = 0; i < num_input_nodes; ++i) val += weights[i] * input_nodes[i]; |
3032 | 1.73M | output[node] = val + bias[node]; |
3033 | 1.73M | weights += num_input_nodes; |
3034 | 1.73M | } |
3035 | 1.14M | } |
3036 | 1.14M | } |
3037 | | |
3038 | | #if !CONFIG_REALTIME_ONLY |
3039 | 0 | #define FEATURES 7 |
3040 | | // Machine-learning based partition search early termination. |
3041 | | // Return 1 to skip split and rect partitions. |
3042 | | static int ml_pruning_partition(VP9_COMMON *const cm, MACROBLOCKD *const xd, |
3043 | | PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col, |
3044 | 0 | BLOCK_SIZE bsize) { |
3045 | 0 | const int mag_mv = |
3046 | 0 | abs(ctx->mic.mv[0].as_mv.col) + abs(ctx->mic.mv[0].as_mv.row); |
3047 | 0 | const int left_in_image = !!xd->left_mi; |
3048 | 0 | const int above_in_image = !!xd->above_mi; |
3049 | 0 | MODE_INFO **prev_mi = |
3050 | 0 | &cm->prev_mi_grid_visible[mi_col + cm->mi_stride * mi_row]; |
3051 | 0 | int above_par = 0; // above_partitioning |
3052 | 0 | int left_par = 0; // left_partitioning |
3053 | 0 | int last_par = 0; // last_partitioning |
3054 | 0 | int offset = 0; |
3055 | 0 | int i; |
3056 | 0 | BLOCK_SIZE context_size; |
3057 | 0 | const NN_CONFIG *nn_config = NULL; |
3058 | 0 | const float *mean, *sd, *linear_weights; |
3059 | 0 | float nn_score, linear_score; |
3060 | 0 | float features[FEATURES]; |
3061 | |
|
3062 | 0 | assert(b_width_log2_lookup[bsize] == b_height_log2_lookup[bsize]); |
3063 | 0 | vpx_clear_system_state(); |
3064 | |
|
3065 | 0 | switch (bsize) { |
3066 | 0 | case BLOCK_64X64: |
3067 | 0 | offset = 0; |
3068 | 0 | nn_config = &vp9_partition_nnconfig_64x64; |
3069 | 0 | break; |
3070 | 0 | case BLOCK_32X32: |
3071 | 0 | offset = 8; |
3072 | 0 | nn_config = &vp9_partition_nnconfig_32x32; |
3073 | 0 | break; |
3074 | 0 | case BLOCK_16X16: |
3075 | 0 | offset = 16; |
3076 | 0 | nn_config = &vp9_partition_nnconfig_16x16; |
3077 | 0 | break; |
3078 | 0 | default: assert(0 && "Unexpected block size."); return 0; |
3079 | 0 | } |
3080 | | |
3081 | 0 | if (above_in_image) { |
3082 | 0 | context_size = xd->above_mi->sb_type; |
3083 | 0 | if (context_size < bsize) |
3084 | 0 | above_par = 2; |
3085 | 0 | else if (context_size == bsize) |
3086 | 0 | above_par = 1; |
3087 | 0 | } |
3088 | |
|
3089 | 0 | if (left_in_image) { |
3090 | 0 | context_size = xd->left_mi->sb_type; |
3091 | 0 | if (context_size < bsize) |
3092 | 0 | left_par = 2; |
3093 | 0 | else if (context_size == bsize) |
3094 | 0 | left_par = 1; |
3095 | 0 | } |
3096 | |
|
3097 | 0 | if (prev_mi[0]) { |
3098 | 0 | context_size = prev_mi[0]->sb_type; |
3099 | 0 | if (context_size < bsize) |
3100 | 0 | last_par = 2; |
3101 | 0 | else if (context_size == bsize) |
3102 | 0 | last_par = 1; |
3103 | 0 | } |
3104 | |
|
3105 | 0 | mean = &vp9_partition_feature_mean[offset]; |
3106 | 0 | sd = &vp9_partition_feature_std[offset]; |
3107 | 0 | features[0] = ((float)ctx->rate - mean[0]) / sd[0]; |
3108 | 0 | features[1] = ((float)ctx->dist - mean[1]) / sd[1]; |
3109 | 0 | features[2] = ((float)mag_mv / 2 - mean[2]) * sd[2]; |
3110 | 0 | features[3] = ((float)(left_par + above_par) / 2 - mean[3]) * sd[3]; |
3111 | 0 | features[4] = ((float)ctx->sum_y_eobs - mean[4]) / sd[4]; |
3112 | 0 | features[5] = ((float)cm->base_qindex - mean[5]) * sd[5]; |
3113 | 0 | features[6] = ((float)last_par - mean[6]) * sd[6]; |
3114 | | |
3115 | | // Predict using linear model. |
3116 | 0 | linear_weights = &vp9_partition_linear_weights[offset]; |
3117 | 0 | linear_score = linear_weights[FEATURES]; |
3118 | 0 | for (i = 0; i < FEATURES; ++i) |
3119 | 0 | linear_score += linear_weights[i] * features[i]; |
3120 | 0 | if (linear_score > 0.1f) return 0; |
3121 | | |
3122 | | // Predict using neural net model. |
3123 | 0 | nn_predict(features, nn_config, &nn_score); |
3124 | |
|
3125 | 0 | if (linear_score < -0.0f && nn_score < 0.1f) return 1; |
3126 | 0 | if (nn_score < -0.0f && linear_score < 0.1f) return 1; |
3127 | 0 | return 0; |
3128 | 0 | } |
3129 | | #undef FEATURES |
3130 | | |
3131 | 241k | #define FEATURES 4 |
3132 | | // ML-based partition search breakout. |
3133 | | static int ml_predict_breakout(VP9_COMP *const cpi, BLOCK_SIZE bsize, |
3134 | | const MACROBLOCK *const x, |
3135 | 40.3k | const RD_COST *const rd_cost) { |
3136 | 40.3k | DECLARE_ALIGNED(16, static const uint8_t, vp9_64_zeros[64]) = { 0 }; |
3137 | 40.3k | const VP9_COMMON *const cm = &cpi->common; |
3138 | 40.3k | float features[FEATURES]; |
3139 | 40.3k | const float *linear_weights = NULL; // Linear model weights. |
3140 | 40.3k | float linear_score = 0.0f; |
3141 | 40.3k | const int qindex = cm->base_qindex; |
3142 | 40.3k | const int q_ctx = qindex >= 200 ? 0 : (qindex >= 150 ? 1 : 2); |
3143 | 40.3k | const int is_720p_or_larger = VPXMIN(cm->width, cm->height) >= 720; |
3144 | 40.3k | const int resolution_ctx = is_720p_or_larger ? 1 : 0; |
3145 | | |
3146 | 40.3k | switch (bsize) { |
3147 | 208 | case BLOCK_64X64: |
3148 | 208 | linear_weights = vp9_partition_breakout_weights_64[resolution_ctx][q_ctx]; |
3149 | 208 | break; |
3150 | 1.16k | case BLOCK_32X32: |
3151 | 1.16k | linear_weights = vp9_partition_breakout_weights_32[resolution_ctx][q_ctx]; |
3152 | 1.16k | break; |
3153 | 5.63k | case BLOCK_16X16: |
3154 | 5.63k | linear_weights = vp9_partition_breakout_weights_16[resolution_ctx][q_ctx]; |
3155 | 5.63k | break; |
3156 | 33.3k | case BLOCK_8X8: |
3157 | 33.3k | linear_weights = vp9_partition_breakout_weights_8[resolution_ctx][q_ctx]; |
3158 | 33.3k | break; |
3159 | 0 | default: assert(0 && "Unexpected block size."); return 0; |
3160 | 40.3k | } |
3161 | 40.3k | if (!linear_weights) return 0; |
3162 | | |
3163 | 40.3k | { // Generate feature values. |
3164 | 40.3k | #if CONFIG_VP9_HIGHBITDEPTH |
3165 | 40.3k | const int ac_q = |
3166 | 40.3k | vp9_ac_quant(cm->base_qindex, 0, cm->bit_depth) >> (x->e_mbd.bd - 8); |
3167 | | #else |
3168 | | const int ac_q = vp9_ac_quant(qindex, 0, cm->bit_depth); |
3169 | | #endif // CONFIG_VP9_HIGHBITDEPTH |
3170 | 40.3k | const int num_pels_log2 = num_pels_log2_lookup[bsize]; |
3171 | 40.3k | int feature_index = 0; |
3172 | 40.3k | unsigned int var, sse; |
3173 | 40.3k | float rate_f, dist_f; |
3174 | | |
3175 | 40.3k | #if CONFIG_VP9_HIGHBITDEPTH |
3176 | 40.3k | if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { |
3177 | 0 | var = |
3178 | 0 | vp9_high_get_sby_variance(cpi, &x->plane[0].src, bsize, x->e_mbd.bd); |
3179 | 40.3k | } else { |
3180 | 40.3k | var = cpi->fn_ptr[bsize].vf(x->plane[0].src.buf, x->plane[0].src.stride, |
3181 | 40.3k | vp9_64_zeros, 0, &sse); |
3182 | 40.3k | } |
3183 | | #else |
3184 | | var = cpi->fn_ptr[bsize].vf(x->plane[0].src.buf, x->plane[0].src.stride, |
3185 | | vp9_64_zeros, 0, &sse); |
3186 | | #endif |
3187 | 40.3k | var = var >> num_pels_log2; |
3188 | | |
3189 | 40.3k | vpx_clear_system_state(); |
3190 | | |
3191 | 40.3k | rate_f = (float)VPXMIN(rd_cost->rate, INT_MAX); |
3192 | 40.3k | dist_f = (float)(VPXMIN(rd_cost->dist, INT_MAX) >> num_pels_log2); |
3193 | 40.3k | rate_f = |
3194 | 40.3k | ((float)x->rdmult / 128.0f / 512.0f / (float)(1 << num_pels_log2)) * |
3195 | 40.3k | rate_f; |
3196 | | |
3197 | 40.3k | features[feature_index++] = rate_f; |
3198 | 40.3k | features[feature_index++] = dist_f; |
3199 | 40.3k | features[feature_index++] = (float)var; |
3200 | 40.3k | features[feature_index++] = (float)ac_q; |
3201 | 40.3k | assert(feature_index == FEATURES); |
3202 | 40.3k | } |
3203 | | |
3204 | 40.3k | { // Calculate the output score. |
3205 | 40.3k | int i; |
3206 | 40.3k | linear_score = linear_weights[FEATURES]; |
3207 | 201k | for (i = 0; i < FEATURES; ++i) |
3208 | 161k | linear_score += linear_weights[i] * features[i]; |
3209 | 40.3k | } |
3210 | | |
3211 | 40.3k | return linear_score >= cpi->sf.rd_ml_partition.search_breakout_thresh[q_ctx]; |
3212 | 40.3k | } |
3213 | | #undef FEATURES |
3214 | | |
3215 | | #define FEATURES 8 |
3216 | 1.98M | #define LABELS 4 |
3217 | | static void ml_prune_rect_partition(VP9_COMP *const cpi, MACROBLOCK *const x, |
3218 | | BLOCK_SIZE bsize, |
3219 | | const PC_TREE *const pc_tree, |
3220 | | int *allow_horz, int *allow_vert, |
3221 | 589k | int64_t ref_rd) { |
3222 | 589k | const NN_CONFIG *nn_config = NULL; |
3223 | 589k | float score[LABELS] = { |
3224 | 589k | 0.0f, |
3225 | 589k | }; |
3226 | 589k | int thresh = -1; |
3227 | 589k | int i; |
3228 | 589k | (void)x; |
3229 | | |
3230 | 589k | if (ref_rd <= 0 || ref_rd > 1000000000) return; |
3231 | | |
3232 | 447k | switch (bsize) { |
3233 | 0 | case BLOCK_8X8: break; |
3234 | 348k | case BLOCK_16X16: |
3235 | 348k | nn_config = &vp9_rect_part_nnconfig_16; |
3236 | 348k | thresh = cpi->sf.rd_ml_partition.prune_rect_thresh[1]; |
3237 | 348k | break; |
3238 | 87.5k | case BLOCK_32X32: |
3239 | 87.5k | nn_config = &vp9_rect_part_nnconfig_32; |
3240 | 87.5k | thresh = cpi->sf.rd_ml_partition.prune_rect_thresh[2]; |
3241 | 87.5k | break; |
3242 | 11.2k | case BLOCK_64X64: |
3243 | 11.2k | nn_config = &vp9_rect_part_nnconfig_64; |
3244 | 11.2k | thresh = cpi->sf.rd_ml_partition.prune_rect_thresh[3]; |
3245 | 11.2k | break; |
3246 | 0 | default: assert(0 && "Unexpected block size."); return; |
3247 | 447k | } |
3248 | 447k | if (!nn_config || thresh < 0) return; |
3249 | | |
3250 | | // Feature extraction and model score calculation. |
3251 | 198k | { |
3252 | 198k | const VP9_COMMON *const cm = &cpi->common; |
3253 | 198k | #if CONFIG_VP9_HIGHBITDEPTH |
3254 | 198k | const int dc_q = |
3255 | 198k | vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth) >> (x->e_mbd.bd - 8); |
3256 | | #else |
3257 | | const int dc_q = vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth); |
3258 | | #endif // CONFIG_VP9_HIGHBITDEPTH |
3259 | 198k | const int bs = 4 * num_4x4_blocks_wide_lookup[bsize]; |
3260 | 198k | int feature_index = 0; |
3261 | 198k | float features[FEATURES]; |
3262 | | |
3263 | 198k | features[feature_index++] = logf((float)dc_q + 1.0f); |
3264 | 198k | features[feature_index++] = |
3265 | 198k | (float)(pc_tree->partitioning == PARTITION_NONE); |
3266 | 198k | features[feature_index++] = logf((float)ref_rd / bs / bs + 1.0f); |
3267 | | |
3268 | 198k | { |
3269 | 198k | const float norm_factor = 1.0f / ((float)ref_rd + 1.0f); |
3270 | 198k | const int64_t none_rdcost = pc_tree->none.rdcost; |
3271 | 198k | float rd_ratio = 2.0f; |
3272 | 198k | if (none_rdcost > 0 && none_rdcost < 1000000000) |
3273 | 143k | rd_ratio = (float)none_rdcost * norm_factor; |
3274 | 198k | features[feature_index++] = VPXMIN(rd_ratio, 2.0f); |
3275 | | |
3276 | 992k | for (i = 0; i < 4; ++i) { |
3277 | 794k | const int64_t this_rd = pc_tree->u.split[i]->none.rdcost; |
3278 | 794k | const int rd_valid = this_rd > 0 && this_rd < 1000000000; |
3279 | | // Ratio between sub-block RD and whole block RD. |
3280 | 794k | features[feature_index++] = |
3281 | 794k | rd_valid ? (float)this_rd * norm_factor : 1.0f; |
3282 | 794k | } |
3283 | 198k | } |
3284 | | |
3285 | 198k | assert(feature_index == FEATURES); |
3286 | 198k | nn_predict(features, nn_config, score); |
3287 | 198k | } |
3288 | | |
3289 | | // Make decisions based on the model score. |
3290 | 198k | { |
3291 | 198k | int max_score = -1000; |
3292 | 198k | int horz = 0, vert = 0; |
3293 | 198k | int int_score[LABELS]; |
3294 | 992k | for (i = 0; i < LABELS; ++i) { |
3295 | 794k | int_score[i] = (int)(100 * score[i]); |
3296 | 794k | max_score = VPXMAX(int_score[i], max_score); |
3297 | 794k | } |
3298 | 198k | thresh = max_score - thresh; |
3299 | 992k | for (i = 0; i < LABELS; ++i) { |
3300 | 794k | if (int_score[i] >= thresh) { |
3301 | 238k | if ((i >> 0) & 1) horz = 1; |
3302 | 238k | if ((i >> 1) & 1) vert = 1; |
3303 | 238k | } |
3304 | 794k | } |
3305 | 198k | *allow_horz = *allow_horz && horz; |
3306 | 198k | *allow_vert = *allow_vert && vert; |
3307 | 198k | } |
3308 | 198k | } |
3309 | | #undef FEATURES |
3310 | | #undef LABELS |
3311 | | |
3312 | | // Perform fast and coarse motion search for the given block. This is a |
3313 | | // pre-processing step for the ML based partition search speedup. |
3314 | | static void simple_motion_search(const VP9_COMP *const cpi, MACROBLOCK *const x, |
3315 | | BLOCK_SIZE bsize, int mi_row, int mi_col, |
3316 | | MV ref_mv, MV_REFERENCE_FRAME ref, |
3317 | 942k | uint8_t *const pred_buf) { |
3318 | 942k | const VP9_COMMON *const cm = &cpi->common; |
3319 | 942k | MACROBLOCKD *const xd = &x->e_mbd; |
3320 | 942k | MODE_INFO *const mi = xd->mi[0]; |
3321 | 942k | YV12_BUFFER_CONFIG *yv12; |
3322 | 942k | YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi, ref); |
3323 | 942k | const int step_param = 1; |
3324 | 942k | const MvLimits tmp_mv_limits = x->mv_limits; |
3325 | 942k | const SEARCH_METHODS search_method = NSTEP; |
3326 | 942k | const int sadpb = x->sadperbit16; |
3327 | 942k | MV ref_mv_full = { ref_mv.row >> 3, ref_mv.col >> 3 }; |
3328 | 942k | MV best_mv = { 0, 0 }; |
3329 | 942k | int cost_list[5]; |
3330 | 942k | struct buf_2d backup_pre[MAX_MB_PLANE] = { { 0, 0 } }; |
3331 | | |
3332 | 942k | if (scaled_ref_frame) { |
3333 | 0 | yv12 = scaled_ref_frame; |
3334 | | // As reported in b/311294795, the reference buffer pointer needs to be |
3335 | | // saved and restored after the search. Otherwise, it causes problems while |
3336 | | // the reference frame scaling happens. |
3337 | 0 | for (int i = 0; i < MAX_MB_PLANE; i++) backup_pre[i] = xd->plane[i].pre[0]; |
3338 | 942k | } else { |
3339 | 942k | yv12 = get_ref_frame_buffer(cpi, ref); |
3340 | 942k | } |
3341 | | |
3342 | 942k | assert(yv12 != NULL); |
3343 | 942k | if (!yv12) return; |
3344 | 942k | vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col, NULL); |
3345 | 942k | mi->ref_frame[0] = ref; |
3346 | 942k | mi->ref_frame[1] = NO_REF_FRAME; |
3347 | 942k | mi->sb_type = bsize; |
3348 | 942k | vp9_set_mv_search_range(&x->mv_limits, &ref_mv); |
3349 | 942k | vp9_full_pixel_search(cpi, x, bsize, &ref_mv_full, step_param, search_method, |
3350 | 942k | sadpb, cond_cost_list(cpi, cost_list), &ref_mv, |
3351 | 942k | &best_mv, 0, 0); |
3352 | 942k | best_mv.row *= 8; |
3353 | 942k | best_mv.col *= 8; |
3354 | 942k | x->mv_limits = tmp_mv_limits; |
3355 | 942k | mi->mv[0].as_mv = best_mv; |
3356 | | |
3357 | | // Restore reference buffer pointer. |
3358 | 942k | if (scaled_ref_frame) { |
3359 | 0 | for (int i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_pre[i]; |
3360 | 0 | } |
3361 | | |
3362 | 942k | set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]); |
3363 | 942k | xd->plane[0].dst.buf = pred_buf; |
3364 | 942k | xd->plane[0].dst.stride = 64; |
3365 | 942k | vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); |
3366 | 942k | } |
3367 | | |
3368 | | // Use a neural net model to prune partition-none and partition-split search. |
3369 | | // Features used: QP; spatial block size contexts; variance of prediction |
3370 | | // residue after simple_motion_search. |
3371 | | #define FEATURES 12 |
3372 | | static void ml_predict_var_rd_partitioning(const VP9_COMP *const cpi, |
3373 | | MACROBLOCK *const x, |
3374 | | PC_TREE *const pc_tree, |
3375 | | BLOCK_SIZE bsize, int mi_row, |
3376 | 942k | int mi_col, int *none, int *split) { |
3377 | 942k | const VP9_COMMON *const cm = &cpi->common; |
3378 | 942k | const NN_CONFIG *nn_config = NULL; |
3379 | 942k | const MACROBLOCKD *const xd = &x->e_mbd; |
3380 | 942k | #if CONFIG_VP9_HIGHBITDEPTH |
3381 | 942k | DECLARE_ALIGNED(16, uint8_t, pred_buffer[64 * 64 * 2]); |
3382 | 942k | uint8_t *const pred_buf = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) |
3383 | 942k | ? (CONVERT_TO_BYTEPTR(pred_buffer)) |
3384 | 942k | : pred_buffer; |
3385 | | #else |
3386 | | DECLARE_ALIGNED(16, uint8_t, pred_buffer[64 * 64]); |
3387 | | uint8_t *const pred_buf = pred_buffer; |
3388 | | #endif // CONFIG_VP9_HIGHBITDEPTH |
3389 | 942k | const int speed = cpi->oxcf.speed; |
3390 | 942k | float thresh = 0.0f; |
3391 | | |
3392 | 942k | switch (bsize) { |
3393 | 10.6k | case BLOCK_64X64: |
3394 | 10.6k | nn_config = &vp9_part_split_nnconfig_64; |
3395 | 10.6k | thresh = speed > 0 ? 2.8f : 3.0f; |
3396 | 10.6k | break; |
3397 | 46.0k | case BLOCK_32X32: |
3398 | 46.0k | nn_config = &vp9_part_split_nnconfig_32; |
3399 | 46.0k | thresh = speed > 0 ? 3.5f : 3.0f; |
3400 | 46.0k | break; |
3401 | 182k | case BLOCK_16X16: |
3402 | 182k | nn_config = &vp9_part_split_nnconfig_16; |
3403 | 182k | thresh = speed > 0 ? 3.8f : 4.0f; |
3404 | 182k | break; |
3405 | 703k | case BLOCK_8X8: |
3406 | 703k | nn_config = &vp9_part_split_nnconfig_8; |
3407 | 703k | if (cm->width >= 720 && cm->height >= 720) |
3408 | 0 | thresh = speed > 0 ? 2.5f : 2.0f; |
3409 | 703k | else |
3410 | 703k | thresh = speed > 0 ? 3.8f : 2.0f; |
3411 | 703k | break; |
3412 | 0 | default: assert(0 && "Unexpected block size."); return; |
3413 | 942k | } |
3414 | | |
3415 | 942k | if (!nn_config) return; |
3416 | | |
3417 | | // Do a simple single motion search to find a prediction for current block. |
3418 | | // The variance of the residue will be used as input features. |
3419 | 942k | { |
3420 | 942k | MV ref_mv; |
3421 | 942k | const MV_REFERENCE_FRAME ref = |
3422 | 942k | cpi->rc.is_src_frame_alt_ref ? ALTREF_FRAME : LAST_FRAME; |
3423 | | // If bsize is 64x64, use zero MV as reference; otherwise, use MV result |
3424 | | // of previous(larger) block as reference. |
3425 | 942k | if (bsize == BLOCK_64X64) |
3426 | 10.6k | ref_mv.row = ref_mv.col = 0; |
3427 | 931k | else |
3428 | 931k | ref_mv = pc_tree->mv; |
3429 | 942k | vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col); |
3430 | 942k | simple_motion_search(cpi, x, bsize, mi_row, mi_col, ref_mv, ref, pred_buf); |
3431 | 942k | pc_tree->mv = x->e_mbd.mi[0]->mv[0].as_mv; |
3432 | 942k | } |
3433 | | |
3434 | 942k | vpx_clear_system_state(); |
3435 | | |
3436 | 942k | { |
3437 | 942k | float features[FEATURES] = { 0.0f }; |
3438 | 942k | #if CONFIG_VP9_HIGHBITDEPTH |
3439 | 942k | const int dc_q = |
3440 | 942k | vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth) >> (xd->bd - 8); |
3441 | | #else |
3442 | | const int dc_q = vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth); |
3443 | | #endif // CONFIG_VP9_HIGHBITDEPTH |
3444 | 942k | int feature_idx = 0; |
3445 | 942k | float score; |
3446 | | |
3447 | | // Generate model input features. |
3448 | 942k | features[feature_idx++] = logf((float)dc_q + 1.0f); |
3449 | | |
3450 | | // Get the variance of the residue as input features. |
3451 | 942k | { |
3452 | 942k | const int bs = 4 * num_4x4_blocks_wide_lookup[bsize]; |
3453 | 942k | const BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_SPLIT); |
3454 | 942k | const uint8_t *pred = pred_buf; |
3455 | 942k | const uint8_t *src = x->plane[0].src.buf; |
3456 | 942k | const int src_stride = x->plane[0].src.stride; |
3457 | 942k | const int pred_stride = 64; |
3458 | 942k | unsigned int sse; |
3459 | | // Variance of whole block. |
3460 | 942k | const unsigned int var = |
3461 | 942k | cpi->fn_ptr[bsize].vf(src, src_stride, pred, pred_stride, &sse); |
3462 | 942k | const float factor = (var == 0) ? 1.0f : (1.0f / (float)var); |
3463 | 942k | const int has_above = !!xd->above_mi; |
3464 | 942k | const int has_left = !!xd->left_mi; |
3465 | 942k | const BLOCK_SIZE above_bsize = has_above ? xd->above_mi->sb_type : bsize; |
3466 | 942k | const BLOCK_SIZE left_bsize = has_left ? xd->left_mi->sb_type : bsize; |
3467 | 942k | int i; |
3468 | | |
3469 | 942k | features[feature_idx++] = (float)has_above; |
3470 | 942k | features[feature_idx++] = (float)b_width_log2_lookup[above_bsize]; |
3471 | 942k | features[feature_idx++] = (float)b_height_log2_lookup[above_bsize]; |
3472 | 942k | features[feature_idx++] = (float)has_left; |
3473 | 942k | features[feature_idx++] = (float)b_width_log2_lookup[left_bsize]; |
3474 | 942k | features[feature_idx++] = (float)b_height_log2_lookup[left_bsize]; |
3475 | 942k | features[feature_idx++] = logf((float)var + 1.0f); |
3476 | 4.71M | for (i = 0; i < 4; ++i) { |
3477 | 3.77M | const int x_idx = (i & 1) * bs / 2; |
3478 | 3.77M | const int y_idx = (i >> 1) * bs / 2; |
3479 | 3.77M | const int src_offset = y_idx * src_stride + x_idx; |
3480 | 3.77M | const int pred_offset = y_idx * pred_stride + x_idx; |
3481 | | // Variance of quarter block. |
3482 | 3.77M | const unsigned int sub_var = |
3483 | 3.77M | cpi->fn_ptr[subsize].vf(src + src_offset, src_stride, |
3484 | 3.77M | pred + pred_offset, pred_stride, &sse); |
3485 | 3.77M | const float var_ratio = (var == 0) ? 1.0f : factor * (float)sub_var; |
3486 | 3.77M | features[feature_idx++] = var_ratio; |
3487 | 3.77M | } |
3488 | 942k | } |
3489 | 942k | assert(feature_idx == FEATURES); |
3490 | | |
3491 | | // Feed the features into the model to get the confidence score. |
3492 | 942k | nn_predict(features, nn_config, &score); |
3493 | | |
3494 | | // Higher score means that the model has higher confidence that the split |
3495 | | // partition is better than the non-split partition. So if the score is |
3496 | | // high enough, we skip the none-split partition search; if the score is |
3497 | | // low enough, we skip the split partition search. |
3498 | 942k | if (score > thresh) *none = 0; |
3499 | 942k | if (score < -thresh) *split = 0; |
3500 | 942k | } |
3501 | 942k | } |
3502 | | #undef FEATURES |
3503 | | #endif // !CONFIG_REALTIME_ONLY |
3504 | | |
3505 | 0 | static double log_wiener_var(int64_t wiener_variance) { |
3506 | 0 | return log(1.0 + wiener_variance) / log(2.0); |
3507 | 0 | } |
3508 | | |
3509 | 0 | static void build_kmeans_segmentation(VP9_COMP *cpi) { |
3510 | 0 | VP9_COMMON *cm = &cpi->common; |
3511 | 0 | BLOCK_SIZE bsize = BLOCK_64X64; |
3512 | 0 | KMEANS_DATA *kmeans_data; |
3513 | |
|
3514 | 0 | vp9_disable_segmentation(&cm->seg); |
3515 | 0 | if (cm->show_frame) { |
3516 | 0 | int mi_row, mi_col; |
3517 | 0 | cpi->kmeans_data_size = 0; |
3518 | 0 | cpi->kmeans_ctr_num = 8; |
3519 | |
|
3520 | 0 | for (mi_row = 0; mi_row < cm->mi_rows; mi_row += MI_BLOCK_SIZE) { |
3521 | 0 | for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) { |
3522 | 0 | int mb_row_start = mi_row >> 1; |
3523 | 0 | int mb_col_start = mi_col >> 1; |
3524 | 0 | int mb_row_end = VPXMIN( |
3525 | 0 | (mi_row + num_8x8_blocks_high_lookup[bsize]) >> 1, cm->mb_rows); |
3526 | 0 | int mb_col_end = VPXMIN( |
3527 | 0 | (mi_col + num_8x8_blocks_wide_lookup[bsize]) >> 1, cm->mb_cols); |
3528 | 0 | int row, col; |
3529 | 0 | int64_t wiener_variance = 0; |
3530 | |
|
3531 | 0 | for (row = mb_row_start; row < mb_row_end; ++row) |
3532 | 0 | for (col = mb_col_start; col < mb_col_end; ++col) |
3533 | 0 | wiener_variance += cpi->mb_wiener_variance[row * cm->mb_cols + col]; |
3534 | |
|
3535 | 0 | wiener_variance /= |
3536 | 0 | (mb_row_end - mb_row_start) * (mb_col_end - mb_col_start); |
3537 | |
|
3538 | 0 | #if CONFIG_MULTITHREAD |
3539 | 0 | pthread_mutex_lock(&cpi->kmeans_mutex); |
3540 | 0 | #endif // CONFIG_MULTITHREAD |
3541 | |
|
3542 | 0 | kmeans_data = &cpi->kmeans_data_arr[cpi->kmeans_data_size++]; |
3543 | 0 | kmeans_data->value = log_wiener_var(wiener_variance); |
3544 | 0 | kmeans_data->pos = mi_row * cpi->kmeans_data_stride + mi_col; |
3545 | 0 | #if CONFIG_MULTITHREAD |
3546 | 0 | pthread_mutex_unlock(&cpi->kmeans_mutex); |
3547 | 0 | #endif // CONFIG_MULTITHREAD |
3548 | 0 | } |
3549 | 0 | } |
3550 | |
|
3551 | 0 | vp9_kmeans(cpi->kmeans_ctr_ls, cpi->kmeans_boundary_ls, |
3552 | 0 | cpi->kmeans_count_ls, cpi->kmeans_ctr_num, cpi->kmeans_data_arr, |
3553 | 0 | cpi->kmeans_data_size); |
3554 | |
|
3555 | 0 | vp9_perceptual_aq_mode_setup(cpi, &cm->seg); |
3556 | 0 | } |
3557 | 0 | } |
3558 | | |
3559 | | #if !CONFIG_REALTIME_ONLY |
3560 | | static int wiener_var_segment(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row, |
3561 | 0 | int mi_col) { |
3562 | 0 | VP9_COMMON *cm = &cpi->common; |
3563 | 0 | int mb_row_start = mi_row >> 1; |
3564 | 0 | int mb_col_start = mi_col >> 1; |
3565 | 0 | int mb_row_end = |
3566 | 0 | VPXMIN((mi_row + num_8x8_blocks_high_lookup[bsize]) >> 1, cm->mb_rows); |
3567 | 0 | int mb_col_end = |
3568 | 0 | VPXMIN((mi_col + num_8x8_blocks_wide_lookup[bsize]) >> 1, cm->mb_cols); |
3569 | 0 | int row, col, idx; |
3570 | 0 | int64_t wiener_variance = 0; |
3571 | 0 | int segment_id; |
3572 | 0 | int8_t seg_hist[MAX_SEGMENTS] = { 0 }; |
3573 | 0 | int8_t max_count = 0, max_index = -1; |
3574 | |
|
3575 | 0 | vpx_clear_system_state(); |
3576 | |
|
3577 | 0 | assert(cpi->norm_wiener_variance > 0); |
3578 | |
|
3579 | 0 | for (row = mb_row_start; row < mb_row_end; ++row) { |
3580 | 0 | for (col = mb_col_start; col < mb_col_end; ++col) { |
3581 | 0 | wiener_variance = cpi->mb_wiener_variance[row * cm->mb_cols + col]; |
3582 | 0 | segment_id = |
3583 | 0 | vp9_get_group_idx(log_wiener_var(wiener_variance), |
3584 | 0 | cpi->kmeans_boundary_ls, cpi->kmeans_ctr_num); |
3585 | 0 | ++seg_hist[segment_id]; |
3586 | 0 | } |
3587 | 0 | } |
3588 | |
|
3589 | 0 | for (idx = 0; idx < cpi->kmeans_ctr_num; ++idx) { |
3590 | 0 | if (seg_hist[idx] > max_count) { |
3591 | 0 | max_count = seg_hist[idx]; |
3592 | 0 | max_index = idx; |
3593 | 0 | } |
3594 | 0 | } |
3595 | |
|
3596 | 0 | assert(max_index >= 0); |
3597 | 0 | segment_id = max_index; |
3598 | |
|
3599 | 0 | return segment_id; |
3600 | 0 | } |
3601 | | |
3602 | | static int get_rdmult_delta(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row, |
3603 | 0 | int mi_col, int orig_rdmult) { |
3604 | 0 | const int gf_group_index = cpi->twopass.gf_group.index; |
3605 | 0 | int64_t intra_cost = 0; |
3606 | 0 | int64_t mc_dep_cost = 0; |
3607 | 0 | int mi_wide = num_8x8_blocks_wide_lookup[bsize]; |
3608 | 0 | int mi_high = num_8x8_blocks_high_lookup[bsize]; |
3609 | 0 | int row, col; |
3610 | |
|
3611 | 0 | int dr = 0; |
3612 | 0 | double r0, rk, beta; |
3613 | |
|
3614 | 0 | TplDepFrame *tpl_frame; |
3615 | 0 | TplDepStats *tpl_stats; |
3616 | 0 | int tpl_stride; |
3617 | |
|
3618 | 0 | if (gf_group_index >= MAX_ARF_GOP_SIZE) return orig_rdmult; |
3619 | 0 | tpl_frame = &cpi->tpl_stats[gf_group_index]; |
3620 | |
|
3621 | 0 | if (tpl_frame->is_valid == 0) return orig_rdmult; |
3622 | 0 | tpl_stats = tpl_frame->tpl_stats_ptr; |
3623 | 0 | tpl_stride = tpl_frame->stride; |
3624 | |
|
3625 | 0 | if (cpi->twopass.gf_group.layer_depth[gf_group_index] > 1) return orig_rdmult; |
3626 | | |
3627 | 0 | if (cpi->ext_ratectrl.ready && |
3628 | 0 | (cpi->ext_ratectrl.funcs.rc_type & VPX_RC_QP) != 0 && |
3629 | 0 | cpi->ext_ratectrl.funcs.get_encodeframe_decision != NULL) { |
3630 | 0 | int sb_size = num_8x8_blocks_wide_lookup[BLOCK_64X64] * MI_SIZE; |
3631 | 0 | int sb_stride = (cpi->common.width + sb_size - 1) / sb_size; |
3632 | 0 | int sby = mi_row / 8; |
3633 | 0 | int sbx = mi_col / 8; |
3634 | 0 | return (int)((cpi->sb_mul_scale[sby * sb_stride + sbx] * orig_rdmult) / |
3635 | 0 | 256); |
3636 | 0 | } |
3637 | | |
3638 | 0 | for (row = mi_row; row < mi_row + mi_high; ++row) { |
3639 | 0 | for (col = mi_col; col < mi_col + mi_wide; ++col) { |
3640 | 0 | TplDepStats *this_stats = &tpl_stats[row * tpl_stride + col]; |
3641 | |
|
3642 | 0 | if (row >= cpi->common.mi_rows || col >= cpi->common.mi_cols) continue; |
3643 | | |
3644 | 0 | intra_cost += this_stats->intra_cost; |
3645 | 0 | mc_dep_cost += this_stats->mc_dep_cost; |
3646 | 0 | } |
3647 | 0 | } |
3648 | |
|
3649 | 0 | vpx_clear_system_state(); |
3650 | |
|
3651 | 0 | r0 = cpi->rd.r0; |
3652 | 0 | rk = (double)intra_cost / mc_dep_cost; |
3653 | 0 | beta = r0 / rk; |
3654 | 0 | dr = vp9_get_adaptive_rdmult(cpi, beta); |
3655 | |
|
3656 | 0 | dr = clamp(dr, orig_rdmult * 1 / 2, orig_rdmult * 3 / 2); |
3657 | 0 | dr = VPXMAX(1, dr); |
3658 | |
|
3659 | 0 | return dr; |
3660 | 0 | } |
3661 | | #endif // !CONFIG_REALTIME_ONLY |
3662 | | |
3663 | | #if !CONFIG_REALTIME_ONLY |
3664 | | // TODO(jingning,jimbankoski,rbultje): properly skip partition types that are |
3665 | | // unlikely to be selected depending on previous rate-distortion optimization |
3666 | | // results, for encoding speed-up. |
3667 | | static int rd_pick_partition(VP9_COMP *cpi, ThreadData *td, |
3668 | | TileDataEnc *tile_data, TOKENEXTRA **tp, |
3669 | | int mi_row, int mi_col, BLOCK_SIZE bsize, |
3670 | | RD_COST *rd_cost, RD_COST best_rdc, |
3671 | 4.05M | PC_TREE *pc_tree) { |
3672 | 4.05M | VP9_COMMON *const cm = &cpi->common; |
3673 | 4.05M | const VP9EncoderConfig *const oxcf = &cpi->oxcf; |
3674 | 4.05M | TileInfo *const tile_info = &tile_data->tile_info; |
3675 | 4.05M | MACROBLOCK *const x = &td->mb; |
3676 | 4.05M | MACROBLOCKD *const xd = &x->e_mbd; |
3677 | 4.05M | const int mi_step = num_8x8_blocks_wide_lookup[bsize] / 2; |
3678 | 4.05M | ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; |
3679 | 4.05M | PARTITION_CONTEXT sl[8], sa[8]; |
3680 | 4.05M | TOKENEXTRA *tp_orig = *tp; |
3681 | 4.05M | PICK_MODE_CONTEXT *const ctx = &pc_tree->none; |
3682 | 4.05M | int i; |
3683 | 4.05M | const int pl = partition_plane_context(xd, mi_row, mi_col, bsize); |
3684 | 4.05M | BLOCK_SIZE subsize; |
3685 | 4.05M | RD_COST this_rdc, sum_rdc; |
3686 | 4.05M | int do_split = bsize >= BLOCK_8X8; |
3687 | 4.05M | int do_rect = 1; |
3688 | 4.05M | INTERP_FILTER pred_interp_filter; |
3689 | | |
3690 | | // Override skipping rectangular partition operations for edge blocks |
3691 | 4.05M | const int force_horz_split = (mi_row + mi_step >= cm->mi_rows); |
3692 | 4.05M | const int force_vert_split = (mi_col + mi_step >= cm->mi_cols); |
3693 | 4.05M | const int xss = x->e_mbd.plane[1].subsampling_x; |
3694 | 4.05M | const int yss = x->e_mbd.plane[1].subsampling_y; |
3695 | | |
3696 | 4.05M | BLOCK_SIZE min_size = x->min_partition_size; |
3697 | 4.05M | BLOCK_SIZE max_size = x->max_partition_size; |
3698 | | |
3699 | 4.05M | int partition_none_allowed = !force_horz_split && !force_vert_split; |
3700 | 4.05M | int partition_horz_allowed = |
3701 | 4.05M | !force_vert_split && yss <= xss && bsize >= BLOCK_8X8; |
3702 | 4.05M | int partition_vert_allowed = |
3703 | 4.05M | !force_horz_split && xss <= yss && bsize >= BLOCK_8X8; |
3704 | | |
3705 | 4.05M | int64_t dist_breakout_thr = cpi->sf.partition_search_breakout_thr.dist; |
3706 | 4.05M | int rate_breakout_thr = cpi->sf.partition_search_breakout_thr.rate; |
3707 | 4.05M | int must_split = 0; |
3708 | 4.05M | int should_encode_sb = 0; |
3709 | | |
3710 | | // Ref frames picked in the [i_th] quarter subblock during square partition |
3711 | | // RD search. It may be used to prune ref frame selection of rect partitions. |
3712 | 4.05M | uint8_t ref_frames_used[4] = { 0, 0, 0, 0 }; |
3713 | | |
3714 | 4.05M | int partition_mul = x->cb_rdmult; |
3715 | | |
3716 | 4.05M | (void)*tp_orig; |
3717 | | |
3718 | 4.05M | assert(num_8x8_blocks_wide_lookup[bsize] == |
3719 | 4.05M | num_8x8_blocks_high_lookup[bsize]); |
3720 | | |
3721 | 4.05M | dist_breakout_thr >>= |
3722 | 4.05M | 8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]); |
3723 | | |
3724 | 4.05M | rate_breakout_thr *= num_pels_log2_lookup[bsize]; |
3725 | | |
3726 | 4.05M | vp9_rd_cost_init(&this_rdc); |
3727 | 4.05M | vp9_rd_cost_init(&sum_rdc); |
3728 | | |
3729 | 4.05M | set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); |
3730 | | |
3731 | 4.05M | if (oxcf->tuning == VP8_TUNE_SSIM) { |
3732 | 0 | set_ssim_rdmult(cpi, x, bsize, mi_row, mi_col, &partition_mul); |
3733 | 0 | } |
3734 | 4.05M | vp9_rd_cost_update(partition_mul, x->rddiv, &best_rdc); |
3735 | | |
3736 | 4.05M | if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode != NO_AQ && |
3737 | 0 | cpi->oxcf.aq_mode != LOOKAHEAD_AQ) |
3738 | 0 | x->mb_energy = vp9_block_energy(cpi, x, bsize); |
3739 | | |
3740 | 4.05M | if (cpi->sf.cb_partition_search && bsize == BLOCK_16X16) { |
3741 | 0 | int cb_partition_search_ctrl = |
3742 | 0 | ((pc_tree->index == 0 || pc_tree->index == 3) + |
3743 | 0 | get_chessboard_index(cm->current_video_frame)) & |
3744 | 0 | 0x1; |
3745 | |
|
3746 | 0 | if (cb_partition_search_ctrl && bsize > min_size && bsize < max_size) |
3747 | 0 | set_partition_range(cm, xd, mi_row, mi_col, bsize, &min_size, &max_size); |
3748 | 0 | } |
3749 | | |
3750 | | // Get sub block energy range |
3751 | 4.05M | if (bsize >= BLOCK_16X16) { |
3752 | 1.25M | int min_energy, max_energy; |
3753 | 1.25M | vp9_get_sub_block_energy(cpi, x, mi_row, mi_col, bsize, &min_energy, |
3754 | 1.25M | &max_energy); |
3755 | 1.25M | must_split = (min_energy < -3) && (max_energy - min_energy > 2); |
3756 | 1.25M | } |
3757 | | |
3758 | | // Determine partition types in search according to the speed features. |
3759 | | // The threshold set here has to be of square block size. |
3760 | 4.05M | if (cpi->sf.auto_min_max_partition_size) { |
3761 | 0 | partition_none_allowed &= (bsize <= max_size); |
3762 | 0 | partition_horz_allowed &= |
3763 | 0 | ((bsize <= max_size && bsize > min_size) || force_horz_split); |
3764 | 0 | partition_vert_allowed &= |
3765 | 0 | ((bsize <= max_size && bsize > min_size) || force_vert_split); |
3766 | 0 | do_split &= bsize > min_size; |
3767 | 0 | } |
3768 | | |
3769 | 4.05M | if (cpi->sf.use_square_partition_only && |
3770 | 2.48M | (bsize > cpi->sf.use_square_only_thresh_high || |
3771 | 2.46M | bsize < cpi->sf.use_square_only_thresh_low)) { |
3772 | 777k | if (cpi->use_svc) { |
3773 | 0 | if (!vp9_active_h_edge(cpi, mi_row, mi_step) || x->e_mbd.lossless) |
3774 | 0 | partition_horz_allowed &= force_horz_split; |
3775 | 0 | if (!vp9_active_v_edge(cpi, mi_row, mi_step) || x->e_mbd.lossless) |
3776 | 0 | partition_vert_allowed &= force_vert_split; |
3777 | 777k | } else { |
3778 | 777k | partition_horz_allowed &= force_horz_split; |
3779 | 777k | partition_vert_allowed &= force_vert_split; |
3780 | 777k | } |
3781 | 777k | } |
3782 | | |
3783 | 4.05M | save_context(x, mi_row, mi_col, a, l, sa, sl, bsize); |
3784 | | |
3785 | 4.05M | pc_tree->partitioning = PARTITION_NONE; |
3786 | | |
3787 | 4.05M | if (cpi->sf.rd_ml_partition.var_pruning && !frame_is_intra_only(cm)) { |
3788 | 994k | const int do_rd_ml_partition_var_pruning = |
3789 | 994k | partition_none_allowed && do_split && |
3790 | 947k | mi_row + num_8x8_blocks_high_lookup[bsize] <= cm->mi_rows && |
3791 | 943k | mi_col + num_8x8_blocks_wide_lookup[bsize] <= cm->mi_cols; |
3792 | 994k | if (do_rd_ml_partition_var_pruning) { |
3793 | 942k | ml_predict_var_rd_partitioning(cpi, x, pc_tree, bsize, mi_row, mi_col, |
3794 | 942k | &partition_none_allowed, &do_split); |
3795 | | // ml_predict_var_rd_partitioning() may pruune out either |
3796 | | // partition_none_allowed or do_split, but we should keep the |
3797 | | // partition_none_allowed for 8x8 blocks unless disable_split_mask is |
3798 | | // off (0). |
3799 | 942k | if (bsize == BLOCK_8X8 && cpi->sf.disable_split_mask && |
3800 | 703k | partition_none_allowed == 0) { |
3801 | 39.1k | partition_none_allowed = 1; |
3802 | 39.1k | } |
3803 | 942k | } else { |
3804 | 51.7k | vp9_zero(pc_tree->mv); |
3805 | 51.7k | } |
3806 | 994k | if (bsize > BLOCK_8X8) { // Store MV result as reference for subblocks. |
3807 | 1.45M | for (i = 0; i < 4; ++i) pc_tree->u.split[i]->mv = pc_tree->mv; |
3808 | 290k | } |
3809 | 994k | } |
3810 | | |
3811 | | // PARTITION_NONE |
3812 | 4.05M | if (partition_none_allowed) { |
3813 | 3.72M | rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, bsize, ctx, |
3814 | 3.72M | best_rdc.rate, best_rdc.dist); |
3815 | 3.72M | ctx->rdcost = this_rdc.rdcost; |
3816 | 3.72M | if (this_rdc.rate != INT_MAX) { |
3817 | 3.43M | if (cpi->sf.prune_ref_frame_for_rect_partitions) { |
3818 | 1.26M | const int ref1 = ctx->mic.ref_frame[0]; |
3819 | 1.26M | const int ref2 = ctx->mic.ref_frame[1]; |
3820 | 6.34M | for (i = 0; i < 4; ++i) { |
3821 | 5.07M | ref_frames_used[i] |= (1 << ref1); |
3822 | 5.07M | if (ref2 > 0) ref_frames_used[i] |= (1 << ref2); |
3823 | 5.07M | } |
3824 | 1.26M | } |
3825 | 3.43M | if (bsize >= BLOCK_8X8) { |
3826 | 3.43M | this_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE]; |
3827 | 3.43M | vp9_rd_cost_update(partition_mul, x->rddiv, &this_rdc); |
3828 | 3.43M | } |
3829 | | |
3830 | 3.43M | if (this_rdc.rdcost < best_rdc.rdcost) { |
3831 | 3.38M | MODE_INFO *mi = xd->mi[0]; |
3832 | | |
3833 | 3.38M | best_rdc = this_rdc; |
3834 | 3.38M | should_encode_sb = 1; |
3835 | 3.38M | if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE; |
3836 | | |
3837 | 3.38M | if (cpi->sf.rd_ml_partition.search_early_termination) { |
3838 | | // Currently, the machine-learning based partition search early |
3839 | | // termination is only used while bsize is 16x16, 32x32 or 64x64, |
3840 | | // VPXMIN(cm->width, cm->height) >= 480, and speed = 0. |
3841 | 0 | if (!x->e_mbd.lossless && |
3842 | 0 | !segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP) && |
3843 | 0 | ctx->mic.mode >= INTRA_MODES && bsize >= BLOCK_16X16) { |
3844 | 0 | if (ml_pruning_partition(cm, xd, ctx, mi_row, mi_col, bsize)) { |
3845 | 0 | do_split = 0; |
3846 | 0 | do_rect = 0; |
3847 | 0 | } |
3848 | 0 | } |
3849 | 0 | } |
3850 | | |
3851 | 3.38M | if ((do_split || do_rect) && !x->e_mbd.lossless && ctx->skippable) { |
3852 | 245k | const int use_ml_based_breakout = |
3853 | 245k | cpi->sf.rd_ml_partition.search_breakout && cm->base_qindex >= 100; |
3854 | 245k | if (use_ml_based_breakout) { |
3855 | 40.3k | if (ml_predict_breakout(cpi, bsize, x, &this_rdc)) { |
3856 | 12.0k | do_split = 0; |
3857 | 12.0k | do_rect = 0; |
3858 | 12.0k | } |
3859 | 205k | } else { |
3860 | 205k | if (!cpi->sf.rd_ml_partition.search_early_termination) { |
3861 | 205k | if ((best_rdc.dist < (dist_breakout_thr >> 2)) || |
3862 | 124k | (best_rdc.dist < dist_breakout_thr && |
3863 | 80.9k | best_rdc.rate < rate_breakout_thr)) { |
3864 | 80.9k | do_split = 0; |
3865 | 80.9k | do_rect = 0; |
3866 | 80.9k | } |
3867 | 205k | } |
3868 | 205k | } |
3869 | 245k | } |
3870 | 3.38M | } |
3871 | 3.43M | } |
3872 | 3.72M | restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); |
3873 | 3.72M | } else { |
3874 | 334k | vp9_zero(ctx->pred_mv); |
3875 | 334k | ctx->mic.interp_filter = EIGHTTAP; |
3876 | 334k | } |
3877 | | |
3878 | | // store estimated motion vector |
3879 | 4.05M | store_pred_mv(x, ctx); |
3880 | | |
3881 | | // If the interp_filter is marked as SWITCHABLE_FILTERS, it was for an |
3882 | | // intra block and used for context purposes. |
3883 | 4.05M | if (ctx->mic.interp_filter == SWITCHABLE_FILTERS) { |
3884 | 2.70M | pred_interp_filter = EIGHTTAP; |
3885 | 2.70M | } else { |
3886 | 1.35M | pred_interp_filter = ctx->mic.interp_filter; |
3887 | 1.35M | } |
3888 | | |
3889 | | // PARTITION_SPLIT |
3890 | | // TODO(jingning): use the motion vectors given by the above search as |
3891 | | // the starting point of motion search in the following partition type check. |
3892 | 4.05M | pc_tree->u.split[0]->none.rdcost = 0; |
3893 | 4.05M | pc_tree->u.split[1]->none.rdcost = 0; |
3894 | 4.05M | pc_tree->u.split[2]->none.rdcost = 0; |
3895 | 4.05M | pc_tree->u.split[3]->none.rdcost = 0; |
3896 | 4.05M | if (do_split || must_split) { |
3897 | 3.86M | subsize = get_subsize(bsize, PARTITION_SPLIT); |
3898 | 3.86M | load_pred_mv(x, ctx); |
3899 | 3.86M | if (bsize == BLOCK_8X8) { |
3900 | 2.63M | i = 4; |
3901 | 2.63M | if (cpi->sf.adaptive_pred_interp_filter && partition_none_allowed) |
3902 | 2.63M | pc_tree->u.leaf_split[0]->pred_interp_filter = pred_interp_filter; |
3903 | 2.63M | rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize, |
3904 | 2.63M | pc_tree->u.leaf_split[0], best_rdc.rate, best_rdc.dist); |
3905 | 2.63M | if (sum_rdc.rate == INT_MAX) { |
3906 | 1.08M | sum_rdc.rdcost = INT64_MAX; |
3907 | 1.55M | } else { |
3908 | 1.55M | if (cpi->sf.prune_ref_frame_for_rect_partitions) { |
3909 | 523k | const int ref1 = pc_tree->u.leaf_split[0]->mic.ref_frame[0]; |
3910 | 523k | const int ref2 = pc_tree->u.leaf_split[0]->mic.ref_frame[1]; |
3911 | 2.61M | for (i = 0; i < 4; ++i) { |
3912 | 2.09M | ref_frames_used[i] |= (1 << ref1); |
3913 | 2.09M | if (ref2 > 0) ref_frames_used[i] |= (1 << ref2); |
3914 | 2.09M | } |
3915 | 523k | } |
3916 | 1.55M | } |
3917 | 2.63M | } else { |
3918 | 5.48M | for (i = 0; (i < 4) && ((sum_rdc.rdcost < best_rdc.rdcost) || must_split); |
3919 | 4.54M | ++i) { |
3920 | 4.54M | const int x_idx = (i & 1) * mi_step; |
3921 | 4.54M | const int y_idx = (i >> 1) * mi_step; |
3922 | 4.54M | int found_best_rd = 0; |
3923 | 4.54M | RD_COST best_rdc_split; |
3924 | 4.54M | vp9_rd_cost_reset(&best_rdc_split); |
3925 | | |
3926 | 4.54M | if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX) { |
3927 | | // A must split test here increases the number of sub |
3928 | | // partitions but hurts metrics results quite a bit, |
3929 | | // so this extra test is commented out pending |
3930 | | // further tests on whether it adds much in terms of |
3931 | | // visual quality. |
3932 | | // (must_split) ? best_rdc.rate |
3933 | | // : best_rdc.rate - sum_rdc.rate, |
3934 | | // (must_split) ? best_rdc.dist |
3935 | | // : best_rdc.dist - sum_rdc.dist, |
3936 | 3.44M | best_rdc_split.rate = best_rdc.rate - sum_rdc.rate; |
3937 | 3.44M | best_rdc_split.dist = best_rdc.dist - sum_rdc.dist; |
3938 | 3.44M | } |
3939 | | |
3940 | 4.54M | if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols) |
3941 | 605k | continue; |
3942 | | |
3943 | 3.94M | pc_tree->u.split[i]->index = i; |
3944 | 3.94M | if (cpi->sf.prune_ref_frame_for_rect_partitions) |
3945 | 1.51M | pc_tree->u.split[i]->none.rate = INT_MAX; |
3946 | 3.94M | found_best_rd = rd_pick_partition( |
3947 | 3.94M | cpi, td, tile_data, tp, mi_row + y_idx, mi_col + x_idx, subsize, |
3948 | 3.94M | &this_rdc, best_rdc_split, pc_tree->u.split[i]); |
3949 | | |
3950 | 3.94M | if (found_best_rd == 0) { |
3951 | 295k | sum_rdc.rdcost = INT64_MAX; |
3952 | 295k | break; |
3953 | 3.64M | } else { |
3954 | 3.64M | if (cpi->sf.prune_ref_frame_for_rect_partitions && |
3955 | 1.39M | pc_tree->u.split[i]->none.rate != INT_MAX) { |
3956 | 1.23M | const int ref1 = pc_tree->u.split[i]->none.mic.ref_frame[0]; |
3957 | 1.23M | const int ref2 = pc_tree->u.split[i]->none.mic.ref_frame[1]; |
3958 | 1.23M | ref_frames_used[i] |= (1 << ref1); |
3959 | 1.23M | if (ref2 > 0) ref_frames_used[i] |= (1 << ref2); |
3960 | 1.23M | } |
3961 | 3.64M | sum_rdc.rate += this_rdc.rate; |
3962 | 3.64M | sum_rdc.dist += this_rdc.dist; |
3963 | 3.64M | vp9_rd_cost_update(partition_mul, x->rddiv, &sum_rdc); |
3964 | 3.64M | } |
3965 | 3.94M | } |
3966 | 1.22M | } |
3967 | | |
3968 | 3.86M | if (((sum_rdc.rdcost < best_rdc.rdcost) || must_split) && i == 4) { |
3969 | 2.27M | sum_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT]; |
3970 | 2.27M | vp9_rd_cost_update(partition_mul, x->rddiv, &sum_rdc); |
3971 | | |
3972 | 2.27M | if ((sum_rdc.rdcost < best_rdc.rdcost) || |
3973 | 2.17M | (must_split && (sum_rdc.dist < best_rdc.dist))) { |
3974 | 2.17M | best_rdc = sum_rdc; |
3975 | 2.17M | should_encode_sb = 1; |
3976 | 2.17M | pc_tree->partitioning = PARTITION_SPLIT; |
3977 | | |
3978 | | // Rate and distortion based partition search termination clause. |
3979 | 2.17M | if (!cpi->sf.rd_ml_partition.search_early_termination && |
3980 | 2.17M | !x->e_mbd.lossless && |
3981 | 1.97M | ((best_rdc.dist < (dist_breakout_thr >> 2)) || |
3982 | 1.11M | (best_rdc.dist < dist_breakout_thr && |
3983 | 858k | best_rdc.rate < rate_breakout_thr))) { |
3984 | 858k | do_rect = 0; |
3985 | 858k | } |
3986 | 2.17M | } |
3987 | 2.27M | } else { |
3988 | | // skip rectangular partition test when larger block size |
3989 | | // gives better rd cost |
3990 | 1.59M | if (cpi->sf.less_rectangular_check && |
3991 | 1.59M | (bsize > cpi->sf.use_square_only_thresh_high || |
3992 | 1.58M | best_rdc.dist < dist_breakout_thr)) |
3993 | 983k | do_rect &= !partition_none_allowed; |
3994 | 1.59M | } |
3995 | 3.86M | restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); |
3996 | 3.86M | } |
3997 | | |
3998 | 4.05M | pc_tree->horizontal[0].skip_ref_frame_mask = 0; |
3999 | 4.05M | pc_tree->horizontal[1].skip_ref_frame_mask = 0; |
4000 | 4.05M | pc_tree->vertical[0].skip_ref_frame_mask = 0; |
4001 | 4.05M | pc_tree->vertical[1].skip_ref_frame_mask = 0; |
4002 | 4.05M | if (cpi->sf.prune_ref_frame_for_rect_partitions) { |
4003 | 1.56M | uint8_t used_frames; |
4004 | 1.56M | used_frames = ref_frames_used[0] | ref_frames_used[1]; |
4005 | 1.56M | if (used_frames) { |
4006 | 1.43M | pc_tree->horizontal[0].skip_ref_frame_mask = ~used_frames & 0xff; |
4007 | 1.43M | } |
4008 | 1.56M | used_frames = ref_frames_used[2] | ref_frames_used[3]; |
4009 | 1.56M | if (used_frames) { |
4010 | 1.35M | pc_tree->horizontal[1].skip_ref_frame_mask = ~used_frames & 0xff; |
4011 | 1.35M | } |
4012 | 1.56M | used_frames = ref_frames_used[0] | ref_frames_used[2]; |
4013 | 1.56M | if (used_frames) { |
4014 | 1.43M | pc_tree->vertical[0].skip_ref_frame_mask = ~used_frames & 0xff; |
4015 | 1.43M | } |
4016 | 1.56M | used_frames = ref_frames_used[1] | ref_frames_used[3]; |
4017 | 1.56M | if (used_frames) { |
4018 | 1.39M | pc_tree->vertical[1].skip_ref_frame_mask = ~used_frames & 0xff; |
4019 | 1.39M | } |
4020 | 1.56M | } |
4021 | | |
4022 | 4.05M | { |
4023 | 4.05M | const int do_ml_rect_partition_pruning = |
4024 | 4.05M | !frame_is_intra_only(cm) && !force_horz_split && !force_vert_split && |
4025 | 2.36M | (partition_horz_allowed || partition_vert_allowed) && bsize > BLOCK_8X8; |
4026 | 4.05M | if (do_ml_rect_partition_pruning) { |
4027 | 589k | ml_prune_rect_partition(cpi, x, bsize, pc_tree, &partition_horz_allowed, |
4028 | 589k | &partition_vert_allowed, best_rdc.rdcost); |
4029 | 589k | } |
4030 | 4.05M | } |
4031 | | |
4032 | | // PARTITION_HORZ |
4033 | 4.05M | if (partition_horz_allowed && |
4034 | 2.99M | (do_rect || vp9_active_h_edge(cpi, mi_row, mi_step))) { |
4035 | 1.83M | const int part_mode_rate = cpi->partition_cost[pl][PARTITION_HORZ]; |
4036 | 1.83M | subsize = get_subsize(bsize, PARTITION_HORZ); |
4037 | 1.83M | load_pred_mv(x, ctx); |
4038 | 1.83M | if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && |
4039 | 1.13M | partition_none_allowed) |
4040 | 1.13M | pc_tree->horizontal[0].pred_interp_filter = pred_interp_filter; |
4041 | 1.83M | rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize, |
4042 | 1.83M | &pc_tree->horizontal[0], best_rdc.rate - part_mode_rate, |
4043 | 1.83M | best_rdc.dist); |
4044 | 1.83M | if (sum_rdc.rdcost < INT64_MAX) { |
4045 | 1.03M | sum_rdc.rate += part_mode_rate; |
4046 | 1.03M | vp9_rd_cost_update(partition_mul, x->rddiv, &sum_rdc); |
4047 | 1.03M | } |
4048 | | |
4049 | 1.83M | if (sum_rdc.rdcost < best_rdc.rdcost && mi_row + mi_step < cm->mi_rows && |
4050 | 716k | bsize > BLOCK_8X8) { |
4051 | 437k | PICK_MODE_CONTEXT *hctx = &pc_tree->horizontal[0]; |
4052 | 437k | update_state(cpi, td, hctx, mi_row, mi_col, subsize, 0); |
4053 | 437k | encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, hctx); |
4054 | 437k | if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && |
4055 | 0 | partition_none_allowed) |
4056 | 0 | pc_tree->horizontal[1].pred_interp_filter = pred_interp_filter; |
4057 | 437k | rd_pick_sb_modes(cpi, tile_data, x, mi_row + mi_step, mi_col, &this_rdc, |
4058 | 437k | subsize, &pc_tree->horizontal[1], |
4059 | 437k | best_rdc.rate - sum_rdc.rate, |
4060 | 437k | best_rdc.dist - sum_rdc.dist); |
4061 | 437k | if (this_rdc.rate == INT_MAX) { |
4062 | 297k | sum_rdc.rdcost = INT64_MAX; |
4063 | 297k | } else { |
4064 | 140k | sum_rdc.rate += this_rdc.rate; |
4065 | 140k | sum_rdc.dist += this_rdc.dist; |
4066 | 140k | vp9_rd_cost_update(partition_mul, x->rddiv, &sum_rdc); |
4067 | 140k | } |
4068 | 437k | } |
4069 | | |
4070 | 1.83M | if (sum_rdc.rdcost < best_rdc.rdcost) { |
4071 | 368k | best_rdc = sum_rdc; |
4072 | 368k | should_encode_sb = 1; |
4073 | 368k | pc_tree->partitioning = PARTITION_HORZ; |
4074 | | |
4075 | 368k | if (cpi->sf.less_rectangular_check && |
4076 | 368k | bsize > cpi->sf.use_square_only_thresh_high) |
4077 | 4.64k | do_rect = 0; |
4078 | 368k | } |
4079 | 1.83M | restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); |
4080 | 1.83M | } |
4081 | | |
4082 | | // PARTITION_VERT |
4083 | 4.05M | if (partition_vert_allowed && |
4084 | 2.91M | (do_rect || vp9_active_v_edge(cpi, mi_col, mi_step))) { |
4085 | 1.73M | const int part_mode_rate = cpi->partition_cost[pl][PARTITION_VERT]; |
4086 | 1.73M | subsize = get_subsize(bsize, PARTITION_VERT); |
4087 | 1.73M | load_pred_mv(x, ctx); |
4088 | 1.73M | if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && |
4089 | 1.13M | partition_none_allowed) |
4090 | 1.13M | pc_tree->vertical[0].pred_interp_filter = pred_interp_filter; |
4091 | 1.73M | rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize, |
4092 | 1.73M | &pc_tree->vertical[0], best_rdc.rate - part_mode_rate, |
4093 | 1.73M | best_rdc.dist); |
4094 | 1.73M | if (sum_rdc.rdcost < INT64_MAX) { |
4095 | 889k | sum_rdc.rate += part_mode_rate; |
4096 | 889k | vp9_rd_cost_update(partition_mul, x->rddiv, &sum_rdc); |
4097 | 889k | } |
4098 | | |
4099 | 1.73M | if (sum_rdc.rdcost < best_rdc.rdcost && mi_col + mi_step < cm->mi_cols && |
4100 | 578k | bsize > BLOCK_8X8) { |
4101 | 446k | update_state(cpi, td, &pc_tree->vertical[0], mi_row, mi_col, subsize, 0); |
4102 | 446k | encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, |
4103 | 446k | &pc_tree->vertical[0]); |
4104 | 446k | if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && |
4105 | 0 | partition_none_allowed) |
4106 | 0 | pc_tree->vertical[1].pred_interp_filter = pred_interp_filter; |
4107 | 446k | rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + mi_step, &this_rdc, |
4108 | 446k | subsize, &pc_tree->vertical[1], |
4109 | 446k | best_rdc.rate - sum_rdc.rate, |
4110 | 446k | best_rdc.dist - sum_rdc.dist); |
4111 | 446k | if (this_rdc.rate == INT_MAX) { |
4112 | 329k | sum_rdc.rdcost = INT64_MAX; |
4113 | 329k | } else { |
4114 | 117k | sum_rdc.rate += this_rdc.rate; |
4115 | 117k | sum_rdc.dist += this_rdc.dist; |
4116 | 117k | vp9_rd_cost_update(partition_mul, x->rddiv, &sum_rdc); |
4117 | 117k | } |
4118 | 446k | } |
4119 | | |
4120 | 1.73M | if (sum_rdc.rdcost < best_rdc.rdcost) { |
4121 | 184k | best_rdc = sum_rdc; |
4122 | 184k | should_encode_sb = 1; |
4123 | 184k | pc_tree->partitioning = PARTITION_VERT; |
4124 | 184k | } |
4125 | 1.73M | restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); |
4126 | 1.73M | } |
4127 | | |
4128 | 4.05M | if (bsize == BLOCK_64X64 && best_rdc.rdcost == INT64_MAX) { |
4129 | 0 | vp9_rd_cost_reset(&this_rdc); |
4130 | 0 | rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, BLOCK_64X64, |
4131 | 0 | ctx, INT_MAX, INT64_MAX); |
4132 | 0 | ctx->rdcost = this_rdc.rdcost; |
4133 | 0 | vp9_rd_cost_update(partition_mul, x->rddiv, &this_rdc); |
4134 | 0 | if (this_rdc.rdcost < best_rdc.rdcost) { |
4135 | 0 | best_rdc = this_rdc; |
4136 | 0 | should_encode_sb = 1; |
4137 | 0 | pc_tree->partitioning = PARTITION_NONE; |
4138 | 0 | } |
4139 | 0 | } |
4140 | | |
4141 | 4.05M | *rd_cost = best_rdc; |
4142 | | |
4143 | 4.05M | if (should_encode_sb && pc_tree->index != 3) { |
4144 | 3.10M | int output_enabled = (bsize == BLOCK_64X64); |
4145 | | #if CONFIG_COLLECT_COMPONENT_TIMING |
4146 | | start_timing(cpi, encode_sb_time); |
4147 | | #endif |
4148 | 3.10M | encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize, |
4149 | 3.10M | pc_tree); |
4150 | | #if CONFIG_COLLECT_COMPONENT_TIMING |
4151 | | end_timing(cpi, encode_sb_time); |
4152 | | #endif |
4153 | 3.10M | } |
4154 | | |
4155 | 4.05M | if (bsize == BLOCK_64X64) { |
4156 | 116k | assert(tp_orig < *tp); |
4157 | 116k | assert(best_rdc.rate < INT_MAX); |
4158 | 116k | assert(best_rdc.dist < INT64_MAX); |
4159 | 3.94M | } else { |
4160 | 3.94M | assert(tp_orig == *tp); |
4161 | 3.94M | } |
4162 | | |
4163 | 4.05M | return should_encode_sb; |
4164 | 4.05M | } |
4165 | | |
4166 | | static void encode_rd_sb_row(VP9_COMP *cpi, ThreadData *td, |
4167 | | TileDataEnc *tile_data, int mi_row, |
4168 | 76.1k | TOKENEXTRA **tp) { |
4169 | 76.1k | VP9_COMMON *const cm = &cpi->common; |
4170 | 76.1k | TileInfo *const tile_info = &tile_data->tile_info; |
4171 | 76.1k | MACROBLOCK *const x = &td->mb; |
4172 | 76.1k | MACROBLOCKD *const xd = &x->e_mbd; |
4173 | 76.1k | SPEED_FEATURES *const sf = &cpi->sf; |
4174 | 76.1k | const int mi_col_start = tile_info->mi_col_start; |
4175 | 76.1k | const int mi_col_end = tile_info->mi_col_end; |
4176 | 76.1k | int mi_col; |
4177 | 76.1k | const int sb_row = mi_row >> MI_BLOCK_SIZE_LOG2; |
4178 | 76.1k | const int num_sb_cols = |
4179 | 76.1k | get_num_cols(tile_data->tile_info, MI_BLOCK_SIZE_LOG2); |
4180 | 76.1k | int sb_col_in_tile; |
4181 | | |
4182 | | // Initialize the left context for the new SB row |
4183 | 76.1k | memset(&xd->left_context, 0, sizeof(xd->left_context)); |
4184 | 76.1k | memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context)); |
4185 | | |
4186 | | // Code each SB in the row |
4187 | 192k | for (mi_col = mi_col_start, sb_col_in_tile = 0; mi_col < mi_col_end; |
4188 | 116k | mi_col += MI_BLOCK_SIZE, sb_col_in_tile++) { |
4189 | 116k | const struct segmentation *const seg = &cm->seg; |
4190 | 116k | int dummy_rate; |
4191 | 116k | int64_t dummy_dist; |
4192 | 116k | RD_COST dummy_rdc; |
4193 | 116k | int i; |
4194 | 116k | int seg_skip = 0; |
4195 | 116k | int orig_rdmult = cpi->rd.RDMULT; |
4196 | | |
4197 | 116k | const int idx_str = cm->mi_stride * mi_row + mi_col; |
4198 | 116k | MODE_INFO **mi = cm->mi_grid_visible + idx_str; |
4199 | | |
4200 | 116k | vp9_rd_cost_reset(&dummy_rdc); |
4201 | 116k | (*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, sb_row, |
4202 | 116k | sb_col_in_tile); |
4203 | | |
4204 | 116k | if (sf->adaptive_pred_interp_filter) { |
4205 | 7.54M | for (i = 0; i < 64; ++i) td->leaf_tree[i].pred_interp_filter = SWITCHABLE; |
4206 | | |
4207 | 7.54M | for (i = 0; i < 64; ++i) { |
4208 | 7.43M | td->pc_tree[i].vertical[0].pred_interp_filter = SWITCHABLE; |
4209 | 7.43M | td->pc_tree[i].vertical[1].pred_interp_filter = SWITCHABLE; |
4210 | 7.43M | td->pc_tree[i].horizontal[0].pred_interp_filter = SWITCHABLE; |
4211 | 7.43M | td->pc_tree[i].horizontal[1].pred_interp_filter = SWITCHABLE; |
4212 | 7.43M | } |
4213 | 116k | } |
4214 | | |
4215 | 580k | for (i = 0; i < MAX_REF_FRAMES; ++i) { |
4216 | 464k | x->pred_mv[i].row = INT16_MAX; |
4217 | 464k | x->pred_mv[i].col = INT16_MAX; |
4218 | 464k | } |
4219 | 116k | td->pc_root->index = 0; |
4220 | | |
4221 | 116k | if (seg->enabled) { |
4222 | 0 | const uint8_t *const map = |
4223 | 0 | seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map; |
4224 | 0 | int segment_id = get_segment_id(cm, map, BLOCK_64X64, mi_row, mi_col); |
4225 | 0 | seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP); |
4226 | 0 | } |
4227 | | |
4228 | 116k | x->source_variance = UINT_MAX; |
4229 | | |
4230 | 116k | x->cb_rdmult = orig_rdmult; |
4231 | | |
4232 | 116k | if (sf->partition_search_type == FIXED_PARTITION || seg_skip) { |
4233 | 0 | const BLOCK_SIZE bsize = |
4234 | 0 | seg_skip ? BLOCK_64X64 : sf->always_this_block_size; |
4235 | 0 | set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64); |
4236 | 0 | set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize); |
4237 | 0 | rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64, |
4238 | 0 | &dummy_rate, &dummy_dist, 1, td->pc_root); |
4239 | 116k | } else if (sf->partition_search_type == VAR_BASED_PARTITION && |
4240 | 0 | cm->frame_type != KEY_FRAME) { |
4241 | 0 | choose_partitioning(cpi, tile_info, x, mi_row, mi_col); |
4242 | 0 | rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64, |
4243 | 0 | &dummy_rate, &dummy_dist, 1, td->pc_root); |
4244 | 116k | } else { |
4245 | 116k | if (cpi->twopass.gf_group.index > 0 && cpi->sf.enable_tpl_model) { |
4246 | 0 | int dr = |
4247 | 0 | get_rdmult_delta(cpi, BLOCK_64X64, mi_row, mi_col, orig_rdmult); |
4248 | 0 | x->cb_rdmult = dr; |
4249 | 0 | } |
4250 | | |
4251 | 116k | if (cpi->oxcf.aq_mode == PERCEPTUAL_AQ && cm->show_frame) { |
4252 | 0 | x->segment_id = wiener_var_segment(cpi, BLOCK_64X64, mi_row, mi_col); |
4253 | 0 | x->cb_rdmult = vp9_compute_rd_mult( |
4254 | 0 | cpi, vp9_get_qindex(&cm->seg, x->segment_id, cm->base_qindex)); |
4255 | 0 | } |
4256 | | |
4257 | | // If required set upper and lower partition size limits |
4258 | 116k | if (sf->auto_min_max_partition_size) { |
4259 | 0 | set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64); |
4260 | 0 | rd_auto_partition_range(cpi, tile_info, xd, mi_row, mi_col, |
4261 | 0 | &x->min_partition_size, &x->max_partition_size); |
4262 | 0 | } |
4263 | 116k | td->pc_root->none.rdcost = 0; |
4264 | | |
4265 | | #if CONFIG_COLLECT_COMPONENT_TIMING |
4266 | | start_timing(cpi, rd_pick_partition_time); |
4267 | | #endif |
4268 | 116k | rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, BLOCK_64X64, |
4269 | 116k | &dummy_rdc, dummy_rdc, td->pc_root); |
4270 | | #if CONFIG_COLLECT_COMPONENT_TIMING |
4271 | | end_timing(cpi, rd_pick_partition_time); |
4272 | | #endif |
4273 | 116k | } |
4274 | 116k | (*(cpi->row_mt_sync_write_ptr))(&tile_data->row_mt_sync, sb_row, |
4275 | 116k | sb_col_in_tile, num_sb_cols); |
4276 | 116k | } |
4277 | 76.1k | } |
4278 | | #endif // !CONFIG_REALTIME_ONLY |
4279 | | |
4280 | 54.1k | static void init_encode_frame_mb_context(VP9_COMP *cpi) { |
4281 | 54.1k | MACROBLOCK *const x = &cpi->td.mb; |
4282 | 54.1k | VP9_COMMON *const cm = &cpi->common; |
4283 | 54.1k | MACROBLOCKD *const xd = &x->e_mbd; |
4284 | 54.1k | const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); |
4285 | | |
4286 | | // Copy data over into macro block data structures. |
4287 | 54.1k | vp9_setup_src_planes(x, cpi->Source, 0, 0); |
4288 | | |
4289 | 54.1k | vp9_setup_block_planes(&x->e_mbd, cm->subsampling_x, cm->subsampling_y); |
4290 | | |
4291 | | // Note: this memset assumes above_context[0], [1] and [2] |
4292 | | // are allocated as part of the same buffer. |
4293 | 54.1k | memset(xd->above_context[0], 0, |
4294 | 54.1k | sizeof(*xd->above_context[0]) * 2 * aligned_mi_cols * MAX_MB_PLANE); |
4295 | 54.1k | memset(xd->above_seg_context, 0, |
4296 | 54.1k | sizeof(*xd->above_seg_context) * aligned_mi_cols); |
4297 | 54.1k | } |
4298 | | |
4299 | 0 | static int check_dual_ref_flags(VP9_COMP *cpi) { |
4300 | 0 | const int ref_flags = cpi->ref_frame_flags; |
4301 | |
|
4302 | 0 | if (segfeature_active(&cpi->common.seg, 1, SEG_LVL_REF_FRAME)) { |
4303 | 0 | return 0; |
4304 | 0 | } else { |
4305 | 0 | return (!!(ref_flags & VP9_GOLD_FLAG) + !!(ref_flags & VP9_LAST_FLAG) + |
4306 | 0 | !!(ref_flags & VP9_ALT_FLAG)) >= 2; |
4307 | 0 | } |
4308 | 0 | } |
4309 | | |
4310 | 9.46k | static void reset_skip_tx_size(VP9_COMMON *cm, TX_SIZE max_tx_size) { |
4311 | 9.46k | int mi_row, mi_col; |
4312 | 9.46k | const int mis = cm->mi_stride; |
4313 | 9.46k | MODE_INFO **mi_ptr = cm->mi_grid_visible; |
4314 | | |
4315 | 42.7k | for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row, mi_ptr += mis) { |
4316 | 247k | for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) { |
4317 | 214k | if (mi_ptr[mi_col]->tx_size > max_tx_size) |
4318 | 2.18k | mi_ptr[mi_col]->tx_size = max_tx_size; |
4319 | 214k | } |
4320 | 33.2k | } |
4321 | 9.46k | } |
4322 | | |
4323 | 54.1k | static MV_REFERENCE_FRAME get_frame_type(const VP9_COMP *cpi) { |
4324 | 54.1k | if (frame_is_intra_only(&cpi->common)) |
4325 | 12.8k | return INTRA_FRAME; |
4326 | 41.3k | else if (cpi->rc.is_src_frame_alt_ref && cpi->refresh_golden_frame) |
4327 | 0 | return ALTREF_FRAME; |
4328 | 41.3k | else if (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) |
4329 | 2.83k | return GOLDEN_FRAME; |
4330 | 38.5k | else |
4331 | 38.5k | return LAST_FRAME; |
4332 | 54.1k | } |
4333 | | |
4334 | 54.1k | static TX_MODE select_tx_mode(const VP9_COMP *cpi, MACROBLOCKD *const xd) { |
4335 | 54.1k | if (xd->lossless) return ONLY_4X4; |
4336 | 46.7k | if (cpi->common.frame_type == KEY_FRAME && cpi->sf.use_nonrd_pick_mode) |
4337 | 0 | return ALLOW_16X16; |
4338 | 46.7k | if (cpi->sf.tx_size_search_method == USE_LARGESTALL) |
4339 | 20.4k | return ALLOW_32X32; |
4340 | 26.3k | else if (cpi->sf.tx_size_search_method == USE_FULL_RD || |
4341 | 0 | cpi->sf.tx_size_search_method == USE_TX_8X8) |
4342 | 26.3k | return TX_MODE_SELECT; |
4343 | 0 | else |
4344 | 0 | return cpi->common.tx_mode; |
4345 | 46.7k | } |
4346 | | |
4347 | | static void hybrid_intra_mode_search(VP9_COMP *cpi, MACROBLOCK *const x, |
4348 | | RD_COST *rd_cost, BLOCK_SIZE bsize, |
4349 | 0 | PICK_MODE_CONTEXT *ctx) { |
4350 | 0 | if (!cpi->sf.nonrd_keyframe && bsize < BLOCK_16X16) |
4351 | 0 | vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, INT64_MAX); |
4352 | 0 | else |
4353 | 0 | vp9_pick_intra_mode(cpi, x, rd_cost, bsize, ctx); |
4354 | 0 | } |
4355 | | |
4356 | | static void hybrid_search_svc_baseiskey(VP9_COMP *cpi, MACROBLOCK *const x, |
4357 | | RD_COST *rd_cost, BLOCK_SIZE bsize, |
4358 | | PICK_MODE_CONTEXT *ctx, |
4359 | | TileDataEnc *tile_data, int mi_row, |
4360 | 0 | int mi_col) { |
4361 | 0 | if (!cpi->sf.nonrd_keyframe && bsize <= BLOCK_8X8) { |
4362 | 0 | vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, INT64_MAX); |
4363 | 0 | } else { |
4364 | 0 | if (cpi->svc.disable_inter_layer_pred == INTER_LAYER_PRED_OFF) |
4365 | 0 | vp9_pick_intra_mode(cpi, x, rd_cost, bsize, ctx); |
4366 | 0 | else if (bsize >= BLOCK_8X8) |
4367 | 0 | vp9_pick_inter_mode(cpi, x, tile_data, mi_row, mi_col, rd_cost, bsize, |
4368 | 0 | ctx); |
4369 | 0 | else |
4370 | 0 | vp9_pick_inter_mode_sub8x8(cpi, x, mi_row, mi_col, rd_cost, bsize, ctx); |
4371 | 0 | } |
4372 | 0 | } |
4373 | | |
4374 | | static void hybrid_search_scene_change(VP9_COMP *cpi, MACROBLOCK *const x, |
4375 | | RD_COST *rd_cost, BLOCK_SIZE bsize, |
4376 | | PICK_MODE_CONTEXT *ctx, |
4377 | | TileDataEnc *tile_data, int mi_row, |
4378 | 0 | int mi_col) { |
4379 | 0 | if (!cpi->sf.nonrd_keyframe && bsize <= BLOCK_8X8) { |
4380 | 0 | vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, INT64_MAX); |
4381 | 0 | } else { |
4382 | 0 | vp9_pick_inter_mode(cpi, x, tile_data, mi_row, mi_col, rd_cost, bsize, ctx); |
4383 | 0 | } |
4384 | 0 | } |
4385 | | |
4386 | | static void nonrd_pick_sb_modes(VP9_COMP *cpi, TileDataEnc *tile_data, |
4387 | | MACROBLOCK *const x, int mi_row, int mi_col, |
4388 | | RD_COST *rd_cost, BLOCK_SIZE bsize, |
4389 | 0 | PICK_MODE_CONTEXT *ctx) { |
4390 | 0 | VP9_COMMON *const cm = &cpi->common; |
4391 | 0 | TileInfo *const tile_info = &tile_data->tile_info; |
4392 | 0 | MACROBLOCKD *const xd = &x->e_mbd; |
4393 | 0 | MODE_INFO *mi; |
4394 | 0 | ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; |
4395 | 0 | BLOCK_SIZE bs = VPXMAX(bsize, BLOCK_8X8); // processing unit block size |
4396 | 0 | const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bs]; |
4397 | 0 | const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bs]; |
4398 | 0 | int plane; |
4399 | |
|
4400 | 0 | set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); |
4401 | |
|
4402 | 0 | set_segment_index(cpi, x, mi_row, mi_col, bsize, 0); |
4403 | |
|
4404 | 0 | x->skip_recode = 0; |
4405 | |
|
4406 | 0 | mi = xd->mi[0]; |
4407 | 0 | mi->sb_type = bsize; |
4408 | |
|
4409 | 0 | for (plane = 0; plane < MAX_MB_PLANE; ++plane) { |
4410 | 0 | struct macroblockd_plane *pd = &xd->plane[plane]; |
4411 | 0 | memcpy(a + num_4x4_blocks_wide * plane, pd->above_context, |
4412 | 0 | (sizeof(a[0]) * num_4x4_blocks_wide) >> pd->subsampling_x); |
4413 | 0 | memcpy(l + num_4x4_blocks_high * plane, pd->left_context, |
4414 | 0 | (sizeof(l[0]) * num_4x4_blocks_high) >> pd->subsampling_y); |
4415 | 0 | } |
4416 | |
|
4417 | 0 | if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) |
4418 | 0 | if (cyclic_refresh_segment_id_boosted(mi->segment_id)) |
4419 | 0 | x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh); |
4420 | |
|
4421 | 0 | if (frame_is_intra_only(cm)) |
4422 | 0 | hybrid_intra_mode_search(cpi, x, rd_cost, bsize, ctx); |
4423 | 0 | else if (cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame) |
4424 | 0 | hybrid_search_svc_baseiskey(cpi, x, rd_cost, bsize, ctx, tile_data, mi_row, |
4425 | 0 | mi_col); |
4426 | 0 | else if (segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP)) |
4427 | 0 | set_mode_info_seg_skip(x, cm->tx_mode, cm->interp_filter, rd_cost, bsize); |
4428 | 0 | else if (bsize >= BLOCK_8X8) { |
4429 | 0 | if (cpi->rc.hybrid_intra_scene_change) |
4430 | 0 | hybrid_search_scene_change(cpi, x, rd_cost, bsize, ctx, tile_data, mi_row, |
4431 | 0 | mi_col); |
4432 | 0 | else |
4433 | 0 | vp9_pick_inter_mode(cpi, x, tile_data, mi_row, mi_col, rd_cost, bsize, |
4434 | 0 | ctx); |
4435 | 0 | } else { |
4436 | 0 | vp9_pick_inter_mode_sub8x8(cpi, x, mi_row, mi_col, rd_cost, bsize, ctx); |
4437 | 0 | } |
4438 | |
|
4439 | 0 | duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize); |
4440 | |
|
4441 | 0 | for (plane = 0; plane < MAX_MB_PLANE; ++plane) { |
4442 | 0 | struct macroblockd_plane *pd = &xd->plane[plane]; |
4443 | 0 | memcpy(pd->above_context, a + num_4x4_blocks_wide * plane, |
4444 | 0 | (sizeof(a[0]) * num_4x4_blocks_wide) >> pd->subsampling_x); |
4445 | 0 | memcpy(pd->left_context, l + num_4x4_blocks_high * plane, |
4446 | 0 | (sizeof(l[0]) * num_4x4_blocks_high) >> pd->subsampling_y); |
4447 | 0 | } |
4448 | |
|
4449 | 0 | if (rd_cost->rate == INT_MAX) vp9_rd_cost_reset(rd_cost); |
4450 | |
|
4451 | 0 | ctx->rate = rd_cost->rate; |
4452 | 0 | ctx->dist = rd_cost->dist; |
4453 | 0 | } |
4454 | | |
4455 | | static void fill_mode_info_sb(VP9_COMMON *cm, MACROBLOCK *x, int mi_row, |
4456 | 0 | int mi_col, BLOCK_SIZE bsize, PC_TREE *pc_tree) { |
4457 | 0 | MACROBLOCKD *xd = &x->e_mbd; |
4458 | 0 | int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4; |
4459 | 0 | PARTITION_TYPE partition = pc_tree->partitioning; |
4460 | 0 | BLOCK_SIZE subsize = get_subsize(bsize, partition); |
4461 | |
|
4462 | 0 | assert(bsize >= BLOCK_8X8); |
4463 | |
|
4464 | 0 | if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; |
4465 | | |
4466 | 0 | switch (partition) { |
4467 | 0 | case PARTITION_NONE: |
4468 | 0 | set_mode_info_offsets(cm, x, xd, mi_row, mi_col); |
4469 | 0 | *(xd->mi[0]) = pc_tree->none.mic; |
4470 | 0 | *(x->mbmi_ext) = pc_tree->none.mbmi_ext; |
4471 | 0 | duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize); |
4472 | 0 | break; |
4473 | 0 | case PARTITION_VERT: |
4474 | 0 | set_mode_info_offsets(cm, x, xd, mi_row, mi_col); |
4475 | 0 | *(xd->mi[0]) = pc_tree->vertical[0].mic; |
4476 | 0 | *(x->mbmi_ext) = pc_tree->vertical[0].mbmi_ext; |
4477 | 0 | duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, subsize); |
4478 | |
|
4479 | 0 | if (mi_col + hbs < cm->mi_cols) { |
4480 | 0 | set_mode_info_offsets(cm, x, xd, mi_row, mi_col + hbs); |
4481 | 0 | *(xd->mi[0]) = pc_tree->vertical[1].mic; |
4482 | 0 | *(x->mbmi_ext) = pc_tree->vertical[1].mbmi_ext; |
4483 | 0 | duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col + hbs, subsize); |
4484 | 0 | } |
4485 | 0 | break; |
4486 | 0 | case PARTITION_HORZ: |
4487 | 0 | set_mode_info_offsets(cm, x, xd, mi_row, mi_col); |
4488 | 0 | *(xd->mi[0]) = pc_tree->horizontal[0].mic; |
4489 | 0 | *(x->mbmi_ext) = pc_tree->horizontal[0].mbmi_ext; |
4490 | 0 | duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, subsize); |
4491 | 0 | if (mi_row + hbs < cm->mi_rows) { |
4492 | 0 | set_mode_info_offsets(cm, x, xd, mi_row + hbs, mi_col); |
4493 | 0 | *(xd->mi[0]) = pc_tree->horizontal[1].mic; |
4494 | 0 | *(x->mbmi_ext) = pc_tree->horizontal[1].mbmi_ext; |
4495 | 0 | duplicate_mode_info_in_sb(cm, xd, mi_row + hbs, mi_col, subsize); |
4496 | 0 | } |
4497 | 0 | break; |
4498 | 0 | case PARTITION_SPLIT: { |
4499 | 0 | fill_mode_info_sb(cm, x, mi_row, mi_col, subsize, pc_tree->u.split[0]); |
4500 | 0 | fill_mode_info_sb(cm, x, mi_row, mi_col + hbs, subsize, |
4501 | 0 | pc_tree->u.split[1]); |
4502 | 0 | fill_mode_info_sb(cm, x, mi_row + hbs, mi_col, subsize, |
4503 | 0 | pc_tree->u.split[2]); |
4504 | 0 | fill_mode_info_sb(cm, x, mi_row + hbs, mi_col + hbs, subsize, |
4505 | 0 | pc_tree->u.split[3]); |
4506 | 0 | break; |
4507 | 0 | } |
4508 | 0 | default: break; |
4509 | 0 | } |
4510 | 0 | } |
4511 | | |
4512 | | // Reset the prediction pixel ready flag recursively. |
4513 | 0 | static void pred_pixel_ready_reset(PC_TREE *pc_tree, BLOCK_SIZE bsize) { |
4514 | 0 | pc_tree->none.pred_pixel_ready = 0; |
4515 | 0 | pc_tree->horizontal[0].pred_pixel_ready = 0; |
4516 | 0 | pc_tree->horizontal[1].pred_pixel_ready = 0; |
4517 | 0 | pc_tree->vertical[0].pred_pixel_ready = 0; |
4518 | 0 | pc_tree->vertical[1].pred_pixel_ready = 0; |
4519 | |
|
4520 | 0 | if (bsize > BLOCK_8X8) { |
4521 | 0 | BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_SPLIT); |
4522 | 0 | int i; |
4523 | 0 | for (i = 0; i < 4; ++i) |
4524 | 0 | pred_pixel_ready_reset(pc_tree->u.split[i], subsize); |
4525 | 0 | } |
4526 | 0 | } |
4527 | | |
4528 | | #define FEATURES 6 |
4529 | | #define LABELS 2 |
4530 | | static int ml_predict_var_partitioning(VP9_COMP *cpi, MACROBLOCK *x, |
4531 | | BLOCK_SIZE bsize, int mi_row, |
4532 | 0 | int mi_col) { |
4533 | 0 | VP9_COMMON *const cm = &cpi->common; |
4534 | 0 | const NN_CONFIG *nn_config = NULL; |
4535 | |
|
4536 | 0 | switch (bsize) { |
4537 | 0 | case BLOCK_64X64: nn_config = &vp9_var_part_nnconfig_64; break; |
4538 | 0 | case BLOCK_32X32: nn_config = &vp9_var_part_nnconfig_32; break; |
4539 | 0 | case BLOCK_16X16: nn_config = &vp9_var_part_nnconfig_16; break; |
4540 | 0 | case BLOCK_8X8: break; |
4541 | 0 | default: assert(0 && "Unexpected block size."); return -1; |
4542 | 0 | } |
4543 | | |
4544 | 0 | if (!nn_config) return -1; |
4545 | | |
4546 | 0 | vpx_clear_system_state(); |
4547 | |
|
4548 | 0 | { |
4549 | 0 | const float thresh = cpi->oxcf.speed <= 5 ? 1.25f : 0.0f; |
4550 | 0 | float features[FEATURES] = { 0.0f }; |
4551 | 0 | const int dc_q = vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth); |
4552 | 0 | int feature_idx = 0; |
4553 | 0 | float score[LABELS]; |
4554 | |
|
4555 | 0 | features[feature_idx++] = logf((float)(dc_q * dc_q) / 256.0f + 1.0f); |
4556 | 0 | vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col); |
4557 | 0 | { |
4558 | 0 | const int bs = 4 * num_4x4_blocks_wide_lookup[bsize]; |
4559 | 0 | const BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_SPLIT); |
4560 | 0 | const int sb_offset_row = 8 * (mi_row & 7); |
4561 | 0 | const int sb_offset_col = 8 * (mi_col & 7); |
4562 | 0 | const uint8_t *pred = x->est_pred + sb_offset_row * 64 + sb_offset_col; |
4563 | 0 | const uint8_t *src = x->plane[0].src.buf; |
4564 | 0 | const int src_stride = x->plane[0].src.stride; |
4565 | 0 | const int pred_stride = 64; |
4566 | 0 | unsigned int sse; |
4567 | 0 | int i; |
4568 | | // Variance of whole block. |
4569 | 0 | const unsigned int var = |
4570 | 0 | cpi->fn_ptr[bsize].vf(src, src_stride, pred, pred_stride, &sse); |
4571 | 0 | const float factor = (var == 0) ? 1.0f : (1.0f / (float)var); |
4572 | |
|
4573 | 0 | features[feature_idx++] = logf((float)var + 1.0f); |
4574 | 0 | for (i = 0; i < 4; ++i) { |
4575 | 0 | const int x_idx = (i & 1) * bs / 2; |
4576 | 0 | const int y_idx = (i >> 1) * bs / 2; |
4577 | 0 | const int src_offset = y_idx * src_stride + x_idx; |
4578 | 0 | const int pred_offset = y_idx * pred_stride + x_idx; |
4579 | | // Variance of quarter block. |
4580 | 0 | const unsigned int sub_var = |
4581 | 0 | cpi->fn_ptr[subsize].vf(src + src_offset, src_stride, |
4582 | 0 | pred + pred_offset, pred_stride, &sse); |
4583 | 0 | const float var_ratio = (var == 0) ? 1.0f : factor * (float)sub_var; |
4584 | 0 | features[feature_idx++] = var_ratio; |
4585 | 0 | } |
4586 | 0 | } |
4587 | |
|
4588 | 0 | assert(feature_idx == FEATURES); |
4589 | 0 | nn_predict(features, nn_config, score); |
4590 | 0 | if (score[0] > thresh) return PARTITION_SPLIT; |
4591 | 0 | if (score[0] < -thresh) return PARTITION_NONE; |
4592 | 0 | return -1; |
4593 | 0 | } |
4594 | 0 | } |
4595 | | #undef FEATURES |
4596 | | #undef LABELS |
4597 | | |
4598 | | static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td, |
4599 | | TileDataEnc *tile_data, TOKENEXTRA **tp, |
4600 | | int mi_row, int mi_col, BLOCK_SIZE bsize, |
4601 | | RD_COST *rd_cost, int do_recon, |
4602 | 0 | int64_t best_rd, PC_TREE *pc_tree) { |
4603 | 0 | const SPEED_FEATURES *const sf = &cpi->sf; |
4604 | 0 | VP9_COMMON *const cm = &cpi->common; |
4605 | 0 | TileInfo *const tile_info = &tile_data->tile_info; |
4606 | 0 | MACROBLOCK *const x = &td->mb; |
4607 | 0 | MACROBLOCKD *const xd = &x->e_mbd; |
4608 | 0 | const int ms = num_8x8_blocks_wide_lookup[bsize] / 2; |
4609 | 0 | TOKENEXTRA *tp_orig = *tp; |
4610 | 0 | PICK_MODE_CONTEXT *ctx = &pc_tree->none; |
4611 | 0 | int i; |
4612 | 0 | BLOCK_SIZE subsize = bsize; |
4613 | 0 | RD_COST this_rdc, sum_rdc, best_rdc; |
4614 | 0 | int do_split = bsize >= BLOCK_8X8; |
4615 | 0 | int do_rect = 1; |
4616 | | // Override skipping rectangular partition operations for edge blocks |
4617 | 0 | const int force_horz_split = (mi_row + ms >= cm->mi_rows); |
4618 | 0 | const int force_vert_split = (mi_col + ms >= cm->mi_cols); |
4619 | 0 | const int xss = x->e_mbd.plane[1].subsampling_x; |
4620 | 0 | const int yss = x->e_mbd.plane[1].subsampling_y; |
4621 | |
|
4622 | 0 | int partition_none_allowed = !force_horz_split && !force_vert_split; |
4623 | 0 | int partition_horz_allowed = |
4624 | 0 | !force_vert_split && yss <= xss && bsize >= BLOCK_8X8; |
4625 | 0 | int partition_vert_allowed = |
4626 | 0 | !force_horz_split && xss <= yss && bsize >= BLOCK_8X8; |
4627 | 0 | const int use_ml_based_partitioning = |
4628 | 0 | sf->partition_search_type == ML_BASED_PARTITION; |
4629 | |
|
4630 | 0 | (void)*tp_orig; |
4631 | | |
4632 | | // Avoid checking for rectangular partitions for speed >= 5. |
4633 | 0 | if (cpi->oxcf.speed >= 5) do_rect = 0; |
4634 | |
|
4635 | 0 | assert(num_8x8_blocks_wide_lookup[bsize] == |
4636 | 0 | num_8x8_blocks_high_lookup[bsize]); |
4637 | |
|
4638 | 0 | vp9_rd_cost_init(&sum_rdc); |
4639 | 0 | vp9_rd_cost_reset(&best_rdc); |
4640 | 0 | best_rdc.rdcost = best_rd; |
4641 | | |
4642 | | // Determine partition types in search according to the speed features. |
4643 | | // The threshold set here has to be of square block size. |
4644 | 0 | if (sf->auto_min_max_partition_size) { |
4645 | 0 | partition_none_allowed &= |
4646 | 0 | (bsize <= x->max_partition_size && bsize >= x->min_partition_size); |
4647 | 0 | partition_horz_allowed &= |
4648 | 0 | ((bsize <= x->max_partition_size && bsize > x->min_partition_size) || |
4649 | 0 | force_horz_split); |
4650 | 0 | partition_vert_allowed &= |
4651 | 0 | ((bsize <= x->max_partition_size && bsize > x->min_partition_size) || |
4652 | 0 | force_vert_split); |
4653 | 0 | do_split &= bsize > x->min_partition_size; |
4654 | 0 | } |
4655 | 0 | if (sf->use_square_partition_only) { |
4656 | 0 | partition_horz_allowed &= force_horz_split; |
4657 | 0 | partition_vert_allowed &= force_vert_split; |
4658 | 0 | } |
4659 | |
|
4660 | 0 | if (use_ml_based_partitioning) { |
4661 | 0 | if (partition_none_allowed || do_split) do_rect = 0; |
4662 | 0 | if (partition_none_allowed && do_split) { |
4663 | 0 | const int ml_predicted_partition = |
4664 | 0 | ml_predict_var_partitioning(cpi, x, bsize, mi_row, mi_col); |
4665 | 0 | if (ml_predicted_partition == PARTITION_NONE) do_split = 0; |
4666 | 0 | if (ml_predicted_partition == PARTITION_SPLIT) partition_none_allowed = 0; |
4667 | 0 | } |
4668 | 0 | } |
4669 | |
|
4670 | 0 | if (!partition_none_allowed && !do_split) do_rect = 1; |
4671 | |
|
4672 | 0 | ctx->pred_pixel_ready = |
4673 | 0 | !(partition_vert_allowed || partition_horz_allowed || do_split); |
4674 | | |
4675 | | // PARTITION_NONE |
4676 | 0 | if (partition_none_allowed) { |
4677 | 0 | nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, bsize, |
4678 | 0 | ctx); |
4679 | 0 | ctx->mic = *xd->mi[0]; |
4680 | 0 | ctx->mbmi_ext = *x->mbmi_ext; |
4681 | 0 | ctx->skip_txfm[0] = x->skip_txfm[0]; |
4682 | 0 | ctx->skip = x->skip; |
4683 | |
|
4684 | 0 | if (this_rdc.rate != INT_MAX) { |
4685 | 0 | const int pl = partition_plane_context(xd, mi_row, mi_col, bsize); |
4686 | 0 | this_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE]; |
4687 | 0 | this_rdc.rdcost = |
4688 | 0 | RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist); |
4689 | 0 | if (this_rdc.rdcost < best_rdc.rdcost) { |
4690 | 0 | best_rdc = this_rdc; |
4691 | 0 | if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE; |
4692 | |
|
4693 | 0 | if (!use_ml_based_partitioning) { |
4694 | 0 | int64_t dist_breakout_thr = sf->partition_search_breakout_thr.dist; |
4695 | 0 | int64_t rate_breakout_thr = sf->partition_search_breakout_thr.rate; |
4696 | 0 | dist_breakout_thr >>= |
4697 | 0 | 8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]); |
4698 | 0 | rate_breakout_thr *= num_pels_log2_lookup[bsize]; |
4699 | 0 | if (!x->e_mbd.lossless && this_rdc.rate < rate_breakout_thr && |
4700 | 0 | this_rdc.dist < dist_breakout_thr) { |
4701 | 0 | do_split = 0; |
4702 | 0 | do_rect = 0; |
4703 | 0 | } |
4704 | 0 | } |
4705 | 0 | } |
4706 | 0 | } |
4707 | 0 | } |
4708 | | |
4709 | | // store estimated motion vector |
4710 | 0 | store_pred_mv(x, ctx); |
4711 | | |
4712 | | // PARTITION_SPLIT |
4713 | 0 | if (do_split) { |
4714 | 0 | int pl = partition_plane_context(xd, mi_row, mi_col, bsize); |
4715 | 0 | sum_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT]; |
4716 | 0 | sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); |
4717 | 0 | subsize = get_subsize(bsize, PARTITION_SPLIT); |
4718 | 0 | for (i = 0; i < 4 && sum_rdc.rdcost < best_rdc.rdcost; ++i) { |
4719 | 0 | const int x_idx = (i & 1) * ms; |
4720 | 0 | const int y_idx = (i >> 1) * ms; |
4721 | |
|
4722 | 0 | if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols) |
4723 | 0 | continue; |
4724 | 0 | load_pred_mv(x, ctx); |
4725 | 0 | nonrd_pick_partition( |
4726 | 0 | cpi, td, tile_data, tp, mi_row + y_idx, mi_col + x_idx, subsize, |
4727 | 0 | &this_rdc, 0, best_rdc.rdcost - sum_rdc.rdcost, pc_tree->u.split[i]); |
4728 | |
|
4729 | 0 | if (this_rdc.rate == INT_MAX) { |
4730 | 0 | vp9_rd_cost_reset(&sum_rdc); |
4731 | 0 | } else { |
4732 | 0 | sum_rdc.rate += this_rdc.rate; |
4733 | 0 | sum_rdc.dist += this_rdc.dist; |
4734 | 0 | sum_rdc.rdcost += this_rdc.rdcost; |
4735 | 0 | } |
4736 | 0 | } |
4737 | |
|
4738 | 0 | if (sum_rdc.rdcost < best_rdc.rdcost) { |
4739 | 0 | best_rdc = sum_rdc; |
4740 | 0 | pc_tree->partitioning = PARTITION_SPLIT; |
4741 | 0 | } else { |
4742 | | // skip rectangular partition test when larger block size |
4743 | | // gives better rd cost |
4744 | 0 | if (sf->less_rectangular_check) do_rect &= !partition_none_allowed; |
4745 | 0 | } |
4746 | 0 | } |
4747 | | |
4748 | | // PARTITION_HORZ |
4749 | 0 | if (partition_horz_allowed && do_rect) { |
4750 | 0 | subsize = get_subsize(bsize, PARTITION_HORZ); |
4751 | 0 | load_pred_mv(x, ctx); |
4752 | 0 | pc_tree->horizontal[0].pred_pixel_ready = 1; |
4753 | 0 | nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize, |
4754 | 0 | &pc_tree->horizontal[0]); |
4755 | |
|
4756 | 0 | pc_tree->horizontal[0].mic = *xd->mi[0]; |
4757 | 0 | pc_tree->horizontal[0].mbmi_ext = *x->mbmi_ext; |
4758 | 0 | pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0]; |
4759 | 0 | pc_tree->horizontal[0].skip = x->skip; |
4760 | |
|
4761 | 0 | if (sum_rdc.rdcost < best_rdc.rdcost && mi_row + ms < cm->mi_rows) { |
4762 | 0 | load_pred_mv(x, ctx); |
4763 | 0 | pc_tree->horizontal[1].pred_pixel_ready = 1; |
4764 | 0 | nonrd_pick_sb_modes(cpi, tile_data, x, mi_row + ms, mi_col, &this_rdc, |
4765 | 0 | subsize, &pc_tree->horizontal[1]); |
4766 | |
|
4767 | 0 | pc_tree->horizontal[1].mic = *xd->mi[0]; |
4768 | 0 | pc_tree->horizontal[1].mbmi_ext = *x->mbmi_ext; |
4769 | 0 | pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0]; |
4770 | 0 | pc_tree->horizontal[1].skip = x->skip; |
4771 | |
|
4772 | 0 | if (this_rdc.rate == INT_MAX) { |
4773 | 0 | vp9_rd_cost_reset(&sum_rdc); |
4774 | 0 | } else { |
4775 | 0 | int pl = partition_plane_context(xd, mi_row, mi_col, bsize); |
4776 | 0 | this_rdc.rate += cpi->partition_cost[pl][PARTITION_HORZ]; |
4777 | 0 | sum_rdc.rate += this_rdc.rate; |
4778 | 0 | sum_rdc.dist += this_rdc.dist; |
4779 | 0 | sum_rdc.rdcost = |
4780 | 0 | RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); |
4781 | 0 | } |
4782 | 0 | } |
4783 | |
|
4784 | 0 | if (sum_rdc.rdcost < best_rdc.rdcost) { |
4785 | 0 | best_rdc = sum_rdc; |
4786 | 0 | pc_tree->partitioning = PARTITION_HORZ; |
4787 | 0 | } else { |
4788 | 0 | pred_pixel_ready_reset(pc_tree, bsize); |
4789 | 0 | } |
4790 | 0 | } |
4791 | | |
4792 | | // PARTITION_VERT |
4793 | 0 | if (partition_vert_allowed && do_rect) { |
4794 | 0 | subsize = get_subsize(bsize, PARTITION_VERT); |
4795 | 0 | load_pred_mv(x, ctx); |
4796 | 0 | pc_tree->vertical[0].pred_pixel_ready = 1; |
4797 | 0 | nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize, |
4798 | 0 | &pc_tree->vertical[0]); |
4799 | 0 | pc_tree->vertical[0].mic = *xd->mi[0]; |
4800 | 0 | pc_tree->vertical[0].mbmi_ext = *x->mbmi_ext; |
4801 | 0 | pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0]; |
4802 | 0 | pc_tree->vertical[0].skip = x->skip; |
4803 | |
|
4804 | 0 | if (sum_rdc.rdcost < best_rdc.rdcost && mi_col + ms < cm->mi_cols) { |
4805 | 0 | load_pred_mv(x, ctx); |
4806 | 0 | pc_tree->vertical[1].pred_pixel_ready = 1; |
4807 | 0 | nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + ms, &this_rdc, |
4808 | 0 | subsize, &pc_tree->vertical[1]); |
4809 | 0 | pc_tree->vertical[1].mic = *xd->mi[0]; |
4810 | 0 | pc_tree->vertical[1].mbmi_ext = *x->mbmi_ext; |
4811 | 0 | pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0]; |
4812 | 0 | pc_tree->vertical[1].skip = x->skip; |
4813 | |
|
4814 | 0 | if (this_rdc.rate == INT_MAX) { |
4815 | 0 | vp9_rd_cost_reset(&sum_rdc); |
4816 | 0 | } else { |
4817 | 0 | int pl = partition_plane_context(xd, mi_row, mi_col, bsize); |
4818 | 0 | sum_rdc.rate += cpi->partition_cost[pl][PARTITION_VERT]; |
4819 | 0 | sum_rdc.rate += this_rdc.rate; |
4820 | 0 | sum_rdc.dist += this_rdc.dist; |
4821 | 0 | sum_rdc.rdcost = |
4822 | 0 | RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); |
4823 | 0 | } |
4824 | 0 | } |
4825 | |
|
4826 | 0 | if (sum_rdc.rdcost < best_rdc.rdcost) { |
4827 | 0 | best_rdc = sum_rdc; |
4828 | 0 | pc_tree->partitioning = PARTITION_VERT; |
4829 | 0 | } else { |
4830 | 0 | pred_pixel_ready_reset(pc_tree, bsize); |
4831 | 0 | } |
4832 | 0 | } |
4833 | |
|
4834 | 0 | *rd_cost = best_rdc; |
4835 | |
|
4836 | 0 | if (best_rdc.rate == INT_MAX) { |
4837 | 0 | vp9_rd_cost_reset(rd_cost); |
4838 | 0 | return; |
4839 | 0 | } |
4840 | | |
4841 | | // update mode info array |
4842 | 0 | fill_mode_info_sb(cm, x, mi_row, mi_col, bsize, pc_tree); |
4843 | |
|
4844 | 0 | if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX && do_recon) { |
4845 | 0 | int output_enabled = (bsize == BLOCK_64X64); |
4846 | 0 | encode_sb_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize, |
4847 | 0 | pc_tree); |
4848 | 0 | } |
4849 | |
|
4850 | 0 | if (bsize == BLOCK_64X64 && do_recon) { |
4851 | 0 | assert(tp_orig < *tp); |
4852 | 0 | assert(best_rdc.rate < INT_MAX); |
4853 | 0 | assert(best_rdc.dist < INT64_MAX); |
4854 | 0 | } else { |
4855 | 0 | assert(tp_orig == *tp); |
4856 | 0 | } |
4857 | 0 | } |
4858 | | |
4859 | | static void nonrd_select_partition(VP9_COMP *cpi, ThreadData *td, |
4860 | | TileDataEnc *tile_data, MODE_INFO **mi, |
4861 | | TOKENEXTRA **tp, int mi_row, int mi_col, |
4862 | | BLOCK_SIZE bsize, int output_enabled, |
4863 | 0 | RD_COST *rd_cost, PC_TREE *pc_tree) { |
4864 | 0 | VP9_COMMON *const cm = &cpi->common; |
4865 | 0 | TileInfo *const tile_info = &tile_data->tile_info; |
4866 | 0 | MACROBLOCK *const x = &td->mb; |
4867 | 0 | MACROBLOCKD *const xd = &x->e_mbd; |
4868 | 0 | const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4; |
4869 | 0 | const int mis = cm->mi_stride; |
4870 | 0 | PARTITION_TYPE partition; |
4871 | 0 | BLOCK_SIZE subsize; |
4872 | 0 | RD_COST this_rdc; |
4873 | 0 | BLOCK_SIZE subsize_ref = |
4874 | 0 | (cpi->sf.adapt_partition_source_sad) ? BLOCK_8X8 : BLOCK_16X16; |
4875 | |
|
4876 | 0 | vp9_rd_cost_reset(&this_rdc); |
4877 | 0 | if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; |
4878 | | |
4879 | 0 | subsize = (bsize >= BLOCK_8X8) ? mi[0]->sb_type : BLOCK_4X4; |
4880 | 0 | partition = partition_lookup[bsl][subsize]; |
4881 | |
|
4882 | 0 | if (bsize == BLOCK_32X32 && subsize == BLOCK_32X32) { |
4883 | 0 | x->max_partition_size = BLOCK_32X32; |
4884 | 0 | x->min_partition_size = BLOCK_16X16; |
4885 | 0 | nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize, rd_cost, |
4886 | 0 | 0, INT64_MAX, pc_tree); |
4887 | 0 | } else if (bsize == BLOCK_32X32 && partition != PARTITION_NONE && |
4888 | 0 | subsize >= subsize_ref) { |
4889 | 0 | x->max_partition_size = BLOCK_32X32; |
4890 | 0 | x->min_partition_size = BLOCK_8X8; |
4891 | 0 | nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize, rd_cost, |
4892 | 0 | 0, INT64_MAX, pc_tree); |
4893 | 0 | } else if (bsize == BLOCK_16X16 && partition != PARTITION_NONE) { |
4894 | 0 | x->max_partition_size = BLOCK_16X16; |
4895 | 0 | x->min_partition_size = BLOCK_8X8; |
4896 | 0 | nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize, rd_cost, |
4897 | 0 | 0, INT64_MAX, pc_tree); |
4898 | 0 | } else { |
4899 | 0 | switch (partition) { |
4900 | 0 | case PARTITION_NONE: |
4901 | 0 | pc_tree->none.pred_pixel_ready = 1; |
4902 | 0 | nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost, subsize, |
4903 | 0 | &pc_tree->none); |
4904 | 0 | pc_tree->none.mic = *xd->mi[0]; |
4905 | 0 | pc_tree->none.mbmi_ext = *x->mbmi_ext; |
4906 | 0 | pc_tree->none.skip_txfm[0] = x->skip_txfm[0]; |
4907 | 0 | pc_tree->none.skip = x->skip; |
4908 | 0 | break; |
4909 | 0 | case PARTITION_VERT: |
4910 | 0 | pc_tree->vertical[0].pred_pixel_ready = 1; |
4911 | 0 | nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost, subsize, |
4912 | 0 | &pc_tree->vertical[0]); |
4913 | 0 | pc_tree->vertical[0].mic = *xd->mi[0]; |
4914 | 0 | pc_tree->vertical[0].mbmi_ext = *x->mbmi_ext; |
4915 | 0 | pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0]; |
4916 | 0 | pc_tree->vertical[0].skip = x->skip; |
4917 | 0 | if (mi_col + hbs < cm->mi_cols) { |
4918 | 0 | pc_tree->vertical[1].pred_pixel_ready = 1; |
4919 | 0 | nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, |
4920 | 0 | &this_rdc, subsize, &pc_tree->vertical[1]); |
4921 | 0 | pc_tree->vertical[1].mic = *xd->mi[0]; |
4922 | 0 | pc_tree->vertical[1].mbmi_ext = *x->mbmi_ext; |
4923 | 0 | pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0]; |
4924 | 0 | pc_tree->vertical[1].skip = x->skip; |
4925 | 0 | if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX && |
4926 | 0 | rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) { |
4927 | 0 | rd_cost->rate += this_rdc.rate; |
4928 | 0 | rd_cost->dist += this_rdc.dist; |
4929 | 0 | } |
4930 | 0 | } |
4931 | 0 | break; |
4932 | 0 | case PARTITION_HORZ: |
4933 | 0 | pc_tree->horizontal[0].pred_pixel_ready = 1; |
4934 | 0 | nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost, subsize, |
4935 | 0 | &pc_tree->horizontal[0]); |
4936 | 0 | pc_tree->horizontal[0].mic = *xd->mi[0]; |
4937 | 0 | pc_tree->horizontal[0].mbmi_ext = *x->mbmi_ext; |
4938 | 0 | pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0]; |
4939 | 0 | pc_tree->horizontal[0].skip = x->skip; |
4940 | 0 | if (mi_row + hbs < cm->mi_rows) { |
4941 | 0 | pc_tree->horizontal[1].pred_pixel_ready = 1; |
4942 | 0 | nonrd_pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, |
4943 | 0 | &this_rdc, subsize, &pc_tree->horizontal[1]); |
4944 | 0 | pc_tree->horizontal[1].mic = *xd->mi[0]; |
4945 | 0 | pc_tree->horizontal[1].mbmi_ext = *x->mbmi_ext; |
4946 | 0 | pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0]; |
4947 | 0 | pc_tree->horizontal[1].skip = x->skip; |
4948 | 0 | if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX && |
4949 | 0 | rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) { |
4950 | 0 | rd_cost->rate += this_rdc.rate; |
4951 | 0 | rd_cost->dist += this_rdc.dist; |
4952 | 0 | } |
4953 | 0 | } |
4954 | 0 | break; |
4955 | 0 | default: |
4956 | 0 | assert(partition == PARTITION_SPLIT); |
4957 | 0 | subsize = get_subsize(bsize, PARTITION_SPLIT); |
4958 | 0 | nonrd_select_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, |
4959 | 0 | subsize, output_enabled, rd_cost, |
4960 | 0 | pc_tree->u.split[0]); |
4961 | 0 | nonrd_select_partition(cpi, td, tile_data, mi + hbs, tp, mi_row, |
4962 | 0 | mi_col + hbs, subsize, output_enabled, &this_rdc, |
4963 | 0 | pc_tree->u.split[1]); |
4964 | 0 | if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX && |
4965 | 0 | rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) { |
4966 | 0 | rd_cost->rate += this_rdc.rate; |
4967 | 0 | rd_cost->dist += this_rdc.dist; |
4968 | 0 | } |
4969 | 0 | nonrd_select_partition(cpi, td, tile_data, mi + hbs * mis, tp, |
4970 | 0 | mi_row + hbs, mi_col, subsize, output_enabled, |
4971 | 0 | &this_rdc, pc_tree->u.split[2]); |
4972 | 0 | if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX && |
4973 | 0 | rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) { |
4974 | 0 | rd_cost->rate += this_rdc.rate; |
4975 | 0 | rd_cost->dist += this_rdc.dist; |
4976 | 0 | } |
4977 | 0 | nonrd_select_partition(cpi, td, tile_data, mi + hbs * mis + hbs, tp, |
4978 | 0 | mi_row + hbs, mi_col + hbs, subsize, |
4979 | 0 | output_enabled, &this_rdc, pc_tree->u.split[3]); |
4980 | 0 | if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX && |
4981 | 0 | rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) { |
4982 | 0 | rd_cost->rate += this_rdc.rate; |
4983 | 0 | rd_cost->dist += this_rdc.dist; |
4984 | 0 | } |
4985 | 0 | break; |
4986 | 0 | } |
4987 | 0 | } |
4988 | | |
4989 | 0 | if (bsize == BLOCK_64X64 && output_enabled) |
4990 | 0 | encode_sb_rt(cpi, td, tile_info, tp, mi_row, mi_col, 1, bsize, pc_tree); |
4991 | 0 | } |
4992 | | |
4993 | | static void nonrd_use_partition(VP9_COMP *cpi, ThreadData *td, |
4994 | | TileDataEnc *tile_data, MODE_INFO **mi, |
4995 | | TOKENEXTRA **tp, int mi_row, int mi_col, |
4996 | | BLOCK_SIZE bsize, int output_enabled, |
4997 | 0 | RD_COST *dummy_cost, PC_TREE *pc_tree) { |
4998 | 0 | VP9_COMMON *const cm = &cpi->common; |
4999 | 0 | TileInfo *tile_info = &tile_data->tile_info; |
5000 | 0 | MACROBLOCK *const x = &td->mb; |
5001 | 0 | MACROBLOCKD *const xd = &x->e_mbd; |
5002 | 0 | const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4; |
5003 | 0 | const int mis = cm->mi_stride; |
5004 | 0 | PARTITION_TYPE partition; |
5005 | 0 | BLOCK_SIZE subsize; |
5006 | |
|
5007 | 0 | if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; |
5008 | | |
5009 | 0 | subsize = (bsize >= BLOCK_8X8) ? mi[0]->sb_type : BLOCK_4X4; |
5010 | 0 | partition = partition_lookup[bsl][subsize]; |
5011 | |
|
5012 | 0 | if (output_enabled && bsize != BLOCK_4X4) { |
5013 | 0 | int ctx = partition_plane_context(xd, mi_row, mi_col, bsize); |
5014 | 0 | td->counts->partition[ctx][partition]++; |
5015 | 0 | } |
5016 | |
|
5017 | 0 | switch (partition) { |
5018 | 0 | case PARTITION_NONE: |
5019 | 0 | pc_tree->none.pred_pixel_ready = 1; |
5020 | 0 | nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost, |
5021 | 0 | subsize, &pc_tree->none); |
5022 | 0 | pc_tree->none.mic = *xd->mi[0]; |
5023 | 0 | pc_tree->none.mbmi_ext = *x->mbmi_ext; |
5024 | 0 | pc_tree->none.skip_txfm[0] = x->skip_txfm[0]; |
5025 | 0 | pc_tree->none.skip = x->skip; |
5026 | 0 | encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, |
5027 | 0 | subsize, &pc_tree->none); |
5028 | 0 | break; |
5029 | 0 | case PARTITION_VERT: |
5030 | 0 | pc_tree->vertical[0].pred_pixel_ready = 1; |
5031 | 0 | nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost, |
5032 | 0 | subsize, &pc_tree->vertical[0]); |
5033 | 0 | pc_tree->vertical[0].mic = *xd->mi[0]; |
5034 | 0 | pc_tree->vertical[0].mbmi_ext = *x->mbmi_ext; |
5035 | 0 | pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0]; |
5036 | 0 | pc_tree->vertical[0].skip = x->skip; |
5037 | 0 | encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, |
5038 | 0 | subsize, &pc_tree->vertical[0]); |
5039 | 0 | if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) { |
5040 | 0 | pc_tree->vertical[1].pred_pixel_ready = 1; |
5041 | 0 | nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, dummy_cost, |
5042 | 0 | subsize, &pc_tree->vertical[1]); |
5043 | 0 | pc_tree->vertical[1].mic = *xd->mi[0]; |
5044 | 0 | pc_tree->vertical[1].mbmi_ext = *x->mbmi_ext; |
5045 | 0 | pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0]; |
5046 | 0 | pc_tree->vertical[1].skip = x->skip; |
5047 | 0 | encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col + hbs, |
5048 | 0 | output_enabled, subsize, &pc_tree->vertical[1]); |
5049 | 0 | } |
5050 | 0 | break; |
5051 | 0 | case PARTITION_HORZ: |
5052 | 0 | pc_tree->horizontal[0].pred_pixel_ready = 1; |
5053 | 0 | nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost, |
5054 | 0 | subsize, &pc_tree->horizontal[0]); |
5055 | 0 | pc_tree->horizontal[0].mic = *xd->mi[0]; |
5056 | 0 | pc_tree->horizontal[0].mbmi_ext = *x->mbmi_ext; |
5057 | 0 | pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0]; |
5058 | 0 | pc_tree->horizontal[0].skip = x->skip; |
5059 | 0 | encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, |
5060 | 0 | subsize, &pc_tree->horizontal[0]); |
5061 | |
|
5062 | 0 | if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) { |
5063 | 0 | pc_tree->horizontal[1].pred_pixel_ready = 1; |
5064 | 0 | nonrd_pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, dummy_cost, |
5065 | 0 | subsize, &pc_tree->horizontal[1]); |
5066 | 0 | pc_tree->horizontal[1].mic = *xd->mi[0]; |
5067 | 0 | pc_tree->horizontal[1].mbmi_ext = *x->mbmi_ext; |
5068 | 0 | pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0]; |
5069 | 0 | pc_tree->horizontal[1].skip = x->skip; |
5070 | 0 | encode_b_rt(cpi, td, tile_info, tp, mi_row + hbs, mi_col, |
5071 | 0 | output_enabled, subsize, &pc_tree->horizontal[1]); |
5072 | 0 | } |
5073 | 0 | break; |
5074 | 0 | default: |
5075 | 0 | assert(partition == PARTITION_SPLIT); |
5076 | 0 | subsize = get_subsize(bsize, PARTITION_SPLIT); |
5077 | 0 | if (bsize == BLOCK_8X8) { |
5078 | 0 | nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost, |
5079 | 0 | subsize, pc_tree->u.leaf_split[0]); |
5080 | 0 | encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, |
5081 | 0 | subsize, pc_tree->u.leaf_split[0]); |
5082 | 0 | } else { |
5083 | 0 | nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, subsize, |
5084 | 0 | output_enabled, dummy_cost, pc_tree->u.split[0]); |
5085 | 0 | nonrd_use_partition(cpi, td, tile_data, mi + hbs, tp, mi_row, |
5086 | 0 | mi_col + hbs, subsize, output_enabled, dummy_cost, |
5087 | 0 | pc_tree->u.split[1]); |
5088 | 0 | nonrd_use_partition(cpi, td, tile_data, mi + hbs * mis, tp, |
5089 | 0 | mi_row + hbs, mi_col, subsize, output_enabled, |
5090 | 0 | dummy_cost, pc_tree->u.split[2]); |
5091 | 0 | nonrd_use_partition(cpi, td, tile_data, mi + hbs * mis + hbs, tp, |
5092 | 0 | mi_row + hbs, mi_col + hbs, subsize, output_enabled, |
5093 | 0 | dummy_cost, pc_tree->u.split[3]); |
5094 | 0 | } |
5095 | 0 | break; |
5096 | 0 | } |
5097 | | |
5098 | 0 | if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8) |
5099 | 0 | update_partition_context(xd, mi_row, mi_col, subsize, bsize); |
5100 | 0 | } |
5101 | | |
5102 | | // Get a prediction(stored in x->est_pred) for the whole 64x64 superblock. |
5103 | | static void get_estimated_pred(VP9_COMP *cpi, const TileInfo *const tile, |
5104 | 0 | MACROBLOCK *x, int mi_row, int mi_col) { |
5105 | 0 | VP9_COMMON *const cm = &cpi->common; |
5106 | 0 | const int is_key_frame = frame_is_intra_only(cm); |
5107 | 0 | MACROBLOCKD *xd = &x->e_mbd; |
5108 | |
|
5109 | 0 | set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64); |
5110 | |
|
5111 | 0 | if (!is_key_frame) { |
5112 | 0 | MODE_INFO *mi = xd->mi[0]; |
5113 | 0 | YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME); |
5114 | 0 | const YV12_BUFFER_CONFIG *yv12_g = NULL; |
5115 | 0 | const BLOCK_SIZE bsize = BLOCK_32X32 + (mi_col + 4 < cm->mi_cols) * 2 + |
5116 | 0 | (mi_row + 4 < cm->mi_rows); |
5117 | 0 | unsigned int y_sad_g, y_sad_thr; |
5118 | 0 | unsigned int y_sad = UINT_MAX; |
5119 | |
|
5120 | 0 | assert(yv12 != NULL); |
5121 | |
|
5122 | 0 | if (!(is_one_pass_svc(cpi) && cpi->svc.spatial_layer_id) || |
5123 | 0 | cpi->svc.use_gf_temporal_ref_current_layer) { |
5124 | | // For now, GOLDEN will not be used for non-zero spatial layers, since |
5125 | | // it may not be a temporal reference. |
5126 | 0 | yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME); |
5127 | 0 | } |
5128 | | |
5129 | | // Only compute y_sad_g (sad for golden reference) for speed < 8. |
5130 | 0 | if (cpi->oxcf.speed < 8 && yv12_g && yv12_g != yv12 && |
5131 | 0 | (cpi->ref_frame_flags & VP9_GOLD_FLAG)) { |
5132 | 0 | vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col, |
5133 | 0 | &cm->frame_refs[GOLDEN_FRAME - 1].sf); |
5134 | 0 | y_sad_g = cpi->fn_ptr[bsize].sdf( |
5135 | 0 | x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf, |
5136 | 0 | xd->plane[0].pre[0].stride); |
5137 | 0 | } else { |
5138 | 0 | y_sad_g = UINT_MAX; |
5139 | 0 | } |
5140 | |
|
5141 | 0 | if (cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR && |
5142 | 0 | cpi->rc.is_src_frame_alt_ref) { |
5143 | 0 | yv12 = get_ref_frame_buffer(cpi, ALTREF_FRAME); |
5144 | 0 | vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col, |
5145 | 0 | &cm->frame_refs[ALTREF_FRAME - 1].sf); |
5146 | 0 | mi->ref_frame[0] = ALTREF_FRAME; |
5147 | 0 | y_sad_g = UINT_MAX; |
5148 | 0 | } else { |
5149 | 0 | vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col, |
5150 | 0 | &cm->frame_refs[LAST_FRAME - 1].sf); |
5151 | 0 | mi->ref_frame[0] = LAST_FRAME; |
5152 | 0 | } |
5153 | 0 | mi->ref_frame[1] = NO_REF_FRAME; |
5154 | 0 | mi->sb_type = BLOCK_64X64; |
5155 | 0 | mi->mv[0].as_int = 0; |
5156 | 0 | mi->interp_filter = BILINEAR; |
5157 | |
|
5158 | 0 | { |
5159 | 0 | const MV dummy_mv = { 0, 0 }; |
5160 | 0 | y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col, |
5161 | 0 | &dummy_mv); |
5162 | 0 | x->sb_use_mv_part = 1; |
5163 | 0 | x->sb_mvcol_part = mi->mv[0].as_mv.col; |
5164 | 0 | x->sb_mvrow_part = mi->mv[0].as_mv.row; |
5165 | 0 | } |
5166 | | |
5167 | | // Pick ref frame for partitioning, bias last frame when y_sad_g and y_sad |
5168 | | // are close if short_circuit_low_temp_var is on. |
5169 | 0 | y_sad_thr = cpi->sf.short_circuit_low_temp_var ? (y_sad * 7) >> 3 : y_sad; |
5170 | 0 | if (y_sad_g < y_sad_thr) { |
5171 | 0 | vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col, |
5172 | 0 | &cm->frame_refs[GOLDEN_FRAME - 1].sf); |
5173 | 0 | mi->ref_frame[0] = GOLDEN_FRAME; |
5174 | 0 | mi->mv[0].as_int = 0; |
5175 | 0 | } else { |
5176 | 0 | x->pred_mv[LAST_FRAME] = mi->mv[0].as_mv; |
5177 | 0 | } |
5178 | |
|
5179 | 0 | set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]); |
5180 | 0 | xd->plane[0].dst.buf = x->est_pred; |
5181 | 0 | xd->plane[0].dst.stride = 64; |
5182 | 0 | vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_64X64); |
5183 | 0 | } else { |
5184 | 0 | #if CONFIG_VP9_HIGHBITDEPTH |
5185 | 0 | switch (xd->bd) { |
5186 | 0 | case 8: memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0])); break; |
5187 | 0 | case 10: |
5188 | 0 | memset(x->est_pred, 128 * 4, 64 * 64 * sizeof(x->est_pred[0])); |
5189 | 0 | break; |
5190 | 0 | case 12: |
5191 | 0 | memset(x->est_pred, 128 * 16, 64 * 64 * sizeof(x->est_pred[0])); |
5192 | 0 | break; |
5193 | 0 | } |
5194 | | #else |
5195 | | memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0])); |
5196 | | #endif // CONFIG_VP9_HIGHBITDEPTH |
5197 | 0 | } |
5198 | 0 | } |
5199 | | |
5200 | | static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td, |
5201 | | TileDataEnc *tile_data, int mi_row, |
5202 | 0 | TOKENEXTRA **tp) { |
5203 | 0 | SPEED_FEATURES *const sf = &cpi->sf; |
5204 | 0 | VP9_COMMON *const cm = &cpi->common; |
5205 | 0 | TileInfo *const tile_info = &tile_data->tile_info; |
5206 | 0 | MACROBLOCK *const x = &td->mb; |
5207 | 0 | MACROBLOCKD *const xd = &x->e_mbd; |
5208 | 0 | const int mi_col_start = tile_info->mi_col_start; |
5209 | 0 | const int mi_col_end = tile_info->mi_col_end; |
5210 | 0 | int mi_col; |
5211 | 0 | const int sb_row = mi_row >> MI_BLOCK_SIZE_LOG2; |
5212 | 0 | const int num_sb_cols = |
5213 | 0 | get_num_cols(tile_data->tile_info, MI_BLOCK_SIZE_LOG2); |
5214 | 0 | int sb_col_in_tile; |
5215 | | |
5216 | | // Initialize the left context for the new SB row |
5217 | 0 | memset(&xd->left_context, 0, sizeof(xd->left_context)); |
5218 | 0 | memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context)); |
5219 | | |
5220 | | // Code each SB in the row |
5221 | 0 | for (mi_col = mi_col_start, sb_col_in_tile = 0; mi_col < mi_col_end; |
5222 | 0 | mi_col += MI_BLOCK_SIZE, ++sb_col_in_tile) { |
5223 | 0 | const struct segmentation *const seg = &cm->seg; |
5224 | 0 | RD_COST dummy_rdc; |
5225 | 0 | const int idx_str = cm->mi_stride * mi_row + mi_col; |
5226 | 0 | MODE_INFO **mi = cm->mi_grid_visible + idx_str; |
5227 | 0 | PARTITION_SEARCH_TYPE partition_search_type = sf->partition_search_type; |
5228 | 0 | BLOCK_SIZE bsize = BLOCK_64X64; |
5229 | 0 | int seg_skip = 0; |
5230 | 0 | int i; |
5231 | |
|
5232 | 0 | (*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, sb_row, |
5233 | 0 | sb_col_in_tile); |
5234 | |
|
5235 | 0 | if (cpi->use_skin_detection) { |
5236 | 0 | vp9_compute_skin_sb(cpi, BLOCK_16X16, mi_row, mi_col); |
5237 | 0 | } |
5238 | |
|
5239 | 0 | x->source_variance = UINT_MAX; |
5240 | 0 | for (i = 0; i < MAX_REF_FRAMES; ++i) { |
5241 | 0 | x->pred_mv[i].row = INT16_MAX; |
5242 | 0 | x->pred_mv[i].col = INT16_MAX; |
5243 | 0 | } |
5244 | 0 | vp9_rd_cost_init(&dummy_rdc); |
5245 | 0 | x->color_sensitivity[0] = 0; |
5246 | 0 | x->color_sensitivity[1] = 0; |
5247 | 0 | x->sb_is_skin = 0; |
5248 | 0 | x->skip_low_source_sad = 0; |
5249 | 0 | x->lowvar_highsumdiff = 0; |
5250 | 0 | x->content_state_sb = 0; |
5251 | 0 | x->zero_temp_sad_source = 0; |
5252 | 0 | x->sb_use_mv_part = 0; |
5253 | 0 | x->sb_mvcol_part = 0; |
5254 | 0 | x->sb_mvrow_part = 0; |
5255 | 0 | x->sb_pickmode_part = 0; |
5256 | 0 | x->arf_frame_usage = 0; |
5257 | 0 | x->lastgolden_frame_usage = 0; |
5258 | |
|
5259 | 0 | if (cpi->compute_source_sad_onepass && cpi->sf.use_source_sad) { |
5260 | 0 | int shift = cpi->Source->y_stride * (mi_row << 3) + (mi_col << 3); |
5261 | 0 | int sb_offset2 = ((cm->mi_cols + 7) >> 3) * (mi_row >> 3) + (mi_col >> 3); |
5262 | 0 | int64_t source_sad = avg_source_sad(cpi, x, shift, sb_offset2); |
5263 | 0 | if (sf->adapt_partition_source_sad && |
5264 | 0 | (cpi->oxcf.rc_mode == VPX_VBR && !cpi->rc.is_src_frame_alt_ref && |
5265 | 0 | source_sad > sf->adapt_partition_thresh && |
5266 | 0 | (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame))) |
5267 | 0 | partition_search_type = REFERENCE_PARTITION; |
5268 | 0 | } |
5269 | |
|
5270 | 0 | if (seg->enabled) { |
5271 | 0 | const uint8_t *const map = |
5272 | 0 | seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map; |
5273 | 0 | int segment_id = get_segment_id(cm, map, BLOCK_64X64, mi_row, mi_col); |
5274 | 0 | seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP); |
5275 | |
|
5276 | 0 | if (cpi->roi.enabled && cpi->roi.skip[BACKGROUND_SEG_SKIP_ID] && |
5277 | 0 | cpi->rc.frames_since_key > FRAMES_NO_SKIPPING_AFTER_KEY && |
5278 | 0 | x->content_state_sb > kLowSadLowSumdiff) { |
5279 | | // For ROI with skip, force segment = 0 (no skip) over whole |
5280 | | // superblock to avoid artifacts if temporal change in source_sad is |
5281 | | // not 0. |
5282 | 0 | int xi, yi; |
5283 | 0 | const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64]; |
5284 | 0 | const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64]; |
5285 | 0 | const int xmis = VPXMIN(cm->mi_cols - mi_col, bw); |
5286 | 0 | const int ymis = VPXMIN(cm->mi_rows - mi_row, bh); |
5287 | 0 | const int block_index = mi_row * cm->mi_cols + mi_col; |
5288 | 0 | set_mode_info_offsets(cm, x, xd, mi_row, mi_col); |
5289 | 0 | for (yi = 0; yi < ymis; yi++) |
5290 | 0 | for (xi = 0; xi < xmis; xi++) { |
5291 | 0 | int map_offset = block_index + yi * cm->mi_cols + xi; |
5292 | 0 | cpi->segmentation_map[map_offset] = 0; |
5293 | 0 | } |
5294 | 0 | set_segment_index(cpi, x, mi_row, mi_col, BLOCK_64X64, 0); |
5295 | 0 | seg_skip = 0; |
5296 | 0 | } |
5297 | 0 | if (seg_skip) { |
5298 | 0 | partition_search_type = FIXED_PARTITION; |
5299 | 0 | } |
5300 | 0 | } |
5301 | | |
5302 | | // Set the partition type of the 64X64 block |
5303 | 0 | switch (partition_search_type) { |
5304 | 0 | case VAR_BASED_PARTITION: |
5305 | | // TODO(jingning, marpan): The mode decision and encoding process |
5306 | | // support both intra and inter sub8x8 block coding for RTC mode. |
5307 | | // Tune the thresholds accordingly to use sub8x8 block coding for |
5308 | | // coding performance improvement. |
5309 | 0 | choose_partitioning(cpi, tile_info, x, mi_row, mi_col); |
5310 | 0 | nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, |
5311 | 0 | BLOCK_64X64, 1, &dummy_rdc, td->pc_root); |
5312 | 0 | break; |
5313 | 0 | case ML_BASED_PARTITION: |
5314 | 0 | get_estimated_pred(cpi, tile_info, x, mi_row, mi_col); |
5315 | 0 | x->max_partition_size = BLOCK_64X64; |
5316 | 0 | x->min_partition_size = BLOCK_8X8; |
5317 | 0 | x->sb_pickmode_part = 1; |
5318 | 0 | nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, |
5319 | 0 | BLOCK_64X64, &dummy_rdc, 1, INT64_MAX, |
5320 | 0 | td->pc_root); |
5321 | 0 | break; |
5322 | 0 | case FIXED_PARTITION: |
5323 | 0 | if (!seg_skip) bsize = sf->always_this_block_size; |
5324 | 0 | set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize); |
5325 | 0 | nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, |
5326 | 0 | BLOCK_64X64, 1, &dummy_rdc, td->pc_root); |
5327 | 0 | break; |
5328 | 0 | default: |
5329 | 0 | assert(partition_search_type == REFERENCE_PARTITION); |
5330 | 0 | x->sb_pickmode_part = 1; |
5331 | 0 | set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64); |
5332 | | // Use nonrd_pick_partition on scene-cut for VBR mode. |
5333 | | // nonrd_pick_partition does not support 4x4 partition, so avoid it |
5334 | | // on key frame for now. |
5335 | 0 | if ((cpi->oxcf.rc_mode == VPX_VBR && cpi->rc.high_source_sad && |
5336 | 0 | cpi->oxcf.speed < 6 && !frame_is_intra_only(cm) && |
5337 | 0 | (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame))) { |
5338 | | // Use lower max_partition_size for low resolutions. |
5339 | 0 | if (cm->width <= 352 && cm->height <= 288) |
5340 | 0 | x->max_partition_size = BLOCK_32X32; |
5341 | 0 | else |
5342 | 0 | x->max_partition_size = BLOCK_64X64; |
5343 | 0 | x->min_partition_size = BLOCK_8X8; |
5344 | 0 | nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, |
5345 | 0 | BLOCK_64X64, &dummy_rdc, 1, INT64_MAX, |
5346 | 0 | td->pc_root); |
5347 | 0 | } else { |
5348 | 0 | choose_partitioning(cpi, tile_info, x, mi_row, mi_col); |
5349 | | // TODO(marpan): Seems like nonrd_select_partition does not support |
5350 | | // 4x4 partition. Since 4x4 is used on key frame, use this switch |
5351 | | // for now. |
5352 | 0 | if (frame_is_intra_only(cm)) |
5353 | 0 | nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, |
5354 | 0 | BLOCK_64X64, 1, &dummy_rdc, td->pc_root); |
5355 | 0 | else |
5356 | 0 | nonrd_select_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, |
5357 | 0 | BLOCK_64X64, 1, &dummy_rdc, td->pc_root); |
5358 | 0 | } |
5359 | |
|
5360 | 0 | break; |
5361 | 0 | } |
5362 | | |
5363 | | // Update ref_frame usage for inter frame if this group is ARF group. |
5364 | 0 | if (!cpi->rc.is_src_frame_alt_ref && !cpi->refresh_golden_frame && |
5365 | 0 | !cpi->refresh_alt_ref_frame && cpi->rc.alt_ref_gf_group && |
5366 | 0 | cpi->sf.use_altref_onepass) { |
5367 | 0 | int sboffset = ((cm->mi_cols + 7) >> 3) * (mi_row >> 3) + (mi_col >> 3); |
5368 | 0 | if (cpi->count_arf_frame_usage != NULL) |
5369 | 0 | cpi->count_arf_frame_usage[sboffset] = x->arf_frame_usage; |
5370 | 0 | if (cpi->count_lastgolden_frame_usage != NULL) |
5371 | 0 | cpi->count_lastgolden_frame_usage[sboffset] = x->lastgolden_frame_usage; |
5372 | 0 | } |
5373 | |
|
5374 | 0 | (*(cpi->row_mt_sync_write_ptr))(&tile_data->row_mt_sync, sb_row, |
5375 | 0 | sb_col_in_tile, num_sb_cols); |
5376 | 0 | } |
5377 | 0 | } |
5378 | | // end RTC play code |
5379 | | |
5380 | 0 | static int get_skip_encode_frame(const VP9_COMMON *cm, ThreadData *const td) { |
5381 | 0 | unsigned int intra_count = 0, inter_count = 0; |
5382 | 0 | int j; |
5383 | |
|
5384 | 0 | for (j = 0; j < INTRA_INTER_CONTEXTS; ++j) { |
5385 | 0 | intra_count += td->counts->intra_inter[j][0]; |
5386 | 0 | inter_count += td->counts->intra_inter[j][1]; |
5387 | 0 | } |
5388 | |
|
5389 | 0 | return (intra_count << 2) < inter_count && cm->frame_type != KEY_FRAME && |
5390 | 0 | cm->show_frame; |
5391 | 0 | } |
5392 | | |
5393 | 54.1k | void vp9_init_tile_data(VP9_COMP *cpi) { |
5394 | 54.1k | VP9_COMMON *const cm = &cpi->common; |
5395 | 54.1k | const int tile_cols = 1 << cm->log2_tile_cols; |
5396 | 54.1k | const int tile_rows = 1 << cm->log2_tile_rows; |
5397 | 54.1k | int tile_col, tile_row; |
5398 | 54.1k | TOKENEXTRA *pre_tok = cpi->tile_tok[0][0]; |
5399 | 54.1k | TOKENLIST *tplist = cpi->tplist[0][0]; |
5400 | 54.1k | int tile_tok = 0; |
5401 | 54.1k | int tplist_count = 0; |
5402 | | |
5403 | 54.1k | if (cpi->tile_data == NULL || cpi->allocated_tiles < tile_cols * tile_rows) { |
5404 | 3.88k | if (cpi->tile_data != NULL) { |
5405 | | // Free the row mt memory in cpi->tile_data first. |
5406 | 0 | vp9_row_mt_mem_dealloc(cpi); |
5407 | 0 | vpx_free(cpi->tile_data); |
5408 | 0 | } |
5409 | 3.88k | cpi->allocated_tiles = 0; |
5410 | 3.88k | CHECK_MEM_ERROR( |
5411 | 3.88k | &cm->error, cpi->tile_data, |
5412 | 3.88k | vpx_malloc(tile_cols * tile_rows * sizeof(*cpi->tile_data))); |
5413 | 3.88k | cpi->allocated_tiles = tile_cols * tile_rows; |
5414 | | |
5415 | 7.76k | for (tile_row = 0; tile_row < tile_rows; ++tile_row) |
5416 | 11.3k | for (tile_col = 0; tile_col < tile_cols; ++tile_col) { |
5417 | 7.51k | TileDataEnc *tile_data = |
5418 | 7.51k | &cpi->tile_data[tile_row * tile_cols + tile_col]; |
5419 | 7.51k | int i, j; |
5420 | 7.51k | const MV zero_mv = { 0, 0 }; |
5421 | 105k | for (i = 0; i < BLOCK_SIZES; ++i) { |
5422 | 3.02M | for (j = 0; j < MAX_MODES; ++j) { |
5423 | 2.93M | tile_data->thresh_freq_fact[i][j] = RD_THRESH_INIT_FACT; |
5424 | 2.93M | tile_data->thresh_freq_fact_prev[i][j] = RD_THRESH_INIT_FACT; |
5425 | 2.93M | tile_data->mode_map[i][j] = j; |
5426 | 2.93M | } |
5427 | 97.7k | } |
5428 | 7.51k | tile_data->firstpass_top_mv = zero_mv; |
5429 | 7.51k | #if CONFIG_MULTITHREAD |
5430 | 7.51k | tile_data->row_base_thresh_freq_fact = NULL; |
5431 | 7.51k | #endif |
5432 | 7.51k | } |
5433 | 3.88k | } |
5434 | | |
5435 | 108k | for (tile_row = 0; tile_row < tile_rows; ++tile_row) { |
5436 | 113k | for (tile_col = 0; tile_col < tile_cols; ++tile_col) { |
5437 | 58.9k | TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col]; |
5438 | 58.9k | TileInfo *tile_info = &this_tile->tile_info; |
5439 | 58.9k | if (cpi->sf.adaptive_rd_thresh_row_mt) { |
5440 | 0 | vp9_row_mt_alloc_rd_thresh(cpi, this_tile); |
5441 | 0 | } |
5442 | 58.9k | vp9_tile_init(tile_info, cm, tile_row, tile_col); |
5443 | | |
5444 | 58.9k | cpi->tile_tok[tile_row][tile_col] = pre_tok + tile_tok; |
5445 | 58.9k | pre_tok = cpi->tile_tok[tile_row][tile_col]; |
5446 | 58.9k | tile_tok = allocated_tokens(*tile_info); |
5447 | | |
5448 | 58.9k | cpi->tplist[tile_row][tile_col] = tplist + tplist_count; |
5449 | 58.9k | tplist = cpi->tplist[tile_row][tile_col]; |
5450 | 58.9k | tplist_count = get_num_vert_units(*tile_info, MI_BLOCK_SIZE_LOG2); |
5451 | 58.9k | } |
5452 | 54.1k | } |
5453 | 54.1k | } |
5454 | | |
5455 | | void vp9_encode_sb_row(VP9_COMP *cpi, ThreadData *td, int tile_row, |
5456 | 76.1k | int tile_col, int mi_row) { |
5457 | 76.1k | VP9_COMMON *const cm = &cpi->common; |
5458 | 76.1k | const int tile_cols = 1 << cm->log2_tile_cols; |
5459 | 76.1k | TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col]; |
5460 | 76.1k | const TileInfo *const tile_info = &this_tile->tile_info; |
5461 | 76.1k | TOKENEXTRA *tok = NULL; |
5462 | 76.1k | int tile_sb_row; |
5463 | 76.1k | int tile_mb_cols = (tile_info->mi_col_end - tile_info->mi_col_start + 1) >> 1; |
5464 | | |
5465 | 76.1k | tile_sb_row = mi_cols_aligned_to_sb(mi_row - tile_info->mi_row_start) >> |
5466 | 76.1k | MI_BLOCK_SIZE_LOG2; |
5467 | 76.1k | get_start_tok(cpi, tile_row, tile_col, mi_row, &tok); |
5468 | 76.1k | cpi->tplist[tile_row][tile_col][tile_sb_row].start = tok; |
5469 | | |
5470 | | #if CONFIG_REALTIME_ONLY |
5471 | | assert(cpi->sf.use_nonrd_pick_mode); |
5472 | | encode_nonrd_sb_row(cpi, td, this_tile, mi_row, &tok); |
5473 | | #else |
5474 | 76.1k | if (cpi->sf.use_nonrd_pick_mode) |
5475 | 0 | encode_nonrd_sb_row(cpi, td, this_tile, mi_row, &tok); |
5476 | 76.1k | else |
5477 | 76.1k | encode_rd_sb_row(cpi, td, this_tile, mi_row, &tok); |
5478 | 76.1k | #endif |
5479 | | |
5480 | 76.1k | cpi->tplist[tile_row][tile_col][tile_sb_row].stop = tok; |
5481 | 76.1k | cpi->tplist[tile_row][tile_col][tile_sb_row].count = |
5482 | 76.1k | (unsigned int)(cpi->tplist[tile_row][tile_col][tile_sb_row].stop - |
5483 | 76.1k | cpi->tplist[tile_row][tile_col][tile_sb_row].start); |
5484 | 76.1k | assert(tok - cpi->tplist[tile_row][tile_col][tile_sb_row].start <= |
5485 | 76.1k | get_token_alloc(MI_BLOCK_SIZE >> 1, tile_mb_cols)); |
5486 | | |
5487 | 76.1k | (void)tile_mb_cols; |
5488 | 76.1k | } |
5489 | | |
5490 | | void vp9_encode_tile(VP9_COMP *cpi, ThreadData *td, int tile_row, |
5491 | 58.9k | int tile_col) { |
5492 | 58.9k | VP9_COMMON *const cm = &cpi->common; |
5493 | 58.9k | const int tile_cols = 1 << cm->log2_tile_cols; |
5494 | 58.9k | TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col]; |
5495 | 58.9k | const TileInfo *const tile_info = &this_tile->tile_info; |
5496 | 58.9k | const int mi_row_start = tile_info->mi_row_start; |
5497 | 58.9k | const int mi_row_end = tile_info->mi_row_end; |
5498 | 58.9k | int mi_row; |
5499 | | |
5500 | 135k | for (mi_row = mi_row_start; mi_row < mi_row_end; mi_row += MI_BLOCK_SIZE) |
5501 | 76.1k | vp9_encode_sb_row(cpi, td, tile_row, tile_col, mi_row); |
5502 | 58.9k | } |
5503 | | |
5504 | 54.1k | static void encode_tiles(VP9_COMP *cpi) { |
5505 | 54.1k | VP9_COMMON *const cm = &cpi->common; |
5506 | 54.1k | const int tile_cols = 1 << cm->log2_tile_cols; |
5507 | 54.1k | const int tile_rows = 1 << cm->log2_tile_rows; |
5508 | 54.1k | int tile_col, tile_row; |
5509 | | |
5510 | 54.1k | vp9_init_tile_data(cpi); |
5511 | | |
5512 | 108k | for (tile_row = 0; tile_row < tile_rows; ++tile_row) |
5513 | 113k | for (tile_col = 0; tile_col < tile_cols; ++tile_col) |
5514 | 58.9k | vp9_encode_tile(cpi, &cpi->td, tile_row, tile_col); |
5515 | 54.1k | } |
5516 | | |
5517 | 0 | static int compare_kmeans_data(const void *a, const void *b) { |
5518 | 0 | if (((const KMEANS_DATA *)a)->value > ((const KMEANS_DATA *)b)->value) { |
5519 | 0 | return 1; |
5520 | 0 | } else if (((const KMEANS_DATA *)a)->value < |
5521 | 0 | ((const KMEANS_DATA *)b)->value) { |
5522 | 0 | return -1; |
5523 | 0 | } else { |
5524 | 0 | return 0; |
5525 | 0 | } |
5526 | 0 | } |
5527 | | |
5528 | | static void compute_boundary_ls(const double *ctr_ls, int k, |
5529 | 0 | double *boundary_ls) { |
5530 | | // boundary_ls[j] is the upper bound of data centered at ctr_ls[j] |
5531 | 0 | int j; |
5532 | 0 | for (j = 0; j < k - 1; ++j) { |
5533 | 0 | boundary_ls[j] = (ctr_ls[j] + ctr_ls[j + 1]) / 2.; |
5534 | 0 | } |
5535 | 0 | boundary_ls[k - 1] = DBL_MAX; |
5536 | 0 | } |
5537 | | |
5538 | 0 | int vp9_get_group_idx(double value, double *boundary_ls, int k) { |
5539 | 0 | int group_idx = 0; |
5540 | 0 | while (value >= boundary_ls[group_idx]) { |
5541 | 0 | ++group_idx; |
5542 | 0 | if (group_idx == k - 1) { |
5543 | 0 | break; |
5544 | 0 | } |
5545 | 0 | } |
5546 | 0 | return group_idx; |
5547 | 0 | } |
5548 | | |
5549 | | void vp9_kmeans(double *ctr_ls, double *boundary_ls, int *count_ls, int k, |
5550 | 0 | KMEANS_DATA *arr, int size) { |
5551 | 0 | int i, j; |
5552 | 0 | int itr; |
5553 | 0 | int group_idx; |
5554 | 0 | double sum[MAX_KMEANS_GROUPS]; |
5555 | 0 | int count[MAX_KMEANS_GROUPS]; |
5556 | |
|
5557 | 0 | vpx_clear_system_state(); |
5558 | |
|
5559 | 0 | assert(k >= 2 && k <= MAX_KMEANS_GROUPS); |
5560 | |
|
5561 | 0 | qsort(arr, size, sizeof(*arr), compare_kmeans_data); |
5562 | | |
5563 | | // initialize the center points |
5564 | 0 | for (j = 0; j < k; ++j) { |
5565 | 0 | ctr_ls[j] = arr[(size * (2 * j + 1)) / (2 * k)].value; |
5566 | 0 | } |
5567 | |
|
5568 | 0 | for (itr = 0; itr < 10; ++itr) { |
5569 | 0 | compute_boundary_ls(ctr_ls, k, boundary_ls); |
5570 | 0 | for (i = 0; i < MAX_KMEANS_GROUPS; ++i) { |
5571 | 0 | sum[i] = 0; |
5572 | 0 | count[i] = 0; |
5573 | 0 | } |
5574 | | |
5575 | | // Both the data and centers are sorted in ascending order. |
5576 | | // As each data point is processed in order, its corresponding group index |
5577 | | // can only increase. So we only need to reset the group index to zero here. |
5578 | 0 | group_idx = 0; |
5579 | 0 | for (i = 0; i < size; ++i) { |
5580 | 0 | while (arr[i].value >= boundary_ls[group_idx]) { |
5581 | | // place samples into clusters |
5582 | 0 | ++group_idx; |
5583 | 0 | if (group_idx == k - 1) { |
5584 | 0 | break; |
5585 | 0 | } |
5586 | 0 | } |
5587 | 0 | sum[group_idx] += arr[i].value; |
5588 | 0 | ++count[group_idx]; |
5589 | 0 | } |
5590 | |
|
5591 | 0 | for (group_idx = 0; group_idx < k; ++group_idx) { |
5592 | 0 | if (count[group_idx] > 0) |
5593 | 0 | ctr_ls[group_idx] = sum[group_idx] / count[group_idx]; |
5594 | |
|
5595 | 0 | sum[group_idx] = 0; |
5596 | 0 | count[group_idx] = 0; |
5597 | 0 | } |
5598 | 0 | } |
5599 | | |
5600 | | // compute group_idx, boundary_ls and count_ls |
5601 | 0 | for (j = 0; j < k; ++j) { |
5602 | 0 | count_ls[j] = 0; |
5603 | 0 | } |
5604 | 0 | compute_boundary_ls(ctr_ls, k, boundary_ls); |
5605 | 0 | group_idx = 0; |
5606 | 0 | for (i = 0; i < size; ++i) { |
5607 | 0 | while (arr[i].value >= boundary_ls[group_idx]) { |
5608 | 0 | ++group_idx; |
5609 | 0 | if (group_idx == k - 1) { |
5610 | 0 | break; |
5611 | 0 | } |
5612 | 0 | } |
5613 | 0 | arr[i].group_idx = group_idx; |
5614 | 0 | ++count_ls[group_idx]; |
5615 | 0 | } |
5616 | 0 | } |
5617 | | |
5618 | 54.1k | static void encode_frame_internal(VP9_COMP *cpi) { |
5619 | 54.1k | SPEED_FEATURES *const sf = &cpi->sf; |
5620 | 54.1k | ThreadData *const td = &cpi->td; |
5621 | 54.1k | MACROBLOCK *const x = &td->mb; |
5622 | 54.1k | VP9_COMMON *const cm = &cpi->common; |
5623 | 54.1k | MACROBLOCKD *const xd = &x->e_mbd; |
5624 | 54.1k | const int gf_group_index = cpi->twopass.gf_group.index; |
5625 | | |
5626 | 54.1k | xd->mi = cm->mi_grid_visible; |
5627 | 54.1k | xd->mi[0] = cm->mi; |
5628 | 54.1k | vp9_zero(*td->counts); |
5629 | 54.1k | vp9_zero(cpi->td.rd_counts); |
5630 | | |
5631 | 54.1k | xd->lossless = cm->base_qindex == 0 && cm->y_dc_delta_q == 0 && |
5632 | 7.44k | cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0; |
5633 | | |
5634 | 54.1k | #if CONFIG_VP9_HIGHBITDEPTH |
5635 | 54.1k | if (cm->use_highbitdepth) |
5636 | 0 | x->fwd_txfm4x4 = xd->lossless ? vp9_highbd_fwht4x4 : vpx_highbd_fdct4x4; |
5637 | 54.1k | else |
5638 | 54.1k | x->fwd_txfm4x4 = xd->lossless ? vp9_fwht4x4 : vpx_fdct4x4; |
5639 | 54.1k | x->highbd_inv_txfm_add = |
5640 | 54.1k | xd->lossless ? vp9_highbd_iwht4x4_add : vp9_highbd_idct4x4_add; |
5641 | | #else |
5642 | | x->fwd_txfm4x4 = xd->lossless ? vp9_fwht4x4 : vpx_fdct4x4; |
5643 | | #endif // CONFIG_VP9_HIGHBITDEPTH |
5644 | 54.1k | x->inv_txfm_add = xd->lossless ? vp9_iwht4x4_add : vp9_idct4x4_add; |
5645 | 54.1k | x->optimize = sf->optimize_coefficients == 1 && cpi->oxcf.pass != 1; |
5646 | 54.1k | if (xd->lossless) x->optimize = 0; |
5647 | 54.1k | x->sharpness = cpi->oxcf.sharpness; |
5648 | 54.1k | x->adjust_rdmult_by_segment = (cpi->oxcf.aq_mode == VARIANCE_AQ); |
5649 | | |
5650 | 54.1k | cm->tx_mode = select_tx_mode(cpi, xd); |
5651 | | |
5652 | 54.1k | vp9_frame_init_quantizer(cpi); |
5653 | | |
5654 | 54.1k | vp9_initialize_rd_consts(cpi); |
5655 | 54.1k | vp9_initialize_me_consts(cpi, x, cm->base_qindex); |
5656 | 54.1k | init_encode_frame_mb_context(cpi); |
5657 | 54.1k | cm->use_prev_frame_mvs = |
5658 | 54.1k | !cm->error_resilient_mode && cm->width == cm->last_width && |
5659 | 50.3k | cm->height == cm->last_height && !cm->intra_only && cm->last_show_frame; |
5660 | | // Special case: set prev_mi to NULL when the previous mode info |
5661 | | // context cannot be used. |
5662 | 54.1k | cm->prev_mi = |
5663 | 54.1k | cm->use_prev_frame_mvs ? cm->prev_mip + cm->mi_stride + 1 : NULL; |
5664 | | |
5665 | 54.1k | x->quant_fp = cpi->sf.use_quant_fp; |
5666 | 54.1k | vp9_zero(x->skip_txfm); |
5667 | 54.1k | if (sf->use_nonrd_pick_mode) { |
5668 | | // Initialize internal buffer pointers for rtc coding, where non-RD |
5669 | | // mode decision is used and hence no buffer pointer swap needed. |
5670 | 0 | int i; |
5671 | 0 | struct macroblock_plane *const p = x->plane; |
5672 | 0 | struct macroblockd_plane *const pd = xd->plane; |
5673 | 0 | PICK_MODE_CONTEXT *ctx = &cpi->td.pc_root->none; |
5674 | |
|
5675 | 0 | for (i = 0; i < MAX_MB_PLANE; ++i) { |
5676 | 0 | p[i].coeff = ctx->coeff_pbuf[i][0]; |
5677 | 0 | p[i].qcoeff = ctx->qcoeff_pbuf[i][0]; |
5678 | 0 | pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][0]; |
5679 | 0 | p[i].eobs = ctx->eobs_pbuf[i][0]; |
5680 | 0 | } |
5681 | 0 | vp9_zero(x->zcoeff_blk); |
5682 | |
|
5683 | 0 | if (cm->frame_type != KEY_FRAME && cpi->rc.frames_since_golden == 0 && |
5684 | 0 | !(cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR) && |
5685 | 0 | !cpi->use_svc) |
5686 | 0 | cpi->ref_frame_flags &= (~VP9_GOLD_FLAG); |
5687 | 54.1k | } else if (gf_group_index && gf_group_index < MAX_ARF_GOP_SIZE && |
5688 | 0 | cpi->sf.enable_tpl_model) { |
5689 | 0 | TplDepFrame *tpl_frame = &cpi->tpl_stats[cpi->twopass.gf_group.index]; |
5690 | 0 | TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr; |
5691 | |
|
5692 | 0 | int tpl_stride = tpl_frame->stride; |
5693 | 0 | int64_t intra_cost_base = 0; |
5694 | 0 | int64_t mc_dep_cost_base = 0; |
5695 | 0 | int row, col; |
5696 | |
|
5697 | 0 | for (row = 0; row < cm->mi_rows && tpl_frame->is_valid; ++row) { |
5698 | 0 | for (col = 0; col < cm->mi_cols; ++col) { |
5699 | 0 | TplDepStats *this_stats = &tpl_stats[row * tpl_stride + col]; |
5700 | 0 | intra_cost_base += this_stats->intra_cost; |
5701 | 0 | mc_dep_cost_base += this_stats->mc_dep_cost; |
5702 | 0 | } |
5703 | 0 | } |
5704 | |
|
5705 | 0 | vpx_clear_system_state(); |
5706 | |
|
5707 | 0 | if (tpl_frame->is_valid) |
5708 | 0 | cpi->rd.r0 = (double)intra_cost_base / mc_dep_cost_base; |
5709 | 0 | } |
5710 | | |
5711 | 216k | for (MV_REFERENCE_FRAME ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; |
5712 | 162k | ++ref_frame) { |
5713 | 162k | if (cpi->ref_frame_flags & ref_frame_to_flag(ref_frame)) { |
5714 | 111k | if (cm->frame_refs[ref_frame - 1].sf.x_scale_fp == REF_INVALID_SCALE || |
5715 | 111k | cm->frame_refs[ref_frame - 1].sf.y_scale_fp == REF_INVALID_SCALE) |
5716 | 0 | cpi->ref_frame_flags &= ~ref_frame_to_flag(ref_frame); |
5717 | 111k | } |
5718 | 162k | } |
5719 | | |
5720 | | // Frame segmentation |
5721 | 54.1k | if (cpi->oxcf.aq_mode == PERCEPTUAL_AQ) build_kmeans_segmentation(cpi); |
5722 | | |
5723 | 54.1k | { |
5724 | | #if CONFIG_INTERNAL_STATS |
5725 | | struct vpx_usec_timer emr_timer; |
5726 | | vpx_usec_timer_start(&emr_timer); |
5727 | | #endif |
5728 | | |
5729 | 54.1k | if (!cpi->row_mt) { |
5730 | 54.1k | cpi->row_mt_sync_read_ptr = vp9_row_mt_sync_read_dummy; |
5731 | 54.1k | cpi->row_mt_sync_write_ptr = vp9_row_mt_sync_write_dummy; |
5732 | | // If allowed, encoding tiles in parallel with one thread handling one |
5733 | | // tile when row based multi-threading is disabled. |
5734 | 54.1k | if (VPXMIN(cpi->oxcf.max_threads, 1 << cm->log2_tile_cols) > 1) |
5735 | 0 | vp9_encode_tiles_mt(cpi); |
5736 | 54.1k | else |
5737 | 54.1k | encode_tiles(cpi); |
5738 | 54.1k | } else { |
5739 | 0 | cpi->row_mt_sync_read_ptr = vp9_row_mt_sync_read; |
5740 | 0 | cpi->row_mt_sync_write_ptr = vp9_row_mt_sync_write; |
5741 | 0 | vp9_encode_tiles_row_mt(cpi); |
5742 | 0 | } |
5743 | | |
5744 | | #if CONFIG_INTERNAL_STATS |
5745 | | vpx_usec_timer_mark(&emr_timer); |
5746 | | cpi->time_encode_sb_row += vpx_usec_timer_elapsed(&emr_timer); |
5747 | | #endif |
5748 | 54.1k | } |
5749 | | |
5750 | 54.1k | sf->skip_encode_frame = |
5751 | 54.1k | sf->skip_encode_sb ? get_skip_encode_frame(cm, td) : 0; |
5752 | | |
5753 | | #if 0 |
5754 | | // Keep record of the total distortion this time around for future use |
5755 | | cpi->last_frame_distortion = cpi->frame_distortion; |
5756 | | #endif |
5757 | 54.1k | } |
5758 | | |
5759 | | static INTERP_FILTER get_interp_filter( |
5760 | 54.1k | const int64_t threshes[SWITCHABLE_FILTER_CONTEXTS], int is_alt_ref) { |
5761 | 54.1k | if (!is_alt_ref && threshes[EIGHTTAP_SMOOTH] > threshes[EIGHTTAP] && |
5762 | 13.8k | threshes[EIGHTTAP_SMOOTH] > threshes[EIGHTTAP_SHARP] && |
5763 | 13.7k | threshes[EIGHTTAP_SMOOTH] > threshes[SWITCHABLE - 1]) { |
5764 | 10.1k | return EIGHTTAP_SMOOTH; |
5765 | 44.0k | } else if (threshes[EIGHTTAP_SHARP] > threshes[EIGHTTAP] && |
5766 | 1.50k | threshes[EIGHTTAP_SHARP] > threshes[SWITCHABLE - 1]) { |
5767 | 1.06k | return EIGHTTAP_SHARP; |
5768 | 42.9k | } else if (threshes[EIGHTTAP] > threshes[SWITCHABLE - 1]) { |
5769 | 12.5k | return EIGHTTAP; |
5770 | 30.4k | } else { |
5771 | 30.4k | return SWITCHABLE; |
5772 | 30.4k | } |
5773 | 54.1k | } |
5774 | | |
5775 | 0 | static int compute_frame_aq_offset(struct VP9_COMP *cpi) { |
5776 | 0 | VP9_COMMON *const cm = &cpi->common; |
5777 | 0 | MODE_INFO **mi_8x8_ptr = cm->mi_grid_visible; |
5778 | 0 | struct segmentation *const seg = &cm->seg; |
5779 | |
|
5780 | 0 | int mi_row, mi_col; |
5781 | 0 | int sum_delta = 0; |
5782 | 0 | int qdelta_index; |
5783 | 0 | int segment_id; |
5784 | |
|
5785 | 0 | for (mi_row = 0; mi_row < cm->mi_rows; mi_row++) { |
5786 | 0 | MODE_INFO **mi_8x8 = mi_8x8_ptr; |
5787 | 0 | for (mi_col = 0; mi_col < cm->mi_cols; mi_col++, mi_8x8++) { |
5788 | 0 | segment_id = mi_8x8[0]->segment_id; |
5789 | 0 | qdelta_index = get_segdata(seg, segment_id, SEG_LVL_ALT_Q); |
5790 | 0 | sum_delta += qdelta_index; |
5791 | 0 | } |
5792 | 0 | mi_8x8_ptr += cm->mi_stride; |
5793 | 0 | } |
5794 | |
|
5795 | 0 | return sum_delta / (cm->mi_rows * cm->mi_cols); |
5796 | 0 | } |
5797 | | |
5798 | 54.1k | static void restore_encode_params(VP9_COMP *cpi) { |
5799 | 54.1k | VP9_COMMON *const cm = &cpi->common; |
5800 | 54.1k | int tile_idx; |
5801 | 54.1k | int i, j; |
5802 | 54.1k | TileDataEnc *tile_data; |
5803 | 54.1k | RD_OPT *rd_opt = &cpi->rd; |
5804 | 270k | for (i = 0; i < MAX_REF_FRAMES; i++) { |
5805 | 866k | for (j = 0; j < REFERENCE_MODES; j++) |
5806 | 650k | rd_opt->prediction_type_threshes[i][j] = |
5807 | 650k | rd_opt->prediction_type_threshes_prev[i][j]; |
5808 | | |
5809 | 1.08M | for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; j++) |
5810 | 866k | rd_opt->filter_threshes[i][j] = rd_opt->filter_threshes_prev[i][j]; |
5811 | 216k | } |
5812 | | |
5813 | 105k | for (tile_idx = 0; tile_idx < cpi->allocated_tiles; tile_idx++) { |
5814 | 51.4k | assert(cpi->tile_data); |
5815 | 51.4k | tile_data = &cpi->tile_data[tile_idx]; |
5816 | 51.4k | vp9_copy(tile_data->thresh_freq_fact, tile_data->thresh_freq_fact_prev); |
5817 | 51.4k | } |
5818 | | |
5819 | 54.1k | cm->interp_filter = cpi->sf.default_interp_filter; |
5820 | 54.1k | } |
5821 | | |
5822 | 54.1k | void vp9_encode_frame(VP9_COMP *cpi) { |
5823 | 54.1k | VP9_COMMON *const cm = &cpi->common; |
5824 | | |
5825 | 54.1k | restore_encode_params(cpi); |
5826 | | |
5827 | | #if CONFIG_MISMATCH_DEBUG |
5828 | | mismatch_reset_frame(MAX_MB_PLANE); |
5829 | | #endif |
5830 | | |
5831 | | // In the longer term the encoder should be generalized to match the |
5832 | | // decoder such that we allow compound where one of the 3 buffers has a |
5833 | | // different sign bias and that buffer is then the fixed ref. However, this |
5834 | | // requires further work in the rd loop. For now the only supported encoder |
5835 | | // side behavior is where the ALT ref buffer has opposite sign bias to |
5836 | | // the other two. |
5837 | 54.1k | if (!frame_is_intra_only(cm)) { |
5838 | 41.3k | if (vp9_compound_reference_allowed(cm)) { |
5839 | 0 | cpi->allow_comp_inter_inter = 1; |
5840 | 0 | vp9_setup_compound_reference_mode(cm); |
5841 | 41.3k | } else { |
5842 | 41.3k | cpi->allow_comp_inter_inter = 0; |
5843 | 41.3k | } |
5844 | 41.3k | } |
5845 | | |
5846 | 54.1k | if (cpi->sf.frame_parameter_update) { |
5847 | 54.1k | int i; |
5848 | 54.1k | RD_OPT *const rd_opt = &cpi->rd; |
5849 | 54.1k | FRAME_COUNTS *counts = cpi->td.counts; |
5850 | 54.1k | RD_COUNTS *const rdc = &cpi->td.rd_counts; |
5851 | | |
5852 | | // This code does a single RD pass over the whole frame assuming |
5853 | | // either compound, single or hybrid prediction as per whatever has |
5854 | | // worked best for that type of frame in the past. |
5855 | | // It also predicts whether another coding mode would have worked |
5856 | | // better than this coding mode. If that is the case, it remembers |
5857 | | // that for subsequent frames. |
5858 | | // It also does the same analysis for transform size selection. |
5859 | 54.1k | const MV_REFERENCE_FRAME frame_type = get_frame_type(cpi); |
5860 | 54.1k | int64_t *const mode_thrs = rd_opt->prediction_type_threshes[frame_type]; |
5861 | 54.1k | int64_t *const filter_thrs = rd_opt->filter_threshes[frame_type]; |
5862 | 54.1k | const int is_alt_ref = frame_type == ALTREF_FRAME; |
5863 | | |
5864 | | /* prediction (compound, single or hybrid) mode selection */ |
5865 | 54.1k | if (is_alt_ref || !cpi->allow_comp_inter_inter) |
5866 | 54.1k | cm->reference_mode = SINGLE_REFERENCE; |
5867 | 0 | else if (mode_thrs[COMPOUND_REFERENCE] > mode_thrs[SINGLE_REFERENCE] && |
5868 | 0 | mode_thrs[COMPOUND_REFERENCE] > mode_thrs[REFERENCE_MODE_SELECT] && |
5869 | 0 | check_dual_ref_flags(cpi) && cpi->static_mb_pct == 100) |
5870 | 0 | cm->reference_mode = COMPOUND_REFERENCE; |
5871 | 0 | else if (mode_thrs[SINGLE_REFERENCE] > mode_thrs[REFERENCE_MODE_SELECT]) |
5872 | 0 | cm->reference_mode = SINGLE_REFERENCE; |
5873 | 0 | else |
5874 | 0 | cm->reference_mode = REFERENCE_MODE_SELECT; |
5875 | | |
5876 | 54.1k | if (cm->interp_filter == SWITCHABLE) |
5877 | 54.1k | cm->interp_filter = get_interp_filter(filter_thrs, is_alt_ref); |
5878 | | |
5879 | | #if CONFIG_COLLECT_COMPONENT_TIMING |
5880 | | start_timing(cpi, encode_frame_internal_time); |
5881 | | #endif |
5882 | 54.1k | encode_frame_internal(cpi); |
5883 | | #if CONFIG_COLLECT_COMPONENT_TIMING |
5884 | | end_timing(cpi, encode_frame_internal_time); |
5885 | | #endif |
5886 | | |
5887 | 216k | for (i = 0; i < REFERENCE_MODES; ++i) |
5888 | 162k | mode_thrs[i] = (mode_thrs[i] + rdc->comp_pred_diff[i] / cm->MBs) / 2; |
5889 | | |
5890 | 270k | for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) |
5891 | 216k | filter_thrs[i] = (filter_thrs[i] + rdc->filter_diff[i] / cm->MBs) / 2; |
5892 | | |
5893 | 54.1k | if (cm->reference_mode == REFERENCE_MODE_SELECT) { |
5894 | 0 | int single_count_zero = 0; |
5895 | 0 | int comp_count_zero = 0; |
5896 | |
|
5897 | 0 | for (i = 0; i < COMP_INTER_CONTEXTS; i++) { |
5898 | 0 | single_count_zero += counts->comp_inter[i][0]; |
5899 | 0 | comp_count_zero += counts->comp_inter[i][1]; |
5900 | 0 | } |
5901 | |
|
5902 | 0 | if (comp_count_zero == 0) { |
5903 | 0 | cm->reference_mode = SINGLE_REFERENCE; |
5904 | 0 | vp9_zero(counts->comp_inter); |
5905 | 0 | } else if (single_count_zero == 0) { |
5906 | 0 | cm->reference_mode = COMPOUND_REFERENCE; |
5907 | 0 | vp9_zero(counts->comp_inter); |
5908 | 0 | } |
5909 | 0 | } |
5910 | | |
5911 | 54.1k | if (cm->tx_mode == TX_MODE_SELECT) { |
5912 | 26.3k | int count4x4 = 0; |
5913 | 26.3k | int count8x8_lp = 0, count8x8_8x8p = 0; |
5914 | 26.3k | int count16x16_16x16p = 0, count16x16_lp = 0; |
5915 | 26.3k | int count32x32 = 0; |
5916 | | |
5917 | 78.9k | for (i = 0; i < TX_SIZE_CONTEXTS; ++i) { |
5918 | 52.6k | count4x4 += counts->tx.p32x32[i][TX_4X4]; |
5919 | 52.6k | count4x4 += counts->tx.p16x16[i][TX_4X4]; |
5920 | 52.6k | count4x4 += counts->tx.p8x8[i][TX_4X4]; |
5921 | | |
5922 | 52.6k | count8x8_lp += counts->tx.p32x32[i][TX_8X8]; |
5923 | 52.6k | count8x8_lp += counts->tx.p16x16[i][TX_8X8]; |
5924 | 52.6k | count8x8_8x8p += counts->tx.p8x8[i][TX_8X8]; |
5925 | | |
5926 | 52.6k | count16x16_16x16p += counts->tx.p16x16[i][TX_16X16]; |
5927 | 52.6k | count16x16_lp += counts->tx.p32x32[i][TX_16X16]; |
5928 | 52.6k | count32x32 += counts->tx.p32x32[i][TX_32X32]; |
5929 | 52.6k | } |
5930 | 26.3k | if (count4x4 == 0 && count16x16_lp == 0 && count16x16_16x16p == 0 && |
5931 | 9.17k | count32x32 == 0) { |
5932 | 6.64k | cm->tx_mode = ALLOW_8X8; |
5933 | 6.64k | reset_skip_tx_size(cm, TX_8X8); |
5934 | 19.6k | } else if (count8x8_8x8p == 0 && count16x16_16x16p == 0 && |
5935 | 5.46k | count8x8_lp == 0 && count16x16_lp == 0 && count32x32 == 0) { |
5936 | 2.75k | cm->tx_mode = ONLY_4X4; |
5937 | 2.75k | reset_skip_tx_size(cm, TX_4X4); |
5938 | 16.9k | } else if (count8x8_lp == 0 && count16x16_lp == 0 && count4x4 == 0) { |
5939 | 3.88k | cm->tx_mode = ALLOW_32X32; |
5940 | 13.0k | } else if (count32x32 == 0 && count8x8_lp == 0 && count4x4 == 0) { |
5941 | 73 | cm->tx_mode = ALLOW_16X16; |
5942 | 73 | reset_skip_tx_size(cm, TX_16X16); |
5943 | 73 | } |
5944 | 26.3k | } |
5945 | 54.1k | } else { |
5946 | 0 | FRAME_COUNTS *counts = cpi->td.counts; |
5947 | 0 | cm->reference_mode = SINGLE_REFERENCE; |
5948 | 0 | if (cpi->allow_comp_inter_inter && cpi->sf.use_compound_nonrd_pickmode && |
5949 | 0 | cpi->rc.alt_ref_gf_group && !cpi->rc.is_src_frame_alt_ref && |
5950 | 0 | cm->frame_type != KEY_FRAME) |
5951 | 0 | cm->reference_mode = REFERENCE_MODE_SELECT; |
5952 | |
|
5953 | 0 | encode_frame_internal(cpi); |
5954 | |
|
5955 | 0 | if (cm->reference_mode == REFERENCE_MODE_SELECT) { |
5956 | 0 | int single_count_zero = 0; |
5957 | 0 | int comp_count_zero = 0; |
5958 | 0 | int i; |
5959 | 0 | for (i = 0; i < COMP_INTER_CONTEXTS; i++) { |
5960 | 0 | single_count_zero += counts->comp_inter[i][0]; |
5961 | 0 | comp_count_zero += counts->comp_inter[i][1]; |
5962 | 0 | } |
5963 | 0 | if (comp_count_zero == 0) { |
5964 | 0 | cm->reference_mode = SINGLE_REFERENCE; |
5965 | 0 | vp9_zero(counts->comp_inter); |
5966 | 0 | } else if (single_count_zero == 0) { |
5967 | 0 | cm->reference_mode = COMPOUND_REFERENCE; |
5968 | 0 | vp9_zero(counts->comp_inter); |
5969 | 0 | } |
5970 | 0 | } |
5971 | 0 | } |
5972 | | |
5973 | | // If segmented AQ is enabled compute the average AQ weighting. |
5974 | 54.1k | if (cm->seg.enabled && (cpi->oxcf.aq_mode != NO_AQ) && |
5975 | 0 | (cm->seg.update_map || cm->seg.update_data)) { |
5976 | 0 | cm->seg.aq_av_offset = compute_frame_aq_offset(cpi); |
5977 | 0 | } |
5978 | 54.1k | } |
5979 | | |
5980 | 1.53M | static void sum_intra_stats(FRAME_COUNTS *counts, const MODE_INFO *mi) { |
5981 | 1.53M | const PREDICTION_MODE y_mode = mi->mode; |
5982 | 1.53M | const PREDICTION_MODE uv_mode = mi->uv_mode; |
5983 | 1.53M | const BLOCK_SIZE bsize = mi->sb_type; |
5984 | | |
5985 | 1.53M | if (bsize < BLOCK_8X8) { |
5986 | 911k | int idx, idy; |
5987 | 911k | const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; |
5988 | 911k | const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; |
5989 | 2.63M | for (idy = 0; idy < 2; idy += num_4x4_h) |
5990 | 4.85M | for (idx = 0; idx < 2; idx += num_4x4_w) |
5991 | 3.13M | ++counts->y_mode[0][mi->bmi[idy * 2 + idx].as_mode]; |
5992 | 911k | } else { |
5993 | 620k | ++counts->y_mode[size_group_lookup[bsize]][y_mode]; |
5994 | 620k | } |
5995 | | |
5996 | 1.53M | ++counts->uv_mode[y_mode][uv_mode]; |
5997 | 1.53M | } |
5998 | | |
5999 | | static void update_zeromv_cnt(VP9_COMP *const cpi, const MODE_INFO *const mi, |
6000 | 2.04M | int mi_row, int mi_col, BLOCK_SIZE bsize) { |
6001 | 2.04M | const VP9_COMMON *const cm = &cpi->common; |
6002 | 2.04M | MV mv = mi->mv[0].as_mv; |
6003 | 2.04M | const int bw = num_8x8_blocks_wide_lookup[bsize]; |
6004 | 2.04M | const int bh = num_8x8_blocks_high_lookup[bsize]; |
6005 | 2.04M | const int xmis = VPXMIN(cm->mi_cols - mi_col, bw); |
6006 | 2.04M | const int ymis = VPXMIN(cm->mi_rows - mi_row, bh); |
6007 | 2.04M | const int block_index = mi_row * cm->mi_cols + mi_col; |
6008 | 2.04M | int x, y; |
6009 | 4.51M | for (y = 0; y < ymis; y++) |
6010 | 6.59M | for (x = 0; x < xmis; x++) { |
6011 | 4.13M | int map_offset = block_index + y * cm->mi_cols + x; |
6012 | 4.13M | if (mi->ref_frame[0] == LAST_FRAME && is_inter_block(mi) && |
6013 | 667k | mi->segment_id <= CR_SEGMENT_ID_BOOST2) { |
6014 | 667k | if (abs(mv.row) < 8 && abs(mv.col) < 8) { |
6015 | 133k | if (cpi->consec_zero_mv[map_offset] < 255) |
6016 | 133k | cpi->consec_zero_mv[map_offset]++; |
6017 | 534k | } else { |
6018 | 534k | cpi->consec_zero_mv[map_offset] = 0; |
6019 | 534k | } |
6020 | 667k | } |
6021 | 4.13M | } |
6022 | 2.04M | } |
6023 | | |
6024 | | static void encode_superblock(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t, |
6025 | | int output_enabled, int mi_row, int mi_col, |
6026 | 8.50M | BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) { |
6027 | 8.50M | VP9_COMMON *const cm = &cpi->common; |
6028 | 8.50M | MACROBLOCK *const x = &td->mb; |
6029 | 8.50M | MACROBLOCKD *const xd = &x->e_mbd; |
6030 | 8.50M | MODE_INFO *mi = xd->mi[0]; |
6031 | 8.50M | const int seg_skip = |
6032 | 8.50M | segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP); |
6033 | 8.50M | x->skip_recode = !x->select_tx_size && mi->sb_type >= BLOCK_8X8 && |
6034 | 1.64M | cpi->oxcf.aq_mode != COMPLEXITY_AQ && |
6035 | 1.64M | cpi->oxcf.aq_mode != CYCLIC_REFRESH_AQ && |
6036 | 1.64M | cpi->sf.allow_skip_recode; |
6037 | | |
6038 | 8.50M | if (!x->skip_recode && !cpi->sf.use_nonrd_pick_mode) |
6039 | 8.50M | memset(x->skip_txfm, 0, sizeof(x->skip_txfm)); |
6040 | | |
6041 | 8.50M | x->skip_optimize = ctx->is_coded; |
6042 | 8.50M | ctx->is_coded = 1; |
6043 | 8.50M | x->use_lp32x32fdct = cpi->sf.use_lp32x32fdct; |
6044 | 8.50M | x->skip_encode = (!output_enabled && cpi->sf.skip_encode_frame && |
6045 | 0 | x->q_index < QIDX_SKIP_THRESH); |
6046 | | |
6047 | 8.50M | if (x->skip_encode) return; |
6048 | | |
6049 | 8.50M | if (!is_inter_block(mi)) { |
6050 | 6.36M | int plane; |
6051 | | #if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH |
6052 | | if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) && |
6053 | | (xd->above_mi == NULL || xd->left_mi == NULL) && |
6054 | | need_top_left[mi->uv_mode]) |
6055 | | assert(0); |
6056 | | #endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH |
6057 | 6.36M | mi->skip = 1; |
6058 | 25.4M | for (plane = 0; plane < MAX_MB_PLANE; ++plane) |
6059 | 19.0M | vp9_encode_intra_block_plane(x, VPXMAX(bsize, BLOCK_8X8), plane, 1); |
6060 | 6.36M | if (output_enabled) sum_intra_stats(td->counts, mi); |
6061 | 6.36M | vp9_tokenize_sb(cpi, td, t, !output_enabled, seg_skip, |
6062 | 6.36M | VPXMAX(bsize, BLOCK_8X8)); |
6063 | 6.36M | } else { |
6064 | 2.14M | int ref; |
6065 | 2.14M | const int is_compound = has_second_ref(mi); |
6066 | 2.14M | set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]); |
6067 | 4.28M | for (ref = 0; ref < 1 + is_compound; ++ref) { |
6068 | 2.14M | YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, mi->ref_frame[ref]); |
6069 | 2.14M | assert(cfg != NULL); |
6070 | 2.14M | vp9_setup_pre_planes(xd, ref, cfg, mi_row, mi_col, |
6071 | 2.14M | &xd->block_refs[ref]->sf); |
6072 | 2.14M | } |
6073 | 2.14M | if (!(cpi->sf.reuse_inter_pred_sby && ctx->pred_pixel_ready) || seg_skip) |
6074 | 2.14M | vp9_build_inter_predictors_sby(xd, mi_row, mi_col, |
6075 | 2.14M | VPXMAX(bsize, BLOCK_8X8)); |
6076 | | |
6077 | 2.14M | vp9_build_inter_predictors_sbuv(xd, mi_row, mi_col, |
6078 | 2.14M | VPXMAX(bsize, BLOCK_8X8)); |
6079 | | |
6080 | | #if CONFIG_MISMATCH_DEBUG |
6081 | | if (output_enabled) { |
6082 | | int plane; |
6083 | | for (plane = 0; plane < MAX_MB_PLANE; ++plane) { |
6084 | | const struct macroblockd_plane *pd = &xd->plane[plane]; |
6085 | | int pixel_c, pixel_r; |
6086 | | const BLOCK_SIZE plane_bsize = |
6087 | | get_plane_block_size(VPXMAX(bsize, BLOCK_8X8), &xd->plane[plane]); |
6088 | | const int bw = get_block_width(plane_bsize); |
6089 | | const int bh = get_block_height(plane_bsize); |
6090 | | mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, 0, 0, |
6091 | | pd->subsampling_x, pd->subsampling_y); |
6092 | | |
6093 | | mismatch_record_block_pre(pd->dst.buf, pd->dst.stride, plane, pixel_c, |
6094 | | pixel_r, bw, bh, |
6095 | | xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH); |
6096 | | } |
6097 | | } |
6098 | | #endif |
6099 | | |
6100 | 2.14M | vp9_encode_sb(x, VPXMAX(bsize, BLOCK_8X8), mi_row, mi_col, output_enabled); |
6101 | 2.14M | vp9_tokenize_sb(cpi, td, t, !output_enabled, seg_skip, |
6102 | 2.14M | VPXMAX(bsize, BLOCK_8X8)); |
6103 | 2.14M | } |
6104 | | |
6105 | 8.50M | if (seg_skip) { |
6106 | 0 | assert(mi->skip); |
6107 | 0 | } |
6108 | | |
6109 | 8.50M | if (output_enabled) { |
6110 | 2.04M | if (cm->tx_mode == TX_MODE_SELECT && mi->sb_type >= BLOCK_8X8 && |
6111 | 554k | !(is_inter_block(mi) && mi->skip)) { |
6112 | 533k | ++get_tx_counts(max_txsize_lookup[bsize], get_tx_size_context(xd), |
6113 | 533k | &td->counts->tx)[mi->tx_size]; |
6114 | 1.51M | } else { |
6115 | | // The new intra coding scheme requires no change of transform size |
6116 | 1.51M | if (is_inter_block(mi)) { |
6117 | 396k | mi->tx_size = VPXMIN(tx_mode_to_biggest_tx_size[cm->tx_mode], |
6118 | 396k | max_txsize_lookup[bsize]); |
6119 | 1.11M | } else { |
6120 | 1.11M | mi->tx_size = (bsize >= BLOCK_8X8) ? mi->tx_size : TX_4X4; |
6121 | 1.11M | } |
6122 | 1.51M | } |
6123 | | |
6124 | 2.04M | ++td->counts->tx.tx_totals[mi->tx_size]; |
6125 | 2.04M | ++td->counts->tx.tx_totals[get_uv_tx_size(mi, &xd->plane[1])]; |
6126 | 2.04M | if (cm->seg.enabled && cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && |
6127 | 0 | cpi->cyclic_refresh->content_mode) |
6128 | 0 | vp9_cyclic_refresh_update_sb_postencode(cpi, mi, mi_row, mi_col, bsize); |
6129 | 2.04M | if (cpi->oxcf.pass == 0 && cpi->svc.temporal_layer_id == 0 && |
6130 | 2.04M | (!cpi->use_svc || |
6131 | 0 | (cpi->use_svc && |
6132 | 0 | !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame && |
6133 | 0 | cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1))) |
6134 | 2.04M | update_zeromv_cnt(cpi, mi, mi_row, mi_col, bsize); |
6135 | 2.04M | } |
6136 | 8.50M | } |