/src/libvpx/vp9/encoder/vp9_rdopt.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 | | * |
4 | | * Use of this source code is governed by a BSD-style license |
5 | | * that can be found in the LICENSE file in the root of the source |
6 | | * tree. An additional intellectual property rights grant can be found |
7 | | * in the file PATENTS. All contributing project authors may |
8 | | * be found in the AUTHORS file in the root of the source tree. |
9 | | */ |
10 | | |
11 | | #include <assert.h> |
12 | | #include <math.h> |
13 | | |
14 | | #include "./vp9_rtcd.h" |
15 | | #include "./vpx_dsp_rtcd.h" |
16 | | |
17 | | #include "vpx_dsp/vpx_dsp_common.h" |
18 | | #include "vpx_mem/vpx_mem.h" |
19 | | #include "vpx_ports/mem.h" |
20 | | #include "vpx_ports/system_state.h" |
21 | | |
22 | | #include "vp9/common/vp9_common.h" |
23 | | #include "vp9/common/vp9_entropy.h" |
24 | | #include "vp9/common/vp9_entropymode.h" |
25 | | #include "vp9/common/vp9_idct.h" |
26 | | #include "vp9/common/vp9_mvref_common.h" |
27 | | #include "vp9/common/vp9_pred_common.h" |
28 | | #include "vp9/common/vp9_quant_common.h" |
29 | | #include "vp9/common/vp9_reconinter.h" |
30 | | #include "vp9/common/vp9_reconintra.h" |
31 | | #include "vp9/common/vp9_scan.h" |
32 | | #include "vp9/common/vp9_seg_common.h" |
33 | | |
34 | | #if !CONFIG_REALTIME_ONLY |
35 | | #include "vp9/encoder/vp9_aq_variance.h" |
36 | | #endif |
37 | | #include "vp9/encoder/vp9_cost.h" |
38 | | #include "vp9/encoder/vp9_encodemb.h" |
39 | | #include "vp9/encoder/vp9_encodemv.h" |
40 | | #include "vp9/encoder/vp9_encoder.h" |
41 | | #include "vp9/encoder/vp9_mcomp.h" |
42 | | #include "vp9/encoder/vp9_quantize.h" |
43 | | #include "vp9/encoder/vp9_ratectrl.h" |
44 | | #include "vp9/encoder/vp9_rd.h" |
45 | | #include "vp9/encoder/vp9_rdopt.h" |
46 | | |
47 | | #define LAST_FRAME_MODE_MASK \ |
48 | 322k | ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | (1 << INTRA_FRAME)) |
49 | | #define GOLDEN_FRAME_MODE_MASK \ |
50 | 137k | ((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | (1 << INTRA_FRAME)) |
51 | | #define ALT_REF_MODE_MASK \ |
52 | 24.4k | ((1 << LAST_FRAME) | (1 << GOLDEN_FRAME) | (1 << INTRA_FRAME)) |
53 | | |
54 | 4.46M | #define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | 0x01) |
55 | | |
56 | 0 | #define MIN_EARLY_TERM_INDEX 3 |
57 | | #define NEW_MV_DISCOUNT_FACTOR 8 |
58 | | |
59 | | typedef struct { |
60 | | PREDICTION_MODE mode; |
61 | | MV_REFERENCE_FRAME ref_frame[2]; |
62 | | } MODE_DEFINITION; |
63 | | |
64 | | typedef struct { |
65 | | MV_REFERENCE_FRAME ref_frame[2]; |
66 | | } REF_DEFINITION; |
67 | | |
68 | | struct rdcost_block_args { |
69 | | const VP9_COMP *cpi; |
70 | | MACROBLOCK *x; |
71 | | ENTROPY_CONTEXT t_above[16]; |
72 | | ENTROPY_CONTEXT t_left[16]; |
73 | | int this_rate; |
74 | | int64_t this_dist; |
75 | | int64_t this_sse; |
76 | | int64_t this_rd; |
77 | | int64_t best_rd; |
78 | | int exit_early; |
79 | | int use_fast_coef_costing; |
80 | | const ScanOrder *so; |
81 | | uint8_t skippable; |
82 | | struct buf_2d *this_recon; |
83 | | }; |
84 | | |
85 | 29.2M | #define LAST_NEW_MV_INDEX 6 |
86 | | |
87 | | #if !CONFIG_REALTIME_ONLY |
88 | | static const MODE_DEFINITION vp9_mode_order[MAX_MODES] = { |
89 | | { NEARESTMV, { LAST_FRAME, NO_REF_FRAME } }, |
90 | | { NEARESTMV, { ALTREF_FRAME, NO_REF_FRAME } }, |
91 | | { NEARESTMV, { GOLDEN_FRAME, NO_REF_FRAME } }, |
92 | | |
93 | | { DC_PRED, { INTRA_FRAME, NO_REF_FRAME } }, |
94 | | |
95 | | { NEWMV, { LAST_FRAME, NO_REF_FRAME } }, |
96 | | { NEWMV, { ALTREF_FRAME, NO_REF_FRAME } }, |
97 | | { NEWMV, { GOLDEN_FRAME, NO_REF_FRAME } }, |
98 | | |
99 | | { NEARMV, { LAST_FRAME, NO_REF_FRAME } }, |
100 | | { NEARMV, { ALTREF_FRAME, NO_REF_FRAME } }, |
101 | | { NEARMV, { GOLDEN_FRAME, NO_REF_FRAME } }, |
102 | | |
103 | | { ZEROMV, { LAST_FRAME, NO_REF_FRAME } }, |
104 | | { ZEROMV, { GOLDEN_FRAME, NO_REF_FRAME } }, |
105 | | { ZEROMV, { ALTREF_FRAME, NO_REF_FRAME } }, |
106 | | |
107 | | { NEARESTMV, { LAST_FRAME, ALTREF_FRAME } }, |
108 | | { NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } }, |
109 | | |
110 | | { TM_PRED, { INTRA_FRAME, NO_REF_FRAME } }, |
111 | | |
112 | | { NEARMV, { LAST_FRAME, ALTREF_FRAME } }, |
113 | | { NEWMV, { LAST_FRAME, ALTREF_FRAME } }, |
114 | | { NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } }, |
115 | | { NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } }, |
116 | | |
117 | | { ZEROMV, { LAST_FRAME, ALTREF_FRAME } }, |
118 | | { ZEROMV, { GOLDEN_FRAME, ALTREF_FRAME } }, |
119 | | |
120 | | { H_PRED, { INTRA_FRAME, NO_REF_FRAME } }, |
121 | | { V_PRED, { INTRA_FRAME, NO_REF_FRAME } }, |
122 | | { D135_PRED, { INTRA_FRAME, NO_REF_FRAME } }, |
123 | | { D207_PRED, { INTRA_FRAME, NO_REF_FRAME } }, |
124 | | { D153_PRED, { INTRA_FRAME, NO_REF_FRAME } }, |
125 | | { D63_PRED, { INTRA_FRAME, NO_REF_FRAME } }, |
126 | | { D117_PRED, { INTRA_FRAME, NO_REF_FRAME } }, |
127 | | { D45_PRED, { INTRA_FRAME, NO_REF_FRAME } }, |
128 | | }; |
129 | | |
130 | | static const REF_DEFINITION vp9_ref_order[MAX_REFS] = { |
131 | | { { LAST_FRAME, NO_REF_FRAME } }, { { GOLDEN_FRAME, NO_REF_FRAME } }, |
132 | | { { ALTREF_FRAME, NO_REF_FRAME } }, { { LAST_FRAME, ALTREF_FRAME } }, |
133 | | { { GOLDEN_FRAME, ALTREF_FRAME } }, { { INTRA_FRAME, NO_REF_FRAME } }, |
134 | | }; |
135 | | #endif // !CONFIG_REALTIME_ONLY |
136 | | |
137 | | static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int m, int n, |
138 | 8.97M | int min_plane, int max_plane) { |
139 | 8.97M | int i; |
140 | | |
141 | 26.9M | for (i = min_plane; i < max_plane; ++i) { |
142 | 18.0M | struct macroblock_plane *const p = &x->plane[i]; |
143 | 18.0M | struct macroblockd_plane *const pd = &x->e_mbd.plane[i]; |
144 | | |
145 | 18.0M | p->coeff = ctx->coeff_pbuf[i][m]; |
146 | 18.0M | p->qcoeff = ctx->qcoeff_pbuf[i][m]; |
147 | 18.0M | pd->dqcoeff = ctx->dqcoeff_pbuf[i][m]; |
148 | 18.0M | p->eobs = ctx->eobs_pbuf[i][m]; |
149 | | |
150 | 18.0M | ctx->coeff_pbuf[i][m] = ctx->coeff_pbuf[i][n]; |
151 | 18.0M | ctx->qcoeff_pbuf[i][m] = ctx->qcoeff_pbuf[i][n]; |
152 | 18.0M | ctx->dqcoeff_pbuf[i][m] = ctx->dqcoeff_pbuf[i][n]; |
153 | 18.0M | ctx->eobs_pbuf[i][m] = ctx->eobs_pbuf[i][n]; |
154 | | |
155 | 18.0M | ctx->coeff_pbuf[i][n] = p->coeff; |
156 | 18.0M | ctx->qcoeff_pbuf[i][n] = p->qcoeff; |
157 | 18.0M | ctx->dqcoeff_pbuf[i][n] = pd->dqcoeff; |
158 | 18.0M | ctx->eobs_pbuf[i][n] = p->eobs; |
159 | 18.0M | } |
160 | 8.97M | } |
161 | | |
162 | | #if !CONFIG_REALTIME_ONLY |
163 | | // Planewise build inter prediction and compute rdcost with early termination |
164 | | // option |
165 | | static int build_inter_pred_model_rd_earlyterm( |
166 | | VP9_COMP *cpi, int mi_row, int mi_col, BLOCK_SIZE bsize, MACROBLOCK *x, |
167 | | MACROBLOCKD *xd, int *out_rate_sum, int64_t *out_dist_sum, |
168 | | int *skip_txfm_sb, int64_t *skip_sse_sb, int do_earlyterm, |
169 | 28.6M | int64_t best_rd) { |
170 | | // Note our transform coeffs are 8 times an orthogonal transform. |
171 | | // Hence quantizer step is also 8 times. To get effective quantizer |
172 | | // we need to divide by 8 before sending to modeling function. |
173 | 28.6M | int i; |
174 | 28.6M | int64_t rate_sum = 0; |
175 | 28.6M | int64_t dist_sum = 0; |
176 | 28.6M | const int ref = xd->mi[0]->ref_frame[0]; |
177 | 28.6M | unsigned int sse; |
178 | 28.6M | unsigned int var = 0; |
179 | 28.6M | int64_t total_sse = 0; |
180 | 28.6M | int skip_flag = 1; |
181 | 28.6M | const int shift = 6; |
182 | 28.6M | const int dequant_shift = |
183 | 28.6M | #if CONFIG_VP9_HIGHBITDEPTH |
184 | 28.6M | (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd - 5 : |
185 | 28.6M | #endif // CONFIG_VP9_HIGHBITDEPTH |
186 | 28.6M | 3; |
187 | | |
188 | 28.6M | x->pred_sse[ref] = 0; |
189 | | |
190 | | // Build prediction signal, compute stats and RD cost on per-plane basis |
191 | 112M | for (i = 0; i < MAX_MB_PLANE; ++i) { |
192 | 85.7M | struct macroblock_plane *const p = &x->plane[i]; |
193 | 85.7M | struct macroblockd_plane *const pd = &xd->plane[i]; |
194 | 85.7M | const BLOCK_SIZE bs = get_plane_block_size(bsize, pd); |
195 | 85.7M | const TX_SIZE max_tx_size = max_txsize_lookup[bs]; |
196 | 85.7M | const BLOCK_SIZE unit_size = txsize_to_bsize[max_tx_size]; |
197 | 85.7M | const int64_t dc_thr = p->quant_thred[0] >> shift; |
198 | 85.7M | const int64_t ac_thr = p->quant_thred[1] >> shift; |
199 | 85.7M | unsigned int sum_sse = 0; |
200 | | // The low thresholds are used to measure if the prediction errors are |
201 | | // low enough so that we can skip the mode search. |
202 | 85.7M | const int64_t low_dc_thr = VPXMIN(50, dc_thr >> 2); |
203 | 85.7M | const int64_t low_ac_thr = VPXMIN(80, ac_thr >> 2); |
204 | 85.7M | int bw = 1 << (b_width_log2_lookup[bs] - b_width_log2_lookup[unit_size]); |
205 | 85.7M | int bh = 1 << (b_height_log2_lookup[bs] - b_width_log2_lookup[unit_size]); |
206 | 85.7M | int idx, idy; |
207 | 85.7M | int lw = b_width_log2_lookup[unit_size] + 2; |
208 | 85.7M | int lh = b_height_log2_lookup[unit_size] + 2; |
209 | 85.7M | unsigned int qstep; |
210 | 85.7M | unsigned int nlog2; |
211 | 85.7M | int64_t dist = 0; |
212 | | |
213 | | // Build inter predictor |
214 | 85.7M | vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, i); |
215 | | |
216 | | // Compute useful stats |
217 | 185M | for (idy = 0; idy < bh; ++idy) { |
218 | 214M | for (idx = 0; idx < bw; ++idx) { |
219 | 114M | uint8_t *src = p->src.buf + (idy * p->src.stride << lh) + (idx << lw); |
220 | 114M | uint8_t *dst = pd->dst.buf + (idy * pd->dst.stride << lh) + (idx << lh); |
221 | 114M | int block_idx = (idy << 1) + idx; |
222 | 114M | int low_err_skip = 0; |
223 | | |
224 | 114M | var = cpi->fn_ptr[unit_size].vf(src, p->src.stride, dst, pd->dst.stride, |
225 | 114M | &sse); |
226 | 114M | x->bsse[(i << 2) + block_idx] = sse; |
227 | 114M | sum_sse += sse; |
228 | | |
229 | 114M | x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_NONE; |
230 | 114M | if (!x->select_tx_size) { |
231 | | // Check if all ac coefficients can be quantized to zero. |
232 | 78.8M | if (var < ac_thr || var == 0) { |
233 | 8.83M | x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_AC_ONLY; |
234 | | |
235 | | // Check if dc coefficient can be quantized to zero. |
236 | 8.83M | if (sse - var < dc_thr || sse == var) { |
237 | 7.04M | x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_AC_DC; |
238 | | |
239 | 7.04M | if (!sse || (var < low_ac_thr && sse - var < low_dc_thr)) |
240 | 2.93M | low_err_skip = 1; |
241 | 7.04M | } |
242 | 8.83M | } |
243 | 78.8M | } |
244 | | |
245 | 114M | if (skip_flag && !low_err_skip) skip_flag = 0; |
246 | | |
247 | 114M | if (i == 0) x->pred_sse[ref] += sse; |
248 | 114M | } |
249 | 99.9M | } |
250 | | |
251 | 85.7M | total_sse += sum_sse; |
252 | 85.7M | qstep = pd->dequant[1] >> dequant_shift; |
253 | 85.7M | nlog2 = num_pels_log2_lookup[bs]; |
254 | | |
255 | | // Fast approximate the modelling function. |
256 | 85.7M | if (cpi->sf.simple_model_rd_from_var) { |
257 | 0 | int64_t rate; |
258 | 0 | if (qstep < 120) |
259 | 0 | rate = ((int64_t)sum_sse * (280 - qstep)) >> (16 - VP9_PROB_COST_SHIFT); |
260 | 0 | else |
261 | 0 | rate = 0; |
262 | 0 | dist = ((int64_t)sum_sse * qstep) >> 8; |
263 | 0 | rate_sum += rate; |
264 | 85.7M | } else { |
265 | 85.7M | int rate; |
266 | 85.7M | vp9_model_rd_from_var_lapndz(sum_sse, nlog2, qstep, &rate, &dist); |
267 | 85.7M | rate_sum += rate; |
268 | 85.7M | } |
269 | 85.7M | dist_sum += dist; |
270 | 85.7M | if (do_earlyterm) { |
271 | 26.0M | if (RDCOST(x->rdmult, x->rddiv, rate_sum, |
272 | 26.0M | dist_sum << VP9_DIST_SCALE_LOG2) >= best_rd) |
273 | 2.33M | return 1; |
274 | 26.0M | } |
275 | 85.7M | } |
276 | 26.3M | *skip_txfm_sb = skip_flag; |
277 | 26.3M | *skip_sse_sb = total_sse << VP9_DIST_SCALE_LOG2; |
278 | 26.3M | *out_rate_sum = (int)rate_sum; |
279 | 26.3M | *out_dist_sum = dist_sum << VP9_DIST_SCALE_LOG2; |
280 | | |
281 | 26.3M | return 0; |
282 | 28.6M | } |
283 | | #endif // !CONFIG_REALTIME_ONLY |
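/* Editor's note: a minimal standalone sketch of the fast rate/distortion
 * approximation taken when cpi->sf.simple_model_rd_from_var is set in the
 * function above. It is illustrative only and not part of vp9_rdopt.c; the
 * constants 120, 280 and the shifts simply mirror the expressions in the
 * function body (VP9_PROB_COST_SHIFT comes from the encoder headers included
 * above). */
static void simple_model_rd_sketch(int64_t sum_sse, unsigned int qstep,
                                   int64_t *rate, int64_t *dist) {
  /* Rate shrinks roughly linearly as the quantizer step grows and is
   * treated as zero once qstep reaches 120. */
  if (qstep < 120)
    *rate = ((int64_t)sum_sse * (280 - qstep)) >> (16 - VP9_PROB_COST_SHIFT);
  else
    *rate = 0;
  /* Distortion grows linearly with both the prediction SSE and qstep. */
  *dist = ((int64_t)sum_sse * qstep) >> 8;
}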
284 | | |
285 | | #if CONFIG_VP9_HIGHBITDEPTH |
286 | | int64_t vp9_highbd_block_error_c(const tran_low_t *coeff, |
287 | | const tran_low_t *dqcoeff, intptr_t block_size, |
288 | 0 | int64_t *ssz, int bd) { |
289 | 0 | int i; |
290 | 0 | int64_t error = 0, sqcoeff = 0; |
291 | 0 | int shift = 2 * (bd - 8); |
292 | 0 | int rounding = shift > 0 ? 1 << (shift - 1) : 0; |
293 | |
294 | 0 | for (i = 0; i < block_size; i++) { |
295 | 0 | const int64_t diff = coeff[i] - dqcoeff[i]; |
296 | 0 | error += diff * diff; |
297 | 0 | sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i]; |
298 | 0 | } |
299 | 0 | assert(error >= 0 && sqcoeff >= 0); |
300 | 0 | error = (error + rounding) >> shift; |
301 | 0 | sqcoeff = (sqcoeff + rounding) >> shift; |
302 | |
303 | 0 | *ssz = sqcoeff; |
304 | 0 | return error; |
305 | 0 | } |
306 | | |
307 | | static int64_t vp9_highbd_block_error_dispatch(const tran_low_t *coeff, |
308 | | const tran_low_t *dqcoeff, |
309 | | intptr_t block_size, |
310 | 286M | int64_t *ssz, int bd) { |
311 | 286M | if (bd == 8) { |
312 | 286M | return vp9_block_error(coeff, dqcoeff, block_size, ssz); |
313 | 286M | } else { |
314 | 0 | return vp9_highbd_block_error(coeff, dqcoeff, block_size, ssz, bd); |
315 | 0 | } |
316 | 286M | } |
317 | | #endif // CONFIG_VP9_HIGHBITDEPTH |
318 | | |
319 | | int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff, |
320 | 0 | intptr_t block_size, int64_t *ssz) { |
321 | 0 | int i; |
322 | 0 | int64_t error = 0, sqcoeff = 0; |
323 | |
324 | 0 | for (i = 0; i < block_size; i++) { |
325 | 0 | const int diff = coeff[i] - dqcoeff[i]; |
326 | 0 | error += diff * diff; |
327 | 0 | sqcoeff += coeff[i] * coeff[i]; |
328 | 0 | } |
329 | |
330 | 0 | *ssz = sqcoeff; |
331 | 0 | return error; |
332 | 0 | } |
333 | | |
334 | | int64_t vp9_block_error_fp_c(const tran_low_t *coeff, const tran_low_t *dqcoeff, |
335 | 0 | int block_size) { |
336 | 0 | int i; |
337 | 0 | int64_t error = 0; |
338 | |
339 | 0 | for (i = 0; i < block_size; i++) { |
340 | 0 | const int diff = coeff[i] - dqcoeff[i]; |
341 | 0 | error += diff * diff; |
342 | 0 | } |
343 | |
344 | 0 | return error; |
345 | 0 | } |
346 | | |
347 | | /* The trailing '0' is a terminator which is used inside cost_coeffs() to |
348 | | * decide whether to include cost of a trailing EOB node or not (i.e. we |
349 | | * can skip this if the last coefficient in this transform block, e.g. the |
350 | | * 16th coefficient in a 4x4 block or the 64th coefficient in an 8x8 block, |
351 | | * were non-zero). */ |
352 | | static const int16_t band_counts[TX_SIZES][8] = { |
353 | | { 1, 2, 3, 4, 3, 16 - 13, 0 }, |
354 | | { 1, 2, 3, 4, 11, 64 - 21, 0 }, |
355 | | { 1, 2, 3, 4, 11, 256 - 21, 0 }, |
356 | | { 1, 2, 3, 4, 11, 1024 - 21, 0 }, |
357 | | }; |
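/* Editor's note: illustrative self-check, not part of vp9_rdopt.c. For every
 * transform size the band counts (plus the implicit trailing zeros) sum to
 * the total number of coefficients in the block: 1+2+3+4+3+(16-13) == 16,
 * 1+2+3+4+11+(64-21) == 64, and likewise 256 and 1024. The first band (count
 * 1) is the DC coefficient, which cost_coeffs() below prices before walking
 * the remaining bands, so every scan position is costed exactly once. */
static int band_counts_cover_all_coeffs(void) {
  static const int num_coeffs[TX_SIZES] = { 16, 64, 256, 1024 };
  int t, b, ok = 1;
  for (t = 0; t < TX_SIZES; ++t) {
    int sum = 0;
    for (b = 0; b < 8; ++b) sum += band_counts[t][b];
    ok &= (sum == num_coeffs[t]);
  }
  return ok; /* expected to be 1 */
}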
358 | | static int cost_coeffs(MACROBLOCK *x, int plane, int block, TX_SIZE tx_size, |
359 | | int pt, const int16_t *scan, const int16_t *nb, |
360 | 530M | int use_fast_coef_costing) { |
361 | 530M | MACROBLOCKD *const xd = &x->e_mbd; |
362 | 530M | MODE_INFO *mi = xd->mi[0]; |
363 | 530M | const struct macroblock_plane *p = &x->plane[plane]; |
364 | 530M | const PLANE_TYPE type = get_plane_type(plane); |
365 | 530M | const int16_t *band_count = &band_counts[tx_size][1]; |
366 | 530M | const int eob = p->eobs[block]; |
367 | 530M | const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); |
368 | 530M | unsigned int(*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] = |
369 | 530M | x->token_costs[tx_size][type][is_inter_block(mi)]; |
370 | 530M | uint8_t token_cache[32 * 32]; |
371 | 530M | int cost; |
372 | 530M | #if CONFIG_VP9_HIGHBITDEPTH |
373 | 530M | const uint16_t *cat6_high_cost = vp9_get_high_cost_table(xd->bd); |
374 | | #else |
375 | | const uint16_t *cat6_high_cost = vp9_get_high_cost_table(8); |
376 | | #endif |
377 | | |
378 | | // Check for consistency of tx_size with mode info |
379 | 530M | assert(type == PLANE_TYPE_Y |
380 | 530M | ? mi->tx_size == tx_size |
381 | 530M | : get_uv_tx_size(mi, &xd->plane[plane]) == tx_size); |
382 | | |
383 | 530M | if (eob == 0) { |
384 | | // single eob token |
385 | 96.1M | cost = token_costs[0][0][pt][EOB_TOKEN]; |
386 | 434M | } else { |
387 | 434M | if (use_fast_coef_costing) { |
388 | 288M | int band_left = *band_count++; |
389 | 288M | int c; |
390 | | |
391 | | // dc token |
392 | 288M | int v = qcoeff[0]; |
393 | 288M | int16_t prev_t; |
394 | 288M | cost = vp9_get_token_cost(v, &prev_t, cat6_high_cost); |
395 | 288M | cost += (*token_costs)[0][pt][prev_t]; |
396 | | |
397 | 288M | token_cache[0] = vp9_pt_energy_class[prev_t]; |
398 | 288M | ++token_costs; |
399 | | |
400 | | // ac tokens |
401 | 9.46G | for (c = 1; c < eob; c++) { |
402 | 9.17G | const int rc = scan[c]; |
403 | 9.17G | int16_t t; |
404 | | |
405 | 9.17G | v = qcoeff[rc]; |
406 | 9.17G | cost += vp9_get_token_cost(v, &t, cat6_high_cost); |
407 | 9.17G | cost += (*token_costs)[!prev_t][!prev_t][t]; |
408 | 9.17G | prev_t = t; |
409 | 9.17G | if (!--band_left) { |
410 | 1.14G | band_left = *band_count++; |
411 | 1.14G | ++token_costs; |
412 | 1.14G | } |
413 | 9.17G | } |
414 | | |
415 | | // eob token |
416 | 288M | if (band_left) cost += (*token_costs)[0][!prev_t][EOB_TOKEN]; |
417 | | |
418 | 288M | } else { // !use_fast_coef_costing |
419 | 145M | int band_left = *band_count++; |
420 | 145M | int c; |
421 | | |
422 | | // dc token |
423 | 145M | int v = qcoeff[0]; |
424 | 145M | int16_t tok; |
425 | 145M | unsigned int(*tok_cost_ptr)[COEFF_CONTEXTS][ENTROPY_TOKENS]; |
426 | 145M | cost = vp9_get_token_cost(v, &tok, cat6_high_cost); |
427 | 145M | cost += (*token_costs)[0][pt][tok]; |
428 | | |
429 | 145M | token_cache[0] = vp9_pt_energy_class[tok]; |
430 | 145M | ++token_costs; |
431 | | |
432 | 145M | tok_cost_ptr = &((*token_costs)[!tok]); |
433 | | |
434 | | // ac tokens |
435 | 3.78G | for (c = 1; c < eob; c++) { |
436 | 3.64G | const int rc = scan[c]; |
437 | | |
438 | 3.64G | v = qcoeff[rc]; |
439 | 3.64G | cost += vp9_get_token_cost(v, &tok, cat6_high_cost); |
440 | 3.64G | pt = get_coef_context(nb, token_cache, c); |
441 | 3.64G | cost += (*tok_cost_ptr)[pt][tok]; |
442 | 3.64G | token_cache[rc] = vp9_pt_energy_class[tok]; |
443 | 3.64G | if (!--band_left) { |
444 | 596M | band_left = *band_count++; |
445 | 596M | ++token_costs; |
446 | 596M | } |
447 | 3.64G | tok_cost_ptr = &((*token_costs)[!tok]); |
448 | 3.64G | } |
449 | | |
450 | | // eob token |
451 | 145M | if (band_left) { |
452 | 52.3M | pt = get_coef_context(nb, token_cache, c); |
453 | 52.3M | cost += (*token_costs)[0][pt][EOB_TOKEN]; |
454 | 52.3M | } |
455 | 145M | } |
456 | 434M | } |
457 | | |
458 | 530M | return cost; |
459 | 530M | } |
460 | | |
461 | | // Copy all visible 4x4s in the transform block. |
462 | | static void copy_block_visible(const MACROBLOCKD *xd, |
463 | | const struct macroblockd_plane *const pd, |
464 | | const uint8_t *src, const int src_stride, |
465 | | uint8_t *dst, const int dst_stride, int blk_row, |
466 | | int blk_col, const BLOCK_SIZE plane_bsize, |
467 | 0 | const BLOCK_SIZE tx_bsize) { |
468 | 0 | const int plane_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; |
469 | 0 | const int plane_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; |
470 | 0 | const int tx_4x4_w = num_4x4_blocks_wide_lookup[tx_bsize]; |
471 | 0 | const int tx_4x4_h = num_4x4_blocks_high_lookup[tx_bsize]; |
472 | 0 | int b4x4s_to_right_edge = num_4x4_to_edge(plane_4x4_w, xd->mb_to_right_edge, |
473 | 0 | pd->subsampling_x, blk_col); |
474 | 0 | int b4x4s_to_bottom_edge = num_4x4_to_edge(plane_4x4_h, xd->mb_to_bottom_edge, |
475 | 0 | pd->subsampling_y, blk_row); |
476 | 0 | const int is_highbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH; |
477 | 0 | if (tx_bsize == BLOCK_4X4 || |
478 | 0 | (b4x4s_to_right_edge >= tx_4x4_w && b4x4s_to_bottom_edge >= tx_4x4_h)) { |
479 | 0 | const int w = tx_4x4_w << 2; |
480 | 0 | const int h = tx_4x4_h << 2; |
481 | 0 | #if CONFIG_VP9_HIGHBITDEPTH |
482 | 0 | if (is_highbd) { |
483 | 0 | vpx_highbd_convolve_copy(CONVERT_TO_SHORTPTR(src), src_stride, |
484 | 0 | CONVERT_TO_SHORTPTR(dst), dst_stride, NULL, 0, 0, |
485 | 0 | 0, 0, w, h, xd->bd); |
486 | 0 | } else { |
487 | 0 | #endif |
488 | 0 | vpx_convolve_copy(src, src_stride, dst, dst_stride, NULL, 0, 0, 0, 0, w, |
489 | 0 | h); |
490 | 0 | #if CONFIG_VP9_HIGHBITDEPTH |
491 | 0 | } |
492 | 0 | #endif |
493 | 0 | } else { |
494 | 0 | int r, c; |
495 | 0 | int max_r = VPXMIN(b4x4s_to_bottom_edge, tx_4x4_h); |
496 | 0 | int max_c = VPXMIN(b4x4s_to_right_edge, tx_4x4_w); |
497 | | // if we are in the unrestricted motion border. |
498 | 0 | for (r = 0; r < max_r; ++r) { |
499 | | // Skip visiting the sub blocks that are wholly within the UMV. |
500 | 0 | for (c = 0; c < max_c; ++c) { |
501 | 0 | const uint8_t *src_ptr = src + r * src_stride * 4 + c * 4; |
502 | 0 | uint8_t *dst_ptr = dst + r * dst_stride * 4 + c * 4; |
503 | 0 | #if CONFIG_VP9_HIGHBITDEPTH |
504 | 0 | if (is_highbd) { |
505 | 0 | vpx_highbd_convolve_copy(CONVERT_TO_SHORTPTR(src_ptr), src_stride, |
506 | 0 | CONVERT_TO_SHORTPTR(dst_ptr), dst_stride, |
507 | 0 | NULL, 0, 0, 0, 0, 4, 4, xd->bd); |
508 | 0 | } else { |
509 | 0 | #endif |
510 | 0 | vpx_convolve_copy(src_ptr, src_stride, dst_ptr, dst_stride, NULL, 0, |
511 | 0 | 0, 0, 0, 4, 4); |
512 | 0 | #if CONFIG_VP9_HIGHBITDEPTH |
513 | 0 | } |
514 | 0 | #endif |
515 | 0 | } |
516 | 0 | } |
517 | 0 | } |
518 | 0 | (void)is_highbd; |
519 | 0 | } |
520 | | |
521 | | // Compute the pixel domain sum square error on all visible 4x4s in the |
522 | | // transform block. |
523 | | static unsigned pixel_sse(const VP9_COMP *const cpi, const MACROBLOCKD *xd, |
524 | | const struct macroblockd_plane *const pd, |
525 | | const uint8_t *src, const int src_stride, |
526 | | const uint8_t *dst, const int dst_stride, int blk_row, |
527 | | int blk_col, const BLOCK_SIZE plane_bsize, |
528 | 119M | const BLOCK_SIZE tx_bsize) { |
529 | 119M | unsigned int sse = 0; |
530 | 119M | const int plane_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; |
531 | 119M | const int plane_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; |
532 | 119M | const int tx_4x4_w = num_4x4_blocks_wide_lookup[tx_bsize]; |
533 | 119M | const int tx_4x4_h = num_4x4_blocks_high_lookup[tx_bsize]; |
534 | 119M | int b4x4s_to_right_edge = num_4x4_to_edge(plane_4x4_w, xd->mb_to_right_edge, |
535 | 119M | pd->subsampling_x, blk_col); |
536 | 119M | int b4x4s_to_bottom_edge = num_4x4_to_edge(plane_4x4_h, xd->mb_to_bottom_edge, |
537 | 119M | pd->subsampling_y, blk_row); |
538 | 119M | if (tx_bsize == BLOCK_4X4 || |
539 | 119M | (b4x4s_to_right_edge >= tx_4x4_w && b4x4s_to_bottom_edge >= tx_4x4_h)) { |
540 | 119M | cpi->fn_ptr[tx_bsize].vf(src, src_stride, dst, dst_stride, &sse); |
541 | 119M | } else { |
542 | 736k | const vpx_variance_fn_t vf_4x4 = cpi->fn_ptr[BLOCK_4X4].vf; |
543 | 736k | int r, c; |
544 | 736k | unsigned this_sse = 0; |
545 | 736k | int max_r = VPXMIN(b4x4s_to_bottom_edge, tx_4x4_h); |
546 | 736k | int max_c = VPXMIN(b4x4s_to_right_edge, tx_4x4_w); |
547 | 736k | sse = 0; |
548 | | // if we are in the unrestricted motion border. |
549 | 3.08M | for (r = 0; r < max_r; ++r) { |
550 | | // Skip visiting the sub blocks that are wholly within the UMV. |
551 | 10.9M | for (c = 0; c < max_c; ++c) { |
552 | 8.64M | vf_4x4(src + r * src_stride * 4 + c * 4, src_stride, |
553 | 8.64M | dst + r * dst_stride * 4 + c * 4, dst_stride, &this_sse); |
554 | 8.64M | sse += this_sse; |
555 | 8.64M | } |
556 | 2.34M | } |
557 | 736k | } |
558 | 119M | return sse; |
559 | 119M | } |
560 | | |
561 | | static void dist_block(const VP9_COMP *cpi, MACROBLOCK *x, int plane, |
562 | | BLOCK_SIZE plane_bsize, int block, int blk_row, |
563 | | int blk_col, TX_SIZE tx_size, int64_t *out_dist, |
564 | | int64_t *out_sse, struct buf_2d *out_recon, |
565 | 280M | int sse_calc_done) { |
566 | 280M | MACROBLOCKD *const xd = &x->e_mbd; |
567 | 280M | const struct macroblock_plane *const p = &x->plane[plane]; |
568 | 280M | const struct macroblockd_plane *const pd = &xd->plane[plane]; |
569 | 280M | const int eob = p->eobs[block]; |
570 | | |
571 | 280M | if (!out_recon && x->block_tx_domain && eob) { |
572 | 217M | const int ss_txfrm_size = tx_size << 1; |
573 | 217M | int64_t this_sse; |
574 | 217M | const int shift = tx_size == TX_32X32 ? 0 : 2; |
575 | 217M | const tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); |
576 | 217M | const tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); |
577 | 217M | #if CONFIG_VP9_HIGHBITDEPTH |
578 | 217M | const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8; |
579 | 217M | *out_dist = vp9_highbd_block_error_dispatch( |
580 | 217M | coeff, dqcoeff, 16 << ss_txfrm_size, &this_sse, bd) >> |
581 | 217M | shift; |
582 | | #else |
583 | | *out_dist = |
584 | | vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size, &this_sse) >> |
585 | | shift; |
586 | | #endif // CONFIG_VP9_HIGHBITDEPTH |
587 | 217M | *out_sse = this_sse >> shift; |
588 | | |
589 | 217M | if (x->skip_encode && !is_inter_block(xd->mi[0])) { |
590 | | // TODO(jingning): tune the model to better capture the distortion. |
591 | 0 | const int64_t mean_quant_error = |
592 | 0 | (pd->dequant[1] * pd->dequant[1] * (1 << ss_txfrm_size)) >> |
593 | 0 | #if CONFIG_VP9_HIGHBITDEPTH |
594 | 0 | (shift + 2 + (bd - 8) * 2); |
595 | | #else |
596 | | (shift + 2); |
597 | | #endif // CONFIG_VP9_HIGHBITDEPTH |
598 | 0 | *out_dist += (mean_quant_error >> 4); |
599 | 0 | *out_sse += mean_quant_error; |
600 | 0 | } |
601 | 217M | } else { |
602 | 62.9M | const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size]; |
603 | 62.9M | const int bs = 4 * num_4x4_blocks_wide_lookup[tx_bsize]; |
604 | 62.9M | const int src_stride = p->src.stride; |
605 | 62.9M | const int dst_stride = pd->dst.stride; |
606 | 62.9M | const int src_idx = 4 * (blk_row * src_stride + blk_col); |
607 | 62.9M | const int dst_idx = 4 * (blk_row * dst_stride + blk_col); |
608 | 62.9M | const uint8_t *src = &p->src.buf[src_idx]; |
609 | 62.9M | const uint8_t *dst = &pd->dst.buf[dst_idx]; |
610 | 62.9M | uint8_t *out_recon_ptr = 0; |
611 | | |
612 | 62.9M | const tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); |
613 | 62.9M | unsigned int tmp; |
614 | | |
615 | 62.9M | if (sse_calc_done) { |
616 | 0 | tmp = (unsigned int)(*out_sse); |
617 | 62.9M | } else { |
618 | 62.9M | tmp = pixel_sse(cpi, xd, pd, src, src_stride, dst, dst_stride, blk_row, |
619 | 62.9M | blk_col, plane_bsize, tx_bsize); |
620 | 62.9M | } |
621 | 62.9M | *out_sse = (int64_t)tmp * 16; |
622 | 62.9M | if (out_recon) { |
623 | 0 | const int out_recon_idx = 4 * (blk_row * out_recon->stride + blk_col); |
624 | 0 | out_recon_ptr = &out_recon->buf[out_recon_idx]; |
625 | 0 | copy_block_visible(xd, pd, dst, dst_stride, out_recon_ptr, |
626 | 0 | out_recon->stride, blk_row, blk_col, plane_bsize, |
627 | 0 | tx_bsize); |
628 | 0 | } |
629 | | |
630 | 62.9M | if (eob) { |
631 | 13.1M | #if CONFIG_VP9_HIGHBITDEPTH |
632 | 13.1M | DECLARE_ALIGNED(16, uint16_t, recon16[1024]); |
633 | 13.1M | uint8_t *recon = (uint8_t *)recon16; |
634 | | #else |
635 | | DECLARE_ALIGNED(16, uint8_t, recon[1024]); |
636 | | #endif // CONFIG_VP9_HIGHBITDEPTH |
637 | | |
638 | 13.1M | #if CONFIG_VP9_HIGHBITDEPTH |
639 | 13.1M | if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { |
640 | 0 | vpx_highbd_convolve_copy(CONVERT_TO_SHORTPTR(dst), dst_stride, recon16, |
641 | 0 | 32, NULL, 0, 0, 0, 0, bs, bs, xd->bd); |
642 | 0 | if (xd->lossless) { |
643 | 0 | vp9_highbd_iwht4x4_add(dqcoeff, recon16, 32, eob, xd->bd); |
644 | 0 | } else { |
645 | 0 | switch (tx_size) { |
646 | 0 | case TX_4X4: |
647 | 0 | vp9_highbd_idct4x4_add(dqcoeff, recon16, 32, eob, xd->bd); |
648 | 0 | break; |
649 | 0 | case TX_8X8: |
650 | 0 | vp9_highbd_idct8x8_add(dqcoeff, recon16, 32, eob, xd->bd); |
651 | 0 | break; |
652 | 0 | case TX_16X16: |
653 | 0 | vp9_highbd_idct16x16_add(dqcoeff, recon16, 32, eob, xd->bd); |
654 | 0 | break; |
655 | 0 | default: |
656 | 0 | assert(tx_size == TX_32X32); |
657 | 0 | vp9_highbd_idct32x32_add(dqcoeff, recon16, 32, eob, xd->bd); |
658 | 0 | break; |
659 | 0 | } |
660 | 0 | } |
661 | 0 | recon = CONVERT_TO_BYTEPTR(recon16); |
662 | 13.1M | } else { |
663 | 13.1M | #endif // CONFIG_VP9_HIGHBITDEPTH |
664 | 13.1M | vpx_convolve_copy(dst, dst_stride, recon, 32, NULL, 0, 0, 0, 0, bs, bs); |
665 | 13.1M | switch (tx_size) { |
666 | 112k | case TX_32X32: vp9_idct32x32_add(dqcoeff, recon, 32, eob); break; |
667 | 816k | case TX_16X16: vp9_idct16x16_add(dqcoeff, recon, 32, eob); break; |
668 | 3.19M | case TX_8X8: vp9_idct8x8_add(dqcoeff, recon, 32, eob); break; |
669 | 8.98M | default: |
670 | 8.98M | assert(tx_size == TX_4X4); |
671 | | // this is like vp9_short_idct4x4 but has a special case around |
672 | | // eob<=1, which is significant (not just an optimization) for |
673 | | // the lossless case. |
674 | 8.98M | x->inv_txfm_add(dqcoeff, recon, 32, eob); |
675 | 8.98M | break; |
676 | 13.1M | } |
677 | 13.1M | #if CONFIG_VP9_HIGHBITDEPTH |
678 | 13.1M | } |
679 | 13.1M | #endif // CONFIG_VP9_HIGHBITDEPTH |
680 | | |
681 | 13.1M | tmp = pixel_sse(cpi, xd, pd, src, src_stride, recon, 32, blk_row, blk_col, |
682 | 13.1M | plane_bsize, tx_bsize); |
683 | 13.1M | if (out_recon) { |
684 | 0 | copy_block_visible(xd, pd, recon, 32, out_recon_ptr, out_recon->stride, |
685 | 0 | blk_row, blk_col, plane_bsize, tx_bsize); |
686 | 0 | } |
687 | 13.1M | } |
688 | | |
689 | 62.9M | *out_dist = (int64_t)tmp * 16; |
690 | 62.9M | } |
691 | 280M | } |
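/* Editor's note: an illustrative scale check, not part of vp9_rdopt.c, and an
 * assumption based on the "coeffs are 8 times an orthogonal transform" note
 * near the top of this file. If the 4x4..16x16 transforms carry a gain of 8
 * (so transform-domain squared error is 64x the pixel-domain one) and the
 * 32x32 transform carries a gain of 4, then the ">> shift" applied to the
 * transform-domain error above lands on the same 16x-pixel-SSE scale as the
 * "(int64_t)tmp * 16" used in the pixel-domain branch. */
static int dist_block_scales_agree_sketch(int64_t pixel_sse, TX_SIZE tx_size) {
  const int shift = (tx_size == TX_32X32) ? 0 : 2;
  const int64_t gain_sq = (tx_size == TX_32X32) ? 16 : 64; /* 4^2 vs 8^2 */
  const int64_t dist_from_txfm = (pixel_sse * gain_sq) >> shift;
  const int64_t dist_from_pixels = pixel_sse * 16;
  return dist_from_txfm == dist_from_pixels; /* holds for every tx_size */
}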
692 | | |
693 | | static int rate_block(int plane, int block, TX_SIZE tx_size, int coeff_ctx, |
694 | 318M | struct rdcost_block_args *args) { |
695 | 318M | return cost_coeffs(args->x, plane, block, tx_size, coeff_ctx, args->so->scan, |
696 | 318M | args->so->neighbors, args->use_fast_coef_costing); |
697 | 318M | } |
698 | | |
699 | | static void block_rd_txfm(int plane, int block, int blk_row, int blk_col, |
700 | 333M | BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { |
701 | 333M | struct rdcost_block_args *args = arg; |
702 | 333M | MACROBLOCK *const x = args->x; |
703 | 333M | MACROBLOCKD *const xd = &x->e_mbd; |
704 | 333M | MODE_INFO *const mi = xd->mi[0]; |
705 | 333M | int64_t rd1, rd2, rd; |
706 | 333M | int rate; |
707 | 333M | int64_t dist = INT64_MAX; |
708 | 333M | int64_t sse = INT64_MAX; |
709 | 333M | const int coeff_ctx = |
710 | 333M | combine_entropy_contexts(args->t_left[blk_row], args->t_above[blk_col]); |
711 | 333M | struct buf_2d *recon = args->this_recon; |
712 | 333M | const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size]; |
713 | 333M | const struct macroblockd_plane *const pd = &xd->plane[plane]; |
714 | 333M | const int dst_stride = pd->dst.stride; |
715 | 333M | const uint8_t *dst = &pd->dst.buf[4 * (blk_row * dst_stride + blk_col)]; |
716 | 333M | const int enable_trellis_opt = args->cpi->sf.trellis_opt_tx_rd.method; |
717 | 333M | const double trellis_opt_thresh = args->cpi->sf.trellis_opt_tx_rd.thresh; |
718 | 333M | int sse_calc_done = 0; |
719 | | #if CONFIG_MISMATCH_DEBUG |
720 | | struct encode_b_args encode_b_arg = { |
721 | | x, enable_trellis_opt, trellis_opt_thresh, &sse_calc_done, |
722 | | &sse, args->t_above, args->t_left, &mi->skip, |
723 | | 0, // mi_row |
724 | | 0, // mi_col |
725 | | 0 // output_enabled |
726 | | }; |
727 | | #else |
728 | 333M | struct encode_b_args encode_b_arg = { |
729 | 333M | x, enable_trellis_opt, trellis_opt_thresh, &sse_calc_done, |
730 | 333M | &sse, args->t_above, args->t_left, &mi->skip |
731 | 333M | }; |
732 | 333M | #endif |
733 | | |
734 | 333M | if (args->exit_early) return; |
735 | | |
736 | 325M | if (!is_inter_block(mi)) { |
737 | 263M | vp9_encode_block_intra(plane, block, blk_row, blk_col, plane_bsize, tx_size, |
738 | 263M | &encode_b_arg); |
739 | 263M | if (recon) { |
740 | 0 | uint8_t *rec_ptr = &recon->buf[4 * (blk_row * recon->stride + blk_col)]; |
741 | 0 | copy_block_visible(xd, pd, dst, dst_stride, rec_ptr, recon->stride, |
742 | 0 | blk_row, blk_col, plane_bsize, tx_bsize); |
743 | 0 | } |
744 | 263M | if (x->block_tx_domain) { |
745 | 220M | dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col, |
746 | 220M | tx_size, &dist, &sse, /*out_recon=*/NULL, sse_calc_done); |
747 | 220M | } else { |
748 | 43.9M | const struct macroblock_plane *const p = &x->plane[plane]; |
749 | 43.9M | const int src_stride = p->src.stride; |
750 | 43.9M | const uint8_t *src = &p->src.buf[4 * (blk_row * src_stride + blk_col)]; |
751 | 43.9M | unsigned int tmp; |
752 | 43.9M | if (!sse_calc_done) { |
753 | 43.9M | const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; |
754 | 43.9M | const int16_t *diff = |
755 | 43.9M | &p->src_diff[4 * (blk_row * diff_stride + blk_col)]; |
756 | 43.9M | int visible_width, visible_height; |
757 | 43.9M | sse = sum_squares_visible(xd, pd, diff, diff_stride, blk_row, blk_col, |
758 | 43.9M | plane_bsize, tx_bsize, &visible_width, |
759 | 43.9M | &visible_height); |
760 | 43.9M | } |
761 | 43.9M | #if CONFIG_VP9_HIGHBITDEPTH |
762 | 43.9M | if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) && (xd->bd > 8)) |
763 | 0 | sse = ROUND64_POWER_OF_TWO(sse, (xd->bd - 8) * 2); |
764 | 43.9M | #endif // CONFIG_VP9_HIGHBITDEPTH |
765 | 43.9M | sse = sse * 16; |
766 | 43.9M | tmp = pixel_sse(args->cpi, xd, pd, src, src_stride, dst, dst_stride, |
767 | 43.9M | blk_row, blk_col, plane_bsize, tx_bsize); |
768 | 43.9M | dist = (int64_t)tmp * 16; |
769 | 43.9M | } |
770 | 263M | } else { |
771 | 61.1M | int skip_txfm_flag = SKIP_TXFM_NONE; |
772 | 61.1M | if (max_txsize_lookup[plane_bsize] == tx_size) |
773 | 40.2M | skip_txfm_flag = x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))]; |
774 | | |
775 | | // This reduces the risk of bad perceptual quality due to bad prediction. |
776 | | // We always force the encoder to perform transform and quantization. |
777 | 61.1M | if (!args->cpi->sf.allow_skip_txfm_ac_dc && |
778 | 61.1M | skip_txfm_flag == SKIP_TXFM_AC_DC) { |
779 | 1.88M | skip_txfm_flag = SKIP_TXFM_NONE; |
780 | 1.88M | } |
781 | | |
782 | 61.1M | if (skip_txfm_flag == SKIP_TXFM_NONE || |
783 | 61.1M | (recon && skip_txfm_flag == SKIP_TXFM_AC_ONLY)) { |
784 | 60.8M | const struct macroblock_plane *const p = &x->plane[plane]; |
785 | 60.8M | const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; |
786 | 60.8M | const int16_t *const diff = |
787 | 60.8M | &p->src_diff[4 * (blk_row * diff_stride + blk_col)]; |
788 | 60.8M | const int use_trellis_opt = |
789 | 60.8M | do_trellis_opt(pd, diff, diff_stride, blk_row, blk_col, plane_bsize, |
790 | 60.8M | tx_size, &encode_b_arg); |
791 | | // full forward transform and quantization |
792 | 60.8M | vp9_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size); |
793 | 60.8M | if (use_trellis_opt) vp9_optimize_b(x, plane, block, tx_size, coeff_ctx); |
794 | 60.8M | dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col, |
795 | 60.8M | tx_size, &dist, &sse, recon, sse_calc_done); |
796 | 60.8M | } else if (skip_txfm_flag == SKIP_TXFM_AC_ONLY) { |
797 | | // compute DC coefficient |
798 | 301k | tran_low_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block); |
799 | 301k | tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block); |
800 | 301k | vp9_xform_quant_dc(x, plane, block, blk_row, blk_col, plane_bsize, |
801 | 301k | tx_size); |
802 | 301k | sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4; |
803 | 301k | dist = sse; |
804 | 301k | if (x->plane[plane].eobs[block]) { |
805 | 297k | const int64_t orig_sse = (int64_t)coeff[0] * coeff[0]; |
806 | 297k | const int64_t resd_sse = coeff[0] - dqcoeff[0]; |
807 | 297k | int64_t dc_correct = orig_sse - resd_sse * resd_sse; |
808 | 297k | #if CONFIG_VP9_HIGHBITDEPTH |
809 | 297k | dc_correct >>= ((xd->bd - 8) * 2); |
810 | 297k | #endif |
811 | 297k | if (tx_size != TX_32X32) dc_correct >>= 2; |
812 | | |
813 | 297k | dist = VPXMAX(0, sse - dc_correct); |
814 | 297k | } |
815 | 301k | } else { |
816 | 0 | assert(0 && "allow_skip_txfm_ac_dc does not allow SKIP_TXFM_AC_DC."); |
817 | 0 | } |
818 | 61.1M | } |
819 | | |
820 | 325M | rd = RDCOST(x->rdmult, x->rddiv, 0, dist); |
821 | 325M | if (args->this_rd + rd > args->best_rd) { |
822 | 6.80M | args->exit_early = 1; |
823 | 6.80M | return; |
824 | 6.80M | } |
825 | | |
826 | 318M | rate = rate_block(plane, block, tx_size, coeff_ctx, args); |
827 | 318M | args->t_above[blk_col] = (x->plane[plane].eobs[block] > 0) ? 1 : 0; |
828 | 318M | args->t_left[blk_row] = (x->plane[plane].eobs[block] > 0) ? 1 : 0; |
829 | 318M | rd1 = RDCOST(x->rdmult, x->rddiv, rate, dist); |
830 | 318M | rd2 = RDCOST(x->rdmult, x->rddiv, 0, sse); |
831 | | |
832 | | // TODO(jingning): temporarily enabled only for luma component |
833 | 318M | rd = VPXMIN(rd1, rd2); |
834 | 318M | if (plane == 0) { |
835 | 126M | x->zcoeff_blk[tx_size][block] = |
836 | 126M | !x->plane[plane].eobs[block] || |
837 | 126M | (x->sharpness == 0 && rd1 > rd2 && !xd->lossless); |
838 | 126M | x->sum_y_eobs[tx_size] += x->plane[plane].eobs[block]; |
839 | 126M | } |
840 | | |
841 | 318M | args->this_rate += rate; |
842 | 318M | args->this_dist += dist; |
843 | 318M | args->this_sse += sse; |
844 | 318M | args->this_rd += rd; |
845 | | |
846 | 318M | if (args->this_rd > args->best_rd) { |
847 | 29.0M | args->exit_early = 1; |
848 | 29.0M | return; |
849 | 29.0M | } |
850 | | |
851 | 289M | args->skippable &= !x->plane[plane].eobs[block]; |
852 | 289M | } |
853 | | |
854 | | static void txfm_rd_in_plane(const VP9_COMP *cpi, MACROBLOCK *x, int *rate, |
855 | | int64_t *distortion, int *skippable, int64_t *sse, |
856 | | int64_t ref_best_rd, int plane, BLOCK_SIZE bsize, |
857 | | TX_SIZE tx_size, int use_fast_coef_costing, |
858 | 202M | struct buf_2d *recon) { |
859 | 202M | MACROBLOCKD *const xd = &x->e_mbd; |
860 | 202M | const struct macroblockd_plane *const pd = &xd->plane[plane]; |
861 | 202M | struct rdcost_block_args args; |
862 | 202M | vp9_zero(args); |
863 | 202M | args.cpi = cpi; |
864 | 202M | args.x = x; |
865 | 202M | args.best_rd = ref_best_rd; |
866 | 202M | args.use_fast_coef_costing = use_fast_coef_costing; |
867 | 202M | args.skippable = 1; |
868 | 202M | args.this_recon = recon; |
869 | | |
870 | 202M | if (plane == 0) xd->mi[0]->tx_size = tx_size; |
871 | | |
872 | 202M | vp9_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left); |
873 | | |
874 | 202M | args.so = get_scan(xd, tx_size, get_plane_type(plane), 0); |
875 | | |
876 | 202M | vp9_foreach_transformed_block_in_plane(xd, bsize, plane, block_rd_txfm, |
877 | 202M | &args); |
878 | 202M | if (args.exit_early) { |
879 | 35.8M | *rate = INT_MAX; |
880 | 35.8M | *distortion = INT64_MAX; |
881 | 35.8M | *sse = INT64_MAX; |
882 | 35.8M | *skippable = 0; |
883 | 166M | } else { |
884 | 166M | *distortion = args.this_dist; |
885 | 166M | *rate = args.this_rate; |
886 | 166M | *sse = args.this_sse; |
887 | 166M | *skippable = args.skippable; |
888 | 166M | } |
889 | 202M | } |
890 | | |
891 | | static void choose_largest_tx_size(VP9_COMP *cpi, MACROBLOCK *x, int *rate, |
892 | | int64_t *distortion, int *skip, int64_t *sse, |
893 | | int64_t ref_best_rd, BLOCK_SIZE bs, |
894 | 24.0M | struct buf_2d *recon) { |
895 | 24.0M | const TX_SIZE max_tx_size = max_txsize_lookup[bs]; |
896 | 24.0M | VP9_COMMON *const cm = &cpi->common; |
897 | 24.0M | const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode]; |
898 | 24.0M | MACROBLOCKD *const xd = &x->e_mbd; |
899 | 24.0M | MODE_INFO *const mi = xd->mi[0]; |
900 | | |
901 | 24.0M | mi->tx_size = VPXMIN(max_tx_size, largest_tx_size); |
902 | | |
903 | 24.0M | txfm_rd_in_plane(cpi, x, rate, distortion, skip, sse, ref_best_rd, 0, bs, |
904 | 24.0M | mi->tx_size, cpi->sf.use_fast_coef_costing, recon); |
905 | 24.0M | } |
906 | | |
907 | | static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, |
908 | | int64_t *distortion, int *skip, |
909 | | int64_t *psse, int64_t ref_best_rd, |
910 | 23.7M | BLOCK_SIZE bs, struct buf_2d *recon) { |
911 | 23.7M | const TX_SIZE max_tx_size = max_txsize_lookup[bs]; |
912 | 23.7M | VP9_COMMON *const cm = &cpi->common; |
913 | 23.7M | MACROBLOCKD *const xd = &x->e_mbd; |
914 | 23.7M | MODE_INFO *const mi = xd->mi[0]; |
915 | 23.7M | vpx_prob skip_prob = vp9_get_skip_prob(cm, xd); |
916 | 23.7M | int r[TX_SIZES][2], s[TX_SIZES]; |
917 | 23.7M | int64_t d[TX_SIZES], sse[TX_SIZES]; |
918 | 23.7M | int64_t rd[TX_SIZES][2] = { { INT64_MAX, INT64_MAX }, |
919 | 23.7M | { INT64_MAX, INT64_MAX }, |
920 | 23.7M | { INT64_MAX, INT64_MAX }, |
921 | 23.7M | { INT64_MAX, INT64_MAX } }; |
922 | 23.7M | int n; |
923 | 23.7M | int s0, s1; |
924 | 23.7M | int64_t best_rd = ref_best_rd; |
925 | 23.7M | TX_SIZE best_tx = max_tx_size; |
926 | 23.7M | int start_tx, end_tx; |
927 | 23.7M | const int tx_size_ctx = get_tx_size_context(xd); |
928 | 23.7M | #if CONFIG_VP9_HIGHBITDEPTH |
929 | 23.7M | DECLARE_ALIGNED(16, uint16_t, recon_buf16[TX_SIZES][64 * 64]); |
930 | 23.7M | uint8_t *recon_buf[TX_SIZES]; |
931 | 118M | for (n = 0; n < TX_SIZES; ++n) { |
932 | 94.9M | if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { |
933 | 0 | recon_buf[n] = CONVERT_TO_BYTEPTR(recon_buf16[n]); |
934 | 94.9M | } else { |
935 | 94.9M | recon_buf[n] = (uint8_t *)recon_buf16[n]; |
936 | 94.9M | } |
937 | 94.9M | } |
938 | | #else |
939 | | DECLARE_ALIGNED(16, uint8_t, recon_buf[TX_SIZES][64 * 64]); |
940 | | #endif // CONFIG_VP9_HIGHBITDEPTH |
941 | | |
942 | 23.7M | assert(skip_prob > 0); |
943 | 23.7M | s0 = vp9_cost_bit(skip_prob, 0); |
944 | 23.7M | s1 = vp9_cost_bit(skip_prob, 1); |
945 | | |
946 | 23.7M | if (cm->tx_mode == TX_MODE_SELECT) { |
947 | 23.7M | start_tx = max_tx_size; |
948 | 23.7M | end_tx = VPXMAX(start_tx - cpi->sf.tx_size_search_depth, 0); |
949 | 23.7M | if (bs > BLOCK_32X32) end_tx = VPXMIN(end_tx + 1, start_tx); |
950 | 23.7M | } else { |
951 | 0 | TX_SIZE chosen_tx_size = |
952 | 0 | VPXMIN(max_tx_size, tx_mode_to_biggest_tx_size[cm->tx_mode]); |
953 | 0 | start_tx = chosen_tx_size; |
954 | 0 | end_tx = chosen_tx_size; |
955 | 0 | } |
956 | | |
957 | 36.1M | for (n = start_tx; n >= end_tx; n--) { |
958 | 31.8M | const int r_tx_size = cpi->tx_size_cost[max_tx_size - 1][tx_size_ctx][n]; |
959 | 31.8M | if (recon) { |
960 | 0 | struct buf_2d this_recon; |
961 | 0 | this_recon.buf = recon_buf[n]; |
962 | 0 | this_recon.stride = recon->stride; |
963 | 0 | txfm_rd_in_plane(cpi, x, &r[n][0], &d[n], &s[n], &sse[n], best_rd, 0, bs, |
964 | 0 | n, cpi->sf.use_fast_coef_costing, &this_recon); |
965 | 31.8M | } else { |
966 | 31.8M | txfm_rd_in_plane(cpi, x, &r[n][0], &d[n], &s[n], &sse[n], best_rd, 0, bs, |
967 | 31.8M | n, cpi->sf.use_fast_coef_costing, 0); |
968 | 31.8M | } |
969 | 31.8M | r[n][1] = r[n][0]; |
970 | 31.8M | if (r[n][0] < INT_MAX) { |
971 | 16.2M | r[n][1] += r_tx_size; |
972 | 16.2M | } |
973 | 31.8M | if (d[n] == INT64_MAX || r[n][0] == INT_MAX) { |
974 | 15.5M | rd[n][0] = rd[n][1] = INT64_MAX; |
975 | 16.2M | } else if (s[n]) { |
976 | 3.26M | if (is_inter_block(mi)) { |
977 | 633k | rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, sse[n]); |
978 | 633k | r[n][1] -= r_tx_size; |
979 | 2.63M | } else { |
980 | 2.63M | rd[n][0] = RDCOST(x->rdmult, x->rddiv, s1, sse[n]); |
981 | 2.63M | rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1 + r_tx_size, sse[n]); |
982 | 2.63M | } |
983 | 13.0M | } else { |
984 | 13.0M | rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]); |
985 | 13.0M | rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]); |
986 | 13.0M | } |
987 | | |
988 | 31.8M | if (is_inter_block(mi) && !xd->lossless && !s[n] && sse[n] != INT64_MAX) { |
989 | 4.23M | rd[n][0] = VPXMIN(rd[n][0], RDCOST(x->rdmult, x->rddiv, s1, sse[n])); |
990 | 4.23M | rd[n][1] = VPXMIN(rd[n][1], RDCOST(x->rdmult, x->rddiv, s1, sse[n])); |
991 | 4.23M | } |
992 | | |
993 | | // Early termination in transform size search. |
994 | 31.8M | if (cpi->sf.tx_size_search_breakout && |
995 | 31.8M | (rd[n][1] == INT64_MAX || |
996 | 31.8M | (n < (int)max_tx_size && rd[n][1] > rd[n + 1][1]) || s[n] == 1)) |
997 | 19.4M | break; |
998 | | |
999 | 12.4M | if (rd[n][1] < best_rd) { |
1000 | 12.1M | best_tx = n; |
1001 | 12.1M | best_rd = rd[n][1]; |
1002 | 12.1M | } |
1003 | 12.4M | } |
1004 | 23.7M | mi->tx_size = best_tx; |
1005 | | |
1006 | 23.7M | *distortion = d[mi->tx_size]; |
1007 | 23.7M | *rate = r[mi->tx_size][cm->tx_mode == TX_MODE_SELECT]; |
1008 | 23.7M | *skip = s[mi->tx_size]; |
1009 | 23.7M | *psse = sse[mi->tx_size]; |
1010 | 23.7M | if (recon) { |
1011 | 0 | #if CONFIG_VP9_HIGHBITDEPTH |
1012 | 0 | if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { |
1013 | 0 | memcpy(CONVERT_TO_SHORTPTR(recon->buf), |
1014 | 0 | CONVERT_TO_SHORTPTR(recon_buf[mi->tx_size]), |
1015 | 0 | 64 * 64 * sizeof(uint16_t)); |
1016 | 0 | } else { |
1017 | 0 | #endif |
1018 | 0 | memcpy(recon->buf, recon_buf[mi->tx_size], 64 * 64); |
1019 | 0 | #if CONFIG_VP9_HIGHBITDEPTH |
1020 | 0 | } |
1021 | 0 | #endif |
1022 | 0 | } |
1023 | 23.7M | } |
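/* Editor's note: condensed sketch of how each transform-size candidate is
 * scored in choose_tx_size_from_rd() above; illustrative only, with names
 * local to this sketch. A candidate is priced either as "code the
 * coefficients" (its rate plus the skip-flag-off cost s0, against the coded
 * distortion) or as "skip and keep the prediction" (the skip-flag-on cost s1,
 * against the raw SSE); when skipping is allowed, the cheaper of the two
 * wins. */
static int64_t tx_size_candidate_rd_sketch(const MACROBLOCK *x, int rate,
                                           int s0, int s1, int64_t dist,
                                           int64_t sse, int allow_skip_rd) {
  const int64_t rd_coded = RDCOST(x->rdmult, x->rddiv, rate + s0, dist);
  const int64_t rd_skip = RDCOST(x->rdmult, x->rddiv, s1, sse);
  return allow_skip_rd ? VPXMIN(rd_coded, rd_skip) : rd_coded;
}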
1024 | | |
1025 | | static void super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, |
1026 | | int64_t *distortion, int *skip, int64_t *psse, |
1027 | | BLOCK_SIZE bs, int64_t ref_best_rd, |
1028 | 47.7M | struct buf_2d *recon) { |
1029 | 47.7M | MACROBLOCKD *xd = &x->e_mbd; |
1030 | 47.7M | int64_t sse; |
1031 | 47.7M | int64_t *ret_sse = psse ? psse : &sse; |
1032 | | |
1033 | 47.7M | assert(bs == xd->mi[0]->sb_type); |
1034 | | |
1035 | 47.7M | if (cpi->sf.tx_size_search_method == USE_LARGESTALL || xd->lossless) { |
1036 | 24.0M | choose_largest_tx_size(cpi, x, rate, distortion, skip, ret_sse, ref_best_rd, |
1037 | 24.0M | bs, recon); |
1038 | 24.0M | } else { |
1039 | 23.7M | choose_tx_size_from_rd(cpi, x, rate, distortion, skip, ret_sse, ref_best_rd, |
1040 | 23.7M | bs, recon); |
1041 | 23.7M | } |
1042 | 47.7M | } |
1043 | | |
1044 | | static int conditional_skipintra(PREDICTION_MODE mode, |
1045 | 0 | PREDICTION_MODE best_intra_mode) { |
1046 | 0 | if (mode == D117_PRED && best_intra_mode != V_PRED && |
1047 | 0 | best_intra_mode != D135_PRED) |
1048 | 0 | return 1; |
1049 | 0 | if (mode == D63_PRED && best_intra_mode != V_PRED && |
1050 | 0 | best_intra_mode != D45_PRED) |
1051 | 0 | return 1; |
1052 | 0 | if (mode == D207_PRED && best_intra_mode != H_PRED && |
1053 | 0 | best_intra_mode != D45_PRED) |
1054 | 0 | return 1; |
1055 | 0 | if (mode == D153_PRED && best_intra_mode != H_PRED && |
1056 | 0 | best_intra_mode != D135_PRED) |
1057 | 0 | return 1; |
1058 | 0 | return 0; |
1059 | 0 | } |
1060 | | |
1061 | | static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int row, |
1062 | | int col, PREDICTION_MODE *best_mode, |
1063 | | const int *bmode_costs, ENTROPY_CONTEXT *a, |
1064 | | ENTROPY_CONTEXT *l, int *bestrate, |
1065 | | int *bestratey, int64_t *bestdistortion, |
1066 | 11.4M | BLOCK_SIZE bsize, int64_t rd_thresh) { |
1067 | 11.4M | PREDICTION_MODE mode; |
1068 | 11.4M | MACROBLOCKD *const xd = &x->e_mbd; |
1069 | 11.4M | int64_t best_rd = rd_thresh; |
1070 | 11.4M | struct macroblock_plane *p = &x->plane[0]; |
1071 | 11.4M | struct macroblockd_plane *pd = &xd->plane[0]; |
1072 | 11.4M | const int src_stride = p->src.stride; |
1073 | 11.4M | const int dst_stride = pd->dst.stride; |
1074 | 11.4M | const uint8_t *src_init = &p->src.buf[row * 4 * src_stride + col * 4]; |
1075 | 11.4M | uint8_t *dst_init = &pd->dst.buf[row * 4 * src_stride + col * 4]; |
1076 | 11.4M | ENTROPY_CONTEXT ta[2], tempa[2]; |
1077 | 11.4M | ENTROPY_CONTEXT tl[2], templ[2]; |
1078 | 11.4M | const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; |
1079 | 11.4M | const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; |
1080 | 11.4M | int idx, idy; |
1081 | 11.4M | uint8_t best_dst[8 * 8]; |
1082 | 11.4M | #if CONFIG_VP9_HIGHBITDEPTH |
1083 | 11.4M | uint16_t best_dst16[8 * 8]; |
1084 | 11.4M | #endif |
1085 | 11.4M | memcpy(ta, a, num_4x4_blocks_wide * sizeof(a[0])); |
1086 | 11.4M | memcpy(tl, l, num_4x4_blocks_high * sizeof(l[0])); |
1087 | | |
1088 | 11.4M | xd->mi[0]->tx_size = TX_4X4; |
1089 | | |
1090 | 11.4M | assert(!x->skip_block); |
1091 | | |
1092 | 11.4M | #if CONFIG_VP9_HIGHBITDEPTH |
1093 | 11.4M | if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { |
1094 | 0 | for (mode = DC_PRED; mode <= TM_PRED; ++mode) { |
1095 | 0 | int64_t this_rd; |
1096 | 0 | int ratey = 0; |
1097 | 0 | int64_t distortion = 0; |
1098 | 0 | int rate = bmode_costs[mode]; |
1099 | |
1100 | 0 | if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode))) continue; |
1101 | | |
1102 | | // Only do the oblique modes if the best so far is |
1103 | | // one of the neighboring directional modes |
1104 | 0 | if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) { |
1105 | 0 | if (conditional_skipintra(mode, *best_mode)) continue; |
1106 | 0 | } |
1107 | | |
1108 | 0 | memcpy(tempa, ta, num_4x4_blocks_wide * sizeof(ta[0])); |
1109 | 0 | memcpy(templ, tl, num_4x4_blocks_high * sizeof(tl[0])); |
1110 | |
1111 | 0 | for (idy = 0; idy < num_4x4_blocks_high; ++idy) { |
1112 | 0 | for (idx = 0; idx < num_4x4_blocks_wide; ++idx) { |
1113 | 0 | const int block = (row + idy) * 2 + (col + idx); |
1114 | 0 | const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride]; |
1115 | 0 | uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride]; |
1116 | 0 | uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst); |
1117 | 0 | int16_t *const src_diff = |
1118 | 0 | vp9_raster_block_offset_int16(BLOCK_8X8, block, p->src_diff); |
1119 | 0 | tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); |
1120 | 0 | tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); |
1121 | 0 | tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); |
1122 | 0 | uint16_t *const eob = &p->eobs[block]; |
1123 | 0 | xd->mi[0]->bmi[block].as_mode = mode; |
1124 | 0 | vp9_predict_intra_block(xd, 1, TX_4X4, mode, |
1125 | 0 | x->skip_encode ? src : dst, |
1126 | 0 | x->skip_encode ? src_stride : dst_stride, dst, |
1127 | 0 | dst_stride, col + idx, row + idy, 0); |
1128 | 0 | vpx_highbd_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, |
1129 | 0 | dst_stride, xd->bd); |
1130 | 0 | if (xd->lossless) { |
1131 | 0 | const ScanOrder *so = &vp9_default_scan_orders[TX_4X4]; |
1132 | 0 | const int coeff_ctx = |
1133 | 0 | combine_entropy_contexts(tempa[idx], templ[idy]); |
1134 | 0 | vp9_highbd_fwht4x4(src_diff, coeff, 8); |
1135 | 0 | vpx_highbd_quantize_b(coeff, 4 * 4, p, qcoeff, dqcoeff, pd->dequant, |
1136 | 0 | eob, so); |
1137 | 0 | ratey += cost_coeffs(x, 0, block, TX_4X4, coeff_ctx, so->scan, |
1138 | 0 | so->neighbors, cpi->sf.use_fast_coef_costing); |
1139 | 0 | tempa[idx] = templ[idy] = (x->plane[0].eobs[block] > 0 ? 1 : 0); |
1140 | 0 | if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd) |
1141 | 0 | goto next_highbd; |
1142 | 0 | vp9_highbd_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block), dst16, |
1143 | 0 | dst_stride, p->eobs[block], xd->bd); |
1144 | 0 | } else { |
1145 | 0 | int64_t unused; |
1146 | 0 | const TX_TYPE tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block); |
1147 | 0 | const ScanOrder *so = &vp9_scan_orders[TX_4X4][tx_type]; |
1148 | 0 | const int coeff_ctx = |
1149 | 0 | combine_entropy_contexts(tempa[idx], templ[idy]); |
1150 | 0 | if (tx_type == DCT_DCT) |
1151 | 0 | vpx_highbd_fdct4x4(src_diff, coeff, 8); |
1152 | 0 | else |
1153 | 0 | vp9_highbd_fht4x4(src_diff, coeff, 8, tx_type); |
1154 | 0 | vpx_highbd_quantize_b(coeff, 4 * 4, p, qcoeff, dqcoeff, pd->dequant, |
1155 | 0 | eob, so); |
1156 | 0 | ratey += cost_coeffs(x, 0, block, TX_4X4, coeff_ctx, so->scan, |
1157 | 0 | so->neighbors, cpi->sf.use_fast_coef_costing); |
1158 | 0 | distortion += vp9_highbd_block_error_dispatch( |
1159 | 0 | coeff, BLOCK_OFFSET(pd->dqcoeff, block), 16, |
1160 | 0 | &unused, xd->bd) >> |
1161 | 0 | 2; |
1162 | 0 | tempa[idx] = templ[idy] = (x->plane[0].eobs[block] > 0 ? 1 : 0); |
1163 | 0 | if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd) |
1164 | 0 | goto next_highbd; |
1165 | 0 | vp9_highbd_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block), |
1166 | 0 | dst16, dst_stride, p->eobs[block], xd->bd); |
1167 | 0 | } |
1168 | 0 | } |
1169 | 0 | } |
1170 | | |
1171 | 0 | rate += ratey; |
1172 | 0 | this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); |
1173 | |
1174 | 0 | if (this_rd < best_rd) { |
1175 | 0 | *bestrate = rate; |
1176 | 0 | *bestratey = ratey; |
1177 | 0 | *bestdistortion = distortion; |
1178 | 0 | best_rd = this_rd; |
1179 | 0 | *best_mode = mode; |
1180 | 0 | memcpy(a, tempa, num_4x4_blocks_wide * sizeof(tempa[0])); |
1181 | 0 | memcpy(l, templ, num_4x4_blocks_high * sizeof(templ[0])); |
1182 | 0 | for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) { |
1183 | 0 | memcpy(best_dst16 + idy * 8, |
1184 | 0 | CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride), |
1185 | 0 | num_4x4_blocks_wide * 4 * sizeof(uint16_t)); |
1186 | 0 | } |
1187 | 0 | } |
1188 | 0 | next_highbd : {} |
1189 | 0 | } |
1190 | 0 | if (best_rd >= rd_thresh || x->skip_encode) return best_rd; |
1191 | | |
1192 | 0 | for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) { |
1193 | 0 | memcpy(CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride), |
1194 | 0 | best_dst16 + idy * 8, num_4x4_blocks_wide * 4 * sizeof(uint16_t)); |
1195 | 0 | } |
1196 | |
1197 | 0 | return best_rd; |
1198 | 0 | } |
1199 | 11.4M | #endif // CONFIG_VP9_HIGHBITDEPTH |
1200 | | |
1201 | 126M | for (mode = DC_PRED; mode <= TM_PRED; ++mode) { |
1202 | 114M | int64_t this_rd; |
1203 | 114M | int ratey = 0; |
1204 | 114M | int64_t distortion = 0; |
1205 | 114M | int rate = bmode_costs[mode]; |
1206 | | |
1207 | 114M | if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode))) continue; |
1208 | | |
1209 | | // Only do the oblique modes if the best so far is |
1210 | | // one of the neighboring directional modes |
1211 | 114M | if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) { |
1212 | 0 | if (conditional_skipintra(mode, *best_mode)) continue; |
1213 | 0 | } |
1214 | | |
1215 | 114M | memcpy(tempa, ta, num_4x4_blocks_wide * sizeof(ta[0])); |
1216 | 114M | memcpy(templ, tl, num_4x4_blocks_high * sizeof(tl[0])); |
1217 | | |
1218 | 171M | for (idy = 0; idy < num_4x4_blocks_high; ++idy) { |
1219 | 200M | for (idx = 0; idx < num_4x4_blocks_wide; ++idx) { |
1220 | 144M | const int block = (row + idy) * 2 + (col + idx); |
1221 | 144M | const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride]; |
1222 | 144M | uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride]; |
1223 | 144M | int16_t *const src_diff = |
1224 | 144M | vp9_raster_block_offset_int16(BLOCK_8X8, block, p->src_diff); |
1225 | 144M | tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); |
1226 | 144M | tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); |
1227 | 144M | tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); |
1228 | 144M | uint16_t *const eob = &p->eobs[block]; |
1229 | 144M | xd->mi[0]->bmi[block].as_mode = mode; |
1230 | 144M | vp9_predict_intra_block(xd, 1, TX_4X4, mode, x->skip_encode ? src : dst, |
1231 | 144M | x->skip_encode ? src_stride : dst_stride, dst, |
1232 | 144M | dst_stride, col + idx, row + idy, 0); |
1233 | 144M | vpx_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride); |
1234 | | |
1235 | 144M | if (xd->lossless) { |
1236 | 10.9M | const ScanOrder *so = &vp9_default_scan_orders[TX_4X4]; |
1237 | 10.9M | const int coeff_ctx = |
1238 | 10.9M | combine_entropy_contexts(tempa[idx], templ[idy]); |
1239 | 10.9M | vp9_fwht4x4(src_diff, coeff, 8); |
1240 | 10.9M | vpx_quantize_b(coeff, 4 * 4, p, qcoeff, dqcoeff, pd->dequant, eob, |
1241 | 10.9M | so); |
1242 | 10.9M | ratey += cost_coeffs(x, 0, block, TX_4X4, coeff_ctx, so->scan, |
1243 | 10.9M | so->neighbors, cpi->sf.use_fast_coef_costing); |
1244 | 10.9M | tempa[idx] = templ[idy] = (x->plane[0].eobs[block] > 0) ? 1 : 0; |
1245 | 10.9M | if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd) |
1246 | 5.64M | goto next; |
1247 | 5.35M | vp9_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block), dst, dst_stride, |
1248 | 5.35M | p->eobs[block]); |
1249 | 133M | } else { |
1250 | 133M | int64_t unused; |
1251 | 133M | const TX_TYPE tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block); |
1252 | 133M | const ScanOrder *so = &vp9_scan_orders[TX_4X4][tx_type]; |
1253 | 133M | const int coeff_ctx = |
1254 | 133M | combine_entropy_contexts(tempa[idx], templ[idy]); |
1255 | 133M | vp9_fht4x4(src_diff, coeff, 8, tx_type); |
1256 | 133M | vpx_quantize_b(coeff, 4 * 4, p, qcoeff, dqcoeff, pd->dequant, eob, |
1257 | 133M | so); |
1258 | 133M | ratey += cost_coeffs(x, 0, block, TX_4X4, coeff_ctx, so->scan, |
1259 | 133M | so->neighbors, cpi->sf.use_fast_coef_costing); |
1260 | 133M | tempa[idx] = templ[idy] = (x->plane[0].eobs[block] > 0) ? 1 : 0; |
1261 | 133M | distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block), |
1262 | 133M | 16, &unused) >> |
1263 | 133M | 2; |
1264 | 133M | if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd) |
1265 | 67.3M | goto next; |
1266 | 65.8M | vp9_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block), dst, |
1267 | 65.8M | dst_stride, p->eobs[block]); |
1268 | 65.8M | } |
1269 | 144M | } |
1270 | 129M | } |
1271 | | |
1272 | 41.6M | rate += ratey; |
1273 | 41.6M | this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); |
1274 | | |
1275 | 41.6M | if (this_rd < best_rd) { |
1276 | 19.7M | *bestrate = rate; |
1277 | 19.7M | *bestratey = ratey; |
1278 | 19.7M | *bestdistortion = distortion; |
1279 | 19.7M | best_rd = this_rd; |
1280 | 19.7M | *best_mode = mode; |
1281 | 19.7M | memcpy(a, tempa, num_4x4_blocks_wide * sizeof(tempa[0])); |
1282 | 19.7M | memcpy(l, templ, num_4x4_blocks_high * sizeof(templ[0])); |
1283 | 109M | for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) |
1284 | 90.2M | memcpy(best_dst + idy * 8, dst_init + idy * dst_stride, |
1285 | 90.2M | num_4x4_blocks_wide * 4); |
1286 | 19.7M | } |
1287 | 114M | next : {} |
1288 | 114M | } |
1289 | | |
1290 | 11.4M | if (best_rd >= rd_thresh || x->skip_encode) return best_rd; |
1291 | | |
1292 | 58.4M | for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) |
1293 | 47.9M | memcpy(dst_init + idy * dst_stride, best_dst + idy * 8, |
1294 | 47.9M | num_4x4_blocks_wide * 4); |
1295 | | |
1296 | 10.5M | return best_rd; |
1297 | 11.4M | } |
1298 | | |
1299 | | static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP *cpi, MACROBLOCK *mb, |
1300 | | int *rate, int *rate_y, |
1301 | | int64_t *distortion, |
1302 | 3.88M | int64_t best_rd) { |
1303 | 3.88M | int i, j; |
1304 | 3.88M | const MACROBLOCKD *const xd = &mb->e_mbd; |
1305 | 3.88M | MODE_INFO *const mic = xd->mi[0]; |
1306 | 3.88M | const MODE_INFO *above_mi = xd->above_mi; |
1307 | 3.88M | const MODE_INFO *left_mi = xd->left_mi; |
1308 | 3.88M | const BLOCK_SIZE bsize = xd->mi[0]->sb_type; |
1309 | 3.88M | const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; |
1310 | 3.88M | const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; |
1311 | 3.88M | int idx, idy; |
1312 | 3.88M | int cost = 0; |
1313 | 3.88M | int64_t total_distortion = 0; |
1314 | 3.88M | int tot_rate_y = 0; |
1315 | 3.88M | int64_t total_rd = 0; |
1316 | 3.88M | const int *bmode_costs = cpi->mbmode_cost; |
1317 | | |
1318 | | // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block. |
1319 | 9.67M | for (idy = 0; idy < 2; idy += num_4x4_blocks_high) { |
1320 | 17.2M | for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) { |
1321 | 11.4M | PREDICTION_MODE best_mode = DC_PRED; |
1322 | 11.4M | int r = INT_MAX, ry = INT_MAX; |
1323 | 11.4M | int64_t d = INT64_MAX, this_rd = INT64_MAX; |
1324 | 11.4M | i = idy * 2 + idx; |
1325 | 11.4M | if (cpi->common.frame_type == KEY_FRAME) { |
1326 | 3.80M | const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, i); |
1327 | 3.80M | const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, i); |
1328 | | |
1329 | 3.80M | bmode_costs = cpi->y_mode_costs[A][L]; |
1330 | 3.80M | } |
1331 | | |
1332 | 11.4M | this_rd = rd_pick_intra4x4block( |
1333 | 11.4M | cpi, mb, idy, idx, &best_mode, bmode_costs, |
1334 | 11.4M | xd->plane[0].above_context + idx, xd->plane[0].left_context + idy, &r, |
1335 | 11.4M | &ry, &d, bsize, best_rd - total_rd); |
1336 | | |
1337 | 11.4M | if (this_rd >= best_rd - total_rd) return INT64_MAX; |
1338 | | |
1339 | 10.5M | total_rd += this_rd; |
1340 | 10.5M | cost += r; |
1341 | 10.5M | total_distortion += d; |
1342 | 10.5M | tot_rate_y += ry; |
1343 | | |
1344 | 10.5M | mic->bmi[i].as_mode = best_mode; |
1345 | 11.9M | for (j = 1; j < num_4x4_blocks_high; ++j) |
1346 | 1.46M | mic->bmi[i + j * 2].as_mode = best_mode; |
1347 | 11.9M | for (j = 1; j < num_4x4_blocks_wide; ++j) |
1348 | 1.47M | mic->bmi[i + j].as_mode = best_mode; |
1349 | | |
1350 | 10.5M | if (total_rd >= best_rd) return INT64_MAX; |
1351 | 10.5M | } |
1352 | 6.73M | } |
1353 | | |
1354 | 2.93M | *rate = cost; |
1355 | 2.93M | *rate_y = tot_rate_y; |
1356 | 2.93M | *distortion = total_distortion; |
1357 | 2.93M | mic->mode = mic->bmi[3].as_mode; |
1358 | | |
1359 | 2.93M | return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion); |
1360 | 3.88M | } |
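The function above threads a shrinking budget (best_rd - total_rd) into every sub-block search and abandons the whole partition as soon as the accumulated cost can no longer beat the best candidate. A minimal sketch of that early-exit accumulation pattern; search_one_block() is a hypothetical stand-in for rd_pick_intra4x4block(), not a libvpx API.

#include <stdint.h>

/* Sketch only: accumulate per-block RD cost against a shrinking budget. */
static int64_t search_partition(int64_t best_rd, int num_blocks,
                                int64_t (*search_one_block)(int idx,
                                                            int64_t budget)) {
  int64_t total_rd = 0;
  for (int i = 0; i < num_blocks; ++i) {
    /* Each block only gets whatever budget is still left. */
    const int64_t this_rd = search_one_block(i, best_rd - total_rd);
    if (this_rd >= best_rd - total_rd) return INT64_MAX; /* cannot win */
    total_rd += this_rd;
  }
  return total_rd; /* the caller compares this against best_rd */
}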
1361 | | |
1362 | | // This function is used only for intra_only frames |
1363 | | static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, int *rate, |
1364 | | int *rate_tokenonly, int64_t *distortion, |
1365 | | int *skippable, BLOCK_SIZE bsize, |
1366 | 1.53M | int64_t best_rd) { |
1367 | 1.53M | PREDICTION_MODE mode; |
1368 | 1.53M | PREDICTION_MODE mode_selected = DC_PRED; |
1369 | 1.53M | MACROBLOCKD *const xd = &x->e_mbd; |
1370 | 1.53M | MODE_INFO *const mic = xd->mi[0]; |
1371 | 1.53M | int this_rate, this_rate_tokenonly, s; |
1372 | 1.53M | int64_t this_distortion, this_rd; |
1373 | 1.53M | TX_SIZE best_tx = TX_4X4; |
1374 | 1.53M | int *bmode_costs; |
1375 | 1.53M | const MODE_INFO *above_mi = xd->above_mi; |
1376 | 1.53M | const MODE_INFO *left_mi = xd->left_mi; |
1377 | 1.53M | const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0); |
1378 | 1.53M | const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0); |
1379 | 1.53M | bmode_costs = cpi->y_mode_costs[A][L]; |
1380 | | |
1381 | 1.53M | memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm)); |
1382 | | /* Y Search for intra prediction mode */ |
1383 | 16.9M | for (mode = DC_PRED; mode <= TM_PRED; mode++) { |
1384 | 15.3M | if (cpi->sf.use_nonrd_pick_mode) { |
1385 | | // These speed features are turned on in hybrid non-RD and RD mode |
1386 | | // for key frame coding in the context of a real-time setting. |
1387 | 0 | if (conditional_skipintra(mode, mode_selected)) continue; |
1388 | 0 | if (*skippable) break; |
1389 | 0 | } |
1390 | | |
1391 | 15.3M | mic->mode = mode; |
1392 | | |
1393 | 15.3M | super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, NULL, |
1394 | 15.3M | bsize, best_rd, /*recon=*/NULL); |
1395 | | |
1396 | 15.3M | if (this_rate_tokenonly == INT_MAX) continue; |
1397 | | |
1398 | 3.84M | this_rate = this_rate_tokenonly + bmode_costs[mode]; |
1399 | 3.84M | this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion); |
1400 | | |
1401 | 3.84M | if (this_rd < best_rd) { |
1402 | 1.62M | mode_selected = mode; |
1403 | 1.62M | best_rd = this_rd; |
1404 | 1.62M | best_tx = mic->tx_size; |
1405 | 1.62M | *rate = this_rate; |
1406 | 1.62M | *rate_tokenonly = this_rate_tokenonly; |
1407 | 1.62M | *distortion = this_distortion; |
1408 | 1.62M | *skippable = s; |
1409 | 1.62M | } |
1410 | 3.84M | } |
1411 | | |
1412 | 1.53M | mic->mode = mode_selected; |
1413 | 1.53M | mic->tx_size = best_tx; |
1414 | | |
1415 | 1.53M | return best_rd; |
1416 | 1.53M | } |
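The mode loop above is a straight minimisation of a Lagrangian cost that trades mode and token rate against reconstruction distortion. A toy comparison under the simplified form cost = rate * lambda + distortion; the real RDCOST macro in vp9_rd.h uses fixed-point rdmult/rddiv scaling, and every number below is invented.

#include <stdint.h>

static int toy_pick_cheaper_mode(void) {
  const int64_t lambda = 80;                        /* invented rate multiplier */
  const int64_t rate_dc = 300, dist_dc = 41000;     /* hypothetical DC_PRED     */
  const int64_t rate_tm = 420, dist_tm = 28000;     /* hypothetical TM_PRED     */
  const int64_t rd_dc = rate_dc * lambda + dist_dc; /* 65000                    */
  const int64_t rd_tm = rate_tm * lambda + dist_tm; /* 61600: cheaper overall   */
  return rd_tm < rd_dc; /* TM_PRED wins despite its higher rate */
}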
1417 | | |
1418 | | // Return value 0: early termination triggered, no valid rd cost available; |
1419 | | // 1: rd cost values are valid. |
1420 | | static int super_block_uvrd(const VP9_COMP *cpi, MACROBLOCK *x, int *rate, |
1421 | | int64_t *distortion, int *skippable, int64_t *sse, |
1422 | 77.5M | BLOCK_SIZE bsize, int64_t ref_best_rd) { |
1423 | 77.5M | MACROBLOCKD *const xd = &x->e_mbd; |
1424 | 77.5M | MODE_INFO *const mi = xd->mi[0]; |
1425 | 77.5M | const TX_SIZE uv_tx_size = get_uv_tx_size(mi, &xd->plane[1]); |
1426 | 77.5M | int plane; |
1427 | 77.5M | int pnrate = 0, pnskip = 1; |
1428 | 77.5M | int64_t pndist = 0, pnsse = 0; |
1429 | 77.5M | int is_cost_valid = 1; |
1430 | | |
1431 | 77.5M | if (ref_best_rd < 0) is_cost_valid = 0; |
1432 | | |
1433 | 77.5M | if (is_inter_block(mi) && is_cost_valid) { |
1434 | 37.6M | for (plane = 1; plane < MAX_MB_PLANE; ++plane) |
1435 | 25.1M | vp9_subtract_plane(x, bsize, plane); |
1436 | 12.5M | } |
1437 | | |
1438 | 77.5M | *rate = 0; |
1439 | 77.5M | *distortion = 0; |
1440 | 77.5M | *sse = 0; |
1441 | 77.5M | *skippable = 1; |
1442 | | |
1443 | 211M | for (plane = 1; plane < MAX_MB_PLANE; ++plane) { |
1444 | 146M | txfm_rd_in_plane(cpi, x, &pnrate, &pndist, &pnskip, &pnsse, ref_best_rd, |
1445 | 146M | plane, bsize, uv_tx_size, cpi->sf.use_fast_coef_costing, |
1446 | | /*recon=*/NULL); |
1447 | 146M | if (pnrate == INT_MAX) { |
1448 | 12.0M | is_cost_valid = 0; |
1449 | 12.0M | break; |
1450 | 12.0M | } |
1451 | 134M | *rate += pnrate; |
1452 | 134M | *distortion += pndist; |
1453 | 134M | *sse += pnsse; |
1454 | 134M | *skippable &= pnskip; |
1455 | 134M | } |
1456 | | |
1457 | 77.5M | if (!is_cost_valid) { |
1458 | | // reset cost value |
1459 | 12.0M | *rate = INT_MAX; |
1460 | 12.0M | *distortion = INT64_MAX; |
1461 | 12.0M | *sse = INT64_MAX; |
1462 | 12.0M | *skippable = 0; |
1463 | 12.0M | } |
1464 | | |
1465 | 77.5M | return is_cost_valid; |
1466 | 77.5M | } |
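super_block_uvrd() folds both chroma planes into one rate/distortion total and uses a per-plane rate of INT_MAX as its "budget already exceeded" sentinel. A hedged sketch of that accumulate-or-invalidate pattern; PlaneRD and plane_rd() are invented stand-ins for the pointer-based txfm_rd_in_plane() interface.

#include <limits.h>
#include <stdint.h>

typedef struct { int rate; int64_t dist, sse; int skip; } PlaneRD; /* invented */

static int sum_chroma_rd(PlaneRD (*plane_rd)(int plane, int64_t budget),
                         int64_t budget, PlaneRD *out) {
  out->rate = 0; out->dist = 0; out->sse = 0; out->skip = 1;
  for (int plane = 1; plane < 3; ++plane) { /* plane 1 = U, plane 2 = V */
    const PlaneRD p = plane_rd(plane, budget);
    if (p.rate == INT_MAX) {                /* sentinel: plane blew the budget */
      out->rate = INT_MAX; out->dist = INT64_MAX; out->sse = INT64_MAX;
      out->skip = 0;
      return 0;                             /* cost values are not valid */
    }
    out->rate += p.rate; out->dist += p.dist; out->sse += p.sse;
    out->skip &= p.skip;
  }
  return 1;                                 /* totals are valid */
}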
1467 | | |
1468 | | static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x, |
1469 | | PICK_MODE_CONTEXT *ctx, int *rate, |
1470 | | int *rate_tokenonly, int64_t *distortion, |
1471 | | int *skippable, BLOCK_SIZE bsize, |
1472 | 6.61M | TX_SIZE max_tx_size) { |
1473 | 6.61M | MACROBLOCKD *xd = &x->e_mbd; |
1474 | 6.61M | PREDICTION_MODE mode; |
1475 | 6.61M | PREDICTION_MODE mode_selected = DC_PRED; |
1476 | 6.61M | int64_t best_rd = INT64_MAX, this_rd; |
1477 | 6.61M | int this_rate_tokenonly, this_rate, s; |
1478 | 6.61M | int64_t this_distortion, this_sse; |
1479 | | |
1480 | 6.61M | memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm)); |
1481 | 72.7M | for (mode = DC_PRED; mode <= TM_PRED; ++mode) { |
1482 | 66.1M | if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode))) continue; |
1483 | | #if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH |
1484 | | if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) && |
1485 | | (xd->above_mi == NULL || xd->left_mi == NULL) && need_top_left[mode]) |
1486 | | continue; |
1487 | | #endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH |
1488 | | |
1489 | 64.8M | xd->mi[0]->uv_mode = mode; |
1490 | | |
1491 | 64.8M | if (!super_block_uvrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, |
1492 | 64.8M | &this_sse, bsize, best_rd)) |
1493 | 8.43M | continue; |
1494 | 56.4M | this_rate = |
1495 | 56.4M | this_rate_tokenonly + |
1496 | 56.4M | cpi->intra_uv_mode_cost[cpi->common.frame_type][xd->mi[0]->mode][mode]; |
1497 | 56.4M | this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion); |
1498 | | |
1499 | 56.4M | if (this_rd < best_rd) { |
1500 | 11.6M | mode_selected = mode; |
1501 | 11.6M | best_rd = this_rd; |
1502 | 11.6M | *rate = this_rate; |
1503 | 11.6M | *rate_tokenonly = this_rate_tokenonly; |
1504 | 11.6M | *distortion = this_distortion; |
1505 | 11.6M | *skippable = s; |
1506 | 11.6M | if (!x->select_tx_size) swap_block_ptr(x, ctx, 2, 0, 1, MAX_MB_PLANE); |
1507 | 11.6M | } |
1508 | 56.4M | } |
1509 | | |
1510 | 6.61M | xd->mi[0]->uv_mode = mode_selected; |
1511 | 6.61M | return best_rd; |
1512 | 6.61M | } |
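The uv mode loop above is pruned with a per-tx-size bitmask supplied by the speed features: bit n set means intra mode n may be searched. A self-contained illustration of that mask test; the TOY_* mode names and the mask value are invented, not the encoder's tables.

enum { TOY_DC = 0, TOY_V, TOY_H, TOY_D45, TOY_TM, TOY_NUM_MODES }; /* invented */

static int mode_allowed(unsigned mask, int mode) { return (mask >> mode) & 1; }

static int count_searched_modes(void) {
  /* A hypothetical "fast" mask that drops the oblique TOY_D45 mode. */
  const unsigned fast_mask =
      (1u << TOY_DC) | (1u << TOY_V) | (1u << TOY_H) | (1u << TOY_TM);
  int n = 0;
  for (int mode = TOY_DC; mode < TOY_NUM_MODES; ++mode)
    if (mode_allowed(fast_mask, mode)) ++n; /* mirrors the `& (1 << mode)` test */
  return n; /* 4 of the 5 toy modes survive */
}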
1513 | | |
1514 | | #if !CONFIG_REALTIME_ONLY |
1515 | | static int64_t rd_sbuv_dcpred(const VP9_COMP *cpi, MACROBLOCK *x, int *rate, |
1516 | | int *rate_tokenonly, int64_t *distortion, |
1517 | 0 | int *skippable, BLOCK_SIZE bsize) { |
1518 | 0 | const VP9_COMMON *cm = &cpi->common; |
1519 | 0 | int64_t unused; |
1520 | |
1521 | 0 | x->e_mbd.mi[0]->uv_mode = DC_PRED; |
1522 | 0 | memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm)); |
1523 | 0 | super_block_uvrd(cpi, x, rate_tokenonly, distortion, skippable, &unused, |
1524 | 0 | bsize, INT64_MAX); |
1525 | 0 | *rate = |
1526 | 0 | *rate_tokenonly + |
1527 | 0 | cpi->intra_uv_mode_cost[cm->frame_type][x->e_mbd.mi[0]->mode][DC_PRED]; |
1528 | 0 | return RDCOST(x->rdmult, x->rddiv, *rate, *distortion); |
1529 | 0 | } |
1530 | | |
1531 | | static void choose_intra_uv_mode(VP9_COMP *cpi, MACROBLOCK *const x, |
1532 | | PICK_MODE_CONTEXT *ctx, BLOCK_SIZE bsize, |
1533 | | TX_SIZE max_tx_size, int *rate_uv, |
1534 | | int *rate_uv_tokenonly, int64_t *dist_uv, |
1535 | 4.27M | int *skip_uv, PREDICTION_MODE *mode_uv) { |
1536 | | // Use an estimated rd for uv_intra based on DC_PRED if the |
1537 | | // appropriate speed flag is set. |
1538 | 4.27M | if (cpi->sf.use_uv_intra_rd_estimate) { |
1539 | 0 | rd_sbuv_dcpred(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv, |
1540 | 0 | bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize); |
1541 | | // Else do a proper rd search for each possible transform size that may |
1542 | | // be considered in the main rd loop. |
1543 | 4.27M | } else { |
1544 | 4.27M | rd_pick_intra_sbuv_mode(cpi, x, ctx, rate_uv, rate_uv_tokenonly, dist_uv, |
1545 | 4.27M | skip_uv, bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize, |
1546 | 4.27M | max_tx_size); |
1547 | 4.27M | } |
1548 | 4.27M | *mode_uv = x->e_mbd.mi[0]->uv_mode; |
1549 | 4.27M | } |
1550 | | |
1551 | | static int cost_mv_ref(const VP9_COMP *cpi, PREDICTION_MODE mode, |
1552 | 237M | int mode_context) { |
1553 | 237M | assert(is_inter_mode(mode)); |
1554 | 237M | return cpi->inter_mode_cost[mode_context][INTER_OFFSET(mode)]; |
1555 | 237M | } |
1556 | | |
1557 | | static int set_and_cost_bmi_mvs(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, |
1558 | | int i, PREDICTION_MODE mode, int_mv this_mv[2], |
1559 | | int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES], |
1560 | | int_mv seg_mvs[MAX_REF_FRAMES], |
1561 | | int_mv *best_ref_mv[2], const int *mvjcost, |
1562 | 77.2M | int *mvcost[2]) { |
1563 | 77.2M | MODE_INFO *const mi = xd->mi[0]; |
1564 | 77.2M | const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; |
1565 | 77.2M | int thismvcost = 0; |
1566 | 77.2M | int idx, idy; |
1567 | 77.2M | const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mi->sb_type]; |
1568 | 77.2M | const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mi->sb_type]; |
1569 | 77.2M | const int is_compound = has_second_ref(mi); |
1570 | | |
1571 | 77.2M | switch (mode) { |
1572 | 22.4M | case NEWMV: |
1573 | 22.4M | this_mv[0].as_int = seg_mvs[mi->ref_frame[0]].as_int; |
1574 | 22.4M | thismvcost += vp9_mv_bit_cost(&this_mv[0].as_mv, &best_ref_mv[0]->as_mv, |
1575 | 22.4M | mvjcost, mvcost, MV_COST_WEIGHT_SUB); |
1576 | 22.4M | if (is_compound) { |
1577 | 0 | this_mv[1].as_int = seg_mvs[mi->ref_frame[1]].as_int; |
1578 | 0 | thismvcost += vp9_mv_bit_cost(&this_mv[1].as_mv, &best_ref_mv[1]->as_mv, |
1579 | 0 | mvjcost, mvcost, MV_COST_WEIGHT_SUB); |
1580 | 0 | } |
1581 | 22.4M | break; |
1582 | 14.2M | case NEARMV: |
1583 | 43.1M | case NEARESTMV: |
1584 | 43.1M | this_mv[0].as_int = frame_mv[mode][mi->ref_frame[0]].as_int; |
1585 | 43.1M | if (is_compound) |
1586 | 0 | this_mv[1].as_int = frame_mv[mode][mi->ref_frame[1]].as_int; |
1587 | 43.1M | break; |
1588 | 11.6M | default: |
1589 | 11.6M | assert(mode == ZEROMV); |
1590 | 11.6M | this_mv[0].as_int = 0; |
1591 | 11.6M | if (is_compound) this_mv[1].as_int = 0; |
1592 | 11.6M | break; |
1593 | 77.2M | } |
1594 | | |
1595 | 77.2M | mi->bmi[i].as_mv[0].as_int = this_mv[0].as_int; |
1596 | 77.2M | if (is_compound) mi->bmi[i].as_mv[1].as_int = this_mv[1].as_int; |
1597 | | |
1598 | 77.2M | mi->bmi[i].as_mode = mode; |
1599 | | |
1600 | 163M | for (idy = 0; idy < num_4x4_blocks_high; ++idy) |
1601 | 181M | for (idx = 0; idx < num_4x4_blocks_wide; ++idx) |
1602 | 94.9M | memmove(&mi->bmi[i + idy * 2 + idx], &mi->bmi[i], sizeof(mi->bmi[i])); |
1603 | | |
1604 | 77.2M | return cost_mv_ref(cpi, mode, mbmi_ext->mode_context[mi->ref_frame[0]]) + |
1605 | 77.2M | thismvcost; |
1606 | 77.2M | } |
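set_and_cost_bmi_mvs() replicates the chosen mode and mv into every 4x4 slot that the sub-block covers; the four slots of an 8x8 are laid out in raster order (0 1 / 2 3), so an 8x4 block at slot 0 also fills slot 1, and a 4x8 block at slot 1 also fills slot 3. A toy version of that replication indexing, with a plain int array standing in for the bmi entries:

/* bmi[4] models the four 4x4 slots of an 8x8 block, raster order:
 *   0 1
 *   2 3                                                             */
static void replicate_decision(int bmi[4], int i, int blocks_wide,
                               int blocks_high) {
  for (int idy = 0; idy < blocks_high; ++idy)
    for (int idx = 0; idx < blocks_wide; ++idx)
      bmi[i + idy * 2 + idx] = bmi[i]; /* same stride-2 indexing as above */
}
/* e.g. replicate_decision(bmi, 1, 1, 2) copies slot 1 into slots 1 and 3. */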
1607 | | |
1608 | | static int64_t encode_inter_mb_segment(VP9_COMP *cpi, MACROBLOCK *x, |
1609 | | int64_t best_yrd, int i, int *labelyrate, |
1610 | | int64_t *distortion, int64_t *sse, |
1611 | | ENTROPY_CONTEXT *ta, ENTROPY_CONTEXT *tl, |
1612 | 56.4M | int mi_row, int mi_col) { |
1613 | 56.4M | int k; |
1614 | 56.4M | MACROBLOCKD *xd = &x->e_mbd; |
1615 | 56.4M | struct macroblockd_plane *const pd = &xd->plane[0]; |
1616 | 56.4M | struct macroblock_plane *const p = &x->plane[0]; |
1617 | 56.4M | MODE_INFO *const mi = xd->mi[0]; |
1618 | 56.4M | const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->sb_type, pd); |
1619 | 56.4M | const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; |
1620 | 56.4M | const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize]; |
1621 | 56.4M | int idx, idy; |
1622 | | |
1623 | 56.4M | const uint8_t *const src = |
1624 | 56.4M | &p->src.buf[vp9_raster_block_offset(BLOCK_8X8, i, p->src.stride)]; |
1625 | 56.4M | uint8_t *const dst = |
1626 | 56.4M | &pd->dst.buf[vp9_raster_block_offset(BLOCK_8X8, i, pd->dst.stride)]; |
1627 | 56.4M | int64_t thisdistortion = 0, thissse = 0; |
1628 | 56.4M | int thisrate = 0, ref; |
1629 | 56.4M | const ScanOrder *so = &vp9_default_scan_orders[TX_4X4]; |
1630 | 56.4M | const int is_compound = has_second_ref(mi); |
1631 | 56.4M | const InterpKernel *kernel = vp9_filter_kernels[mi->interp_filter]; |
1632 | | |
1633 | 56.4M | assert(!x->skip_block); |
1634 | | |
1635 | 112M | for (ref = 0; ref < 1 + is_compound; ++ref) { |
1636 | 56.4M | const int bw = b_width_log2_lookup[BLOCK_8X8]; |
1637 | 56.4M | const int h = 4 * (i >> bw); |
1638 | 56.4M | const int w = 4 * (i & ((1 << bw) - 1)); |
1639 | 56.4M | const struct scale_factors *sf = &xd->block_refs[ref]->sf; |
1640 | 56.4M | int y_stride = pd->pre[ref].stride; |
1641 | 56.4M | uint8_t *pre = pd->pre[ref].buf + (h * pd->pre[ref].stride + w); |
1642 | | |
1643 | 56.4M | if (vp9_is_scaled(sf)) { |
1644 | 0 | const int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)); |
1645 | 0 | const int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)); |
1646 | |
1647 | 0 | y_stride = xd->block_refs[ref]->buf->y_stride; |
1648 | 0 | pre = xd->block_refs[ref]->buf->y_buffer; |
1649 | 0 | pre += scaled_buffer_offset(x_start + w, y_start + h, y_stride, sf); |
1650 | 0 | } |
1651 | 56.4M | #if CONFIG_VP9_HIGHBITDEPTH |
1652 | 56.4M | if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { |
1653 | 0 | vp9_highbd_build_inter_predictor( |
1654 | 0 | CONVERT_TO_SHORTPTR(pre), y_stride, CONVERT_TO_SHORTPTR(dst), |
1655 | 0 | pd->dst.stride, &mi->bmi[i].as_mv[ref].as_mv, |
1656 | 0 | &xd->block_refs[ref]->sf, width, height, ref, kernel, MV_PRECISION_Q3, |
1657 | 0 | mi_col * MI_SIZE + 4 * (i % 2), mi_row * MI_SIZE + 4 * (i / 2), |
1658 | 0 | xd->bd); |
1659 | 56.4M | } else { |
1660 | 56.4M | vp9_build_inter_predictor( |
1661 | 56.4M | pre, y_stride, dst, pd->dst.stride, &mi->bmi[i].as_mv[ref].as_mv, |
1662 | 56.4M | &xd->block_refs[ref]->sf, width, height, ref, kernel, MV_PRECISION_Q3, |
1663 | 56.4M | mi_col * MI_SIZE + 4 * (i % 2), mi_row * MI_SIZE + 4 * (i / 2)); |
1664 | 56.4M | } |
1665 | | #else |
1666 | | vp9_build_inter_predictor( |
1667 | | pre, y_stride, dst, pd->dst.stride, &mi->bmi[i].as_mv[ref].as_mv, |
1668 | | &xd->block_refs[ref]->sf, width, height, ref, kernel, MV_PRECISION_Q3, |
1669 | | mi_col * MI_SIZE + 4 * (i % 2), mi_row * MI_SIZE + 4 * (i / 2)); |
1670 | | #endif // CONFIG_VP9_HIGHBITDEPTH |
1671 | 56.4M | } |
1672 | | |
1673 | 56.4M | #if CONFIG_VP9_HIGHBITDEPTH |
1674 | 56.4M | if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { |
1675 | 0 | vpx_highbd_subtract_block( |
1676 | 0 | height, width, vp9_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), |
1677 | 0 | 8, src, p->src.stride, dst, pd->dst.stride, xd->bd); |
1678 | 56.4M | } else { |
1679 | 56.4M | vpx_subtract_block(height, width, |
1680 | 56.4M | vp9_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), |
1681 | 56.4M | 8, src, p->src.stride, dst, pd->dst.stride); |
1682 | 56.4M | } |
1683 | | #else |
1684 | | vpx_subtract_block(height, width, |
1685 | | vp9_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), |
1686 | | 8, src, p->src.stride, dst, pd->dst.stride); |
1687 | | #endif // CONFIG_VP9_HIGHBITDEPTH |
1688 | | |
1689 | 56.4M | k = i; |
1690 | 110M | for (idy = 0; idy < height / 4; ++idy) { |
1691 | 121M | for (idx = 0; idx < width / 4; ++idx) { |
1692 | 68.3M | #if CONFIG_VP9_HIGHBITDEPTH |
1693 | 68.3M | const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8; |
1694 | 68.3M | #endif |
1695 | 68.3M | int64_t ssz, rd, rd1, rd2; |
1696 | 68.3M | tran_low_t *coeff, *qcoeff, *dqcoeff; |
1697 | 68.3M | uint16_t *eob; |
1698 | 68.3M | int coeff_ctx; |
1699 | 68.3M | k += (idy * 2 + idx); |
1700 | 68.3M | coeff_ctx = combine_entropy_contexts(ta[k & 1], tl[k >> 1]); |
1701 | 68.3M | coeff = BLOCK_OFFSET(p->coeff, k); |
1702 | 68.3M | qcoeff = BLOCK_OFFSET(p->qcoeff, k); |
1703 | 68.3M | dqcoeff = BLOCK_OFFSET(pd->dqcoeff, k); |
1704 | 68.3M | eob = &p->eobs[k]; |
1705 | | |
1706 | 68.3M | x->fwd_txfm4x4(vp9_raster_block_offset_int16(BLOCK_8X8, k, p->src_diff), |
1707 | 68.3M | coeff, 8); |
1708 | 68.3M | #if CONFIG_VP9_HIGHBITDEPTH |
1709 | 68.3M | vpx_highbd_quantize_b(coeff, 4 * 4, p, qcoeff, dqcoeff, pd->dequant, eob, |
1710 | 68.3M | so); |
1711 | 68.3M | thisdistortion += vp9_highbd_block_error_dispatch( |
1712 | 68.3M | coeff, BLOCK_OFFSET(pd->dqcoeff, k), 16, &ssz, bd); |
1713 | | #else |
1714 | | vpx_quantize_b(coeff, 4 * 4, p, qcoeff, dqcoeff, pd->dequant, eob, so); |
1715 | | thisdistortion += |
1716 | | vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k), 16, &ssz); |
1717 | | #endif // CONFIG_VP9_HIGHBITDEPTH |
1718 | 68.3M | thissse += ssz; |
1719 | 68.3M | thisrate += cost_coeffs(x, 0, k, TX_4X4, coeff_ctx, so->scan, |
1720 | 68.3M | so->neighbors, cpi->sf.use_fast_coef_costing); |
1721 | 68.3M | ta[k & 1] = tl[k >> 1] = (x->plane[0].eobs[k] > 0) ? 1 : 0; |
1722 | 68.3M | rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2); |
1723 | 68.3M | rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2); |
1724 | 68.3M | rd = VPXMIN(rd1, rd2); |
1725 | 68.3M | if (rd >= best_yrd) return INT64_MAX; |
1726 | 68.3M | } |
1727 | 62.4M | } |
1728 | | |
1729 | 47.6M | *distortion = thisdistortion >> 2; |
1730 | 47.6M | *labelyrate = thisrate; |
1731 | 47.6M | *sse = thissse >> 2; |
1732 | | |
1733 | 47.6M | return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion); |
1734 | 56.4M | } |
1735 | | #endif // !CONFIG_REALTIME_ONLY |
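encode_inter_mb_segment() terminates early using the smaller of two costs at each 4x4 step: rd1, the cost of what has actually been coded so far, and rd2, a skip-style alternative with zero coefficient rate and the raw sse as distortion. Only when even the cheaper of the two exceeds the remaining budget is the segment abandoned. A numeric illustration under the simplified form cost = rate * lambda + distortion; lambda and all sample values are invented.

#include <stdint.h>

static int segment_can_still_win(void) {
  const int64_t lambda = 100, budget = 90000;             /* invented numbers */
  const int64_t rate_so_far = 700, dist_so_far = 30000, sse_so_far = 45000;
  const int64_t rd1 = rate_so_far * lambda + dist_so_far; /* 100000: coded    */
  const int64_t rd2 = 0 * lambda + sse_so_far;            /*  45000: "skip"   */
  /* rd1 alone would abort, but the skip-style bound keeps the segment alive. */
  return (rd1 < rd2 ? rd1 : rd2) < budget;                /* 1: keep going    */
}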
1736 | | |
1737 | | typedef struct { |
1738 | | int eobs; |
1739 | | int brate; |
1740 | | int byrate; |
1741 | | int64_t bdist; |
1742 | | int64_t bsse; |
1743 | | int64_t brdcost; |
1744 | | int_mv mvs[2]; |
1745 | | ENTROPY_CONTEXT ta[2]; |
1746 | | ENTROPY_CONTEXT tl[2]; |
1747 | | } SEG_RDSTAT; |
1748 | | |
1749 | | typedef struct { |
1750 | | int_mv *ref_mv[2]; |
1751 | | int_mv mvp; |
1752 | | |
1753 | | int64_t segment_rd; |
1754 | | int r; |
1755 | | int64_t d; |
1756 | | int64_t sse; |
1757 | | int segment_yrate; |
1758 | | PREDICTION_MODE modes[4]; |
1759 | | SEG_RDSTAT rdstat[4][INTER_MODES]; |
1760 | | int mvthresh; |
1761 | | } BEST_SEG_INFO; |
1762 | | |
1763 | | #if !CONFIG_REALTIME_ONLY |
1764 | 79.2M | static INLINE int mv_check_bounds(const MvLimits *mv_limits, const MV *mv) { |
1765 | 79.2M | return (mv->row >> 3) < mv_limits->row_min || |
1766 | 79.2M | (mv->row >> 3) > mv_limits->row_max || |
1767 | 79.2M | (mv->col >> 3) < mv_limits->col_min || |
1768 | 79.2M | (mv->col >> 3) > mv_limits->col_max; |
1769 | 79.2M | } |
1770 | | |
1771 | 16.0M | static INLINE void mi_buf_shift(MACROBLOCK *x, int i) { |
1772 | 16.0M | MODE_INFO *const mi = x->e_mbd.mi[0]; |
1773 | 16.0M | struct macroblock_plane *const p = &x->plane[0]; |
1774 | 16.0M | struct macroblockd_plane *const pd = &x->e_mbd.plane[0]; |
1775 | | |
1776 | 16.0M | p->src.buf = |
1777 | 16.0M | &p->src.buf[vp9_raster_block_offset(BLOCK_8X8, i, p->src.stride)]; |
1778 | 16.0M | assert(((intptr_t)pd->pre[0].buf & 0x7) == 0); |
1779 | 16.0M | pd->pre[0].buf = |
1780 | 16.0M | &pd->pre[0].buf[vp9_raster_block_offset(BLOCK_8X8, i, pd->pre[0].stride)]; |
1781 | 16.0M | if (has_second_ref(mi)) |
1782 | 0 | pd->pre[1].buf = |
1783 | 0 | &pd->pre[1] |
1784 | 0 | .buf[vp9_raster_block_offset(BLOCK_8X8, i, pd->pre[1].stride)]; |
1785 | 16.0M | } |
1786 | | |
1787 | | static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src, |
1788 | 16.0M | struct buf_2d orig_pre[2]) { |
1789 | 16.0M | MODE_INFO *mi = x->e_mbd.mi[0]; |
1790 | 16.0M | x->plane[0].src = orig_src; |
1791 | 16.0M | x->e_mbd.plane[0].pre[0] = orig_pre[0]; |
1792 | 16.0M | if (has_second_ref(mi)) x->e_mbd.plane[0].pre[1] = orig_pre[1]; |
1793 | 16.0M | } |
1794 | | |
1795 | 23.4M | static INLINE int mv_has_subpel(const MV *mv) { |
1796 | 23.4M | return (mv->row & 0x0F) || (mv->col & 0x0F); |
1797 | 23.4M | } |
1798 | | |
1799 | | // Check if NEARESTMV/NEARMV/ZEROMV is the cheapest way encode zero motion. |
1800 | | // TODO(aconverse): Find out if this is still productive then clean up or remove |
1801 | | static int check_best_zero_mv(const VP9_COMP *cpi, |
1802 | | const uint8_t mode_context[MAX_REF_FRAMES], |
1803 | | int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES], |
1804 | | int this_mode, |
1805 | 100M | const MV_REFERENCE_FRAME ref_frames[2]) { |
1806 | 100M | if ((this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) && |
1807 | 100M | frame_mv[this_mode][ref_frames[0]].as_int == 0 && |
1808 | 100M | (ref_frames[1] == NO_REF_FRAME || |
1809 | 45.7M | frame_mv[this_mode][ref_frames[1]].as_int == 0)) { |
1810 | 45.7M | int rfc = mode_context[ref_frames[0]]; |
1811 | 45.7M | int c1 = cost_mv_ref(cpi, NEARMV, rfc); |
1812 | 45.7M | int c2 = cost_mv_ref(cpi, NEARESTMV, rfc); |
1813 | 45.7M | int c3 = cost_mv_ref(cpi, ZEROMV, rfc); |
1814 | | |
1815 | 45.7M | if (this_mode == NEARMV) { |
1816 | 12.9M | if (c1 > c3) return 0; |
1817 | 32.7M | } else if (this_mode == NEARESTMV) { |
1818 | 8.52M | if (c2 > c3) return 0; |
1819 | 24.2M | } else { |
1820 | 24.2M | assert(this_mode == ZEROMV); |
1821 | 24.2M | if (ref_frames[1] == NO_REF_FRAME) { |
1822 | 24.2M | if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0) || |
1823 | 24.2M | (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0)) |
1824 | 11.1M | return 0; |
1825 | 24.2M | } else { |
1826 | 0 | if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0 && |
1827 | 0 | frame_mv[NEARESTMV][ref_frames[1]].as_int == 0) || |
1828 | 0 | (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0 && |
1829 | 0 | frame_mv[NEARMV][ref_frames[1]].as_int == 0)) |
1830 | 0 | return 0; |
1831 | 0 | } |
1832 | 24.2M | } |
1833 | 45.7M | } |
1834 | 81.0M | return 1; |
1835 | 100M | } |
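check_best_zero_mv() only lets a zero motion vector be coded with the cheapest mode that can express it; for example, a ZEROMV candidate is dropped when NEARESTMV also predicts (0,0) and signals in fewer bits. A sketch of that decision with invented mode costs standing in for the real inter_mode_cost tables:

static int keep_zeromv_candidate(void) {
  /* Invented signalling costs for one mode_context; not the real tables. */
  const int cost_near = 530, cost_nearest = 410, cost_zero = 620;
  const int nearestmv_is_zero = 1; /* pretend NEARESTMV also predicts (0,0) */
  const int nearmv_is_zero = 0;
  /* Mirrors the ZEROMV branch above: prune when an equally-zero mode is cheaper. */
  if ((cost_zero >= cost_nearest && nearestmv_is_zero) ||
      (cost_zero >= cost_near && nearmv_is_zero))
    return 0; /* ZEROMV skipped: NEARESTMV covers the same motion for less */
  return 1;
}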
1836 | | |
1837 | 0 | static INLINE int skip_iters(int_mv iter_mvs[][2], int ite, int id) { |
1838 | 0 | if (ite >= 2 && iter_mvs[ite - 2][!id].as_int == iter_mvs[ite][!id].as_int) { |
1839 | 0 | int_mv cur_fullpel_mv, prev_fullpel_mv; |
1840 | 0 | cur_fullpel_mv.as_mv.row = iter_mvs[ite][id].as_mv.row >> 3; |
1841 | 0 | cur_fullpel_mv.as_mv.col = iter_mvs[ite][id].as_mv.col >> 3; |
1842 | 0 | prev_fullpel_mv.as_mv.row = iter_mvs[ite - 2][id].as_mv.row >> 3; |
1843 | 0 | prev_fullpel_mv.as_mv.col = iter_mvs[ite - 2][id].as_mv.col >> 3; |
1844 | 0 | if (cur_fullpel_mv.as_int == prev_fullpel_mv.as_int) return 1; |
1845 | 0 | } |
1846 | 0 | return 0; |
1847 | 0 | } |
1848 | | |
1849 | | // Compares motion vector and mode rate of current mode and given mode. |
1850 | | static INLINE int compare_mv_mode_rate(MV this_mv, MV mode_mv, |
1851 | | int this_mode_rate, int mode_rate, |
1852 | 4.58M | int mv_thresh) { |
1853 | 4.58M | const int mv_diff = |
1854 | 4.58M | abs(mode_mv.col - this_mv.col) + abs(mode_mv.row - this_mv.row); |
1855 | 4.58M | if (mv_diff <= mv_thresh && mode_rate < this_mode_rate) return 1; |
1856 | 4.36M | return 0; |
1857 | 4.58M | } |
1858 | | |
1859 | | // Skips single reference inter modes NEARMV and ZEROMV based on motion vector |
1860 | | // difference and mode rate. |
1861 | | static INLINE int skip_single_mode_based_on_mode_rate( |
1862 | | int_mv (*mode_mv)[MAX_REF_FRAMES], int *single_mode_rate, int this_mode, |
1863 | 7.21M | int ref0, int this_mode_rate, int best_mode_index) { |
1864 | 7.21M | MV this_mv = mode_mv[this_mode][ref0].as_mv; |
1865 | 7.21M | const int mv_thresh = 3; |
1866 | | |
1867 | | // Pruning is not applicable for NEARESTMV or NEWMV modes. |
1868 | 7.21M | if (this_mode == NEARESTMV || this_mode == NEWMV) return 0; |
1869 | | // Pruning is not done when the reference frame of the mode is the same |
1870 | | // as the best reference so far. |
1871 | 2.48M | if (best_mode_index > 0 && |
1872 | 2.48M | ref0 == vp9_mode_order[best_mode_index].ref_frame[0]) |
1873 | 451k | return 0; |
1874 | | |
1875 | | // Check absolute mv difference and mode rate of current mode w.r.t NEARESTMV |
1876 | 2.03M | if (compare_mv_mode_rate( |
1877 | 2.03M | this_mv, mode_mv[NEARESTMV][ref0].as_mv, this_mode_rate, |
1878 | 2.03M | single_mode_rate[INTER_OFFSET(NEARESTMV)], mv_thresh)) |
1879 | 218k | return 1; |
1880 | | |
1881 | | // Check absolute mv difference and mode rate of current mode w.r.t NEWMV |
1882 | 1.81M | if (compare_mv_mode_rate(this_mv, mode_mv[NEWMV][ref0].as_mv, this_mode_rate, |
1883 | 1.81M | single_mode_rate[INTER_OFFSET(NEWMV)], mv_thresh)) |
1884 | 148 | return 1; |
1885 | | |
1886 | | // Pruning w.r.t NEARMV is applicable only for ZEROMV mode |
1887 | 1.81M | if (this_mode == NEARMV) return 0; |
1888 | | // Check absolute mv difference and mode rate of current mode w.r.t NEARMV |
1889 | 734k | if (compare_mv_mode_rate(this_mv, mode_mv[NEARMV][ref0].as_mv, this_mode_rate, |
1890 | 734k | single_mode_rate[INTER_OFFSET(NEARMV)], mv_thresh)) |
1891 | 209 | return 1; |
1892 | 733k | return 0; |
1893 | 734k | } |
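The pruning above drops NEARMV/ZEROMV when their motion vector is nearly identical to one that has already been costed (sum of absolute row and column differences no more than 3, in the encoder's sub-pel mv units) and the earlier mode signals more cheaply. A worked instance of the same test with invented mvs and rates:

#include <stdlib.h>

static int prune_near_duplicate_mv(void) {
  const int mv_thresh = 3;                  /* same threshold as above        */
  const int this_row = 12, this_col = -5;   /* candidate mode's mv (invented) */
  const int ref_row = 13, ref_col = -7;     /* mv of an already-costed mode   */
  const int this_mode_rate = 780, ref_mode_rate = 640; /* invented rates      */
  const int mv_diff =
      abs(ref_col - this_col) + abs(ref_row - this_row); /* 2 + 1 = 3 */
  return mv_diff <= mv_thresh && ref_mode_rate < this_mode_rate; /* 1: prune */
}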
1894 | | |
1895 | 0 | #define MAX_JOINT_MV_SEARCH_ITERS 4 |
1896 | 0 | static INLINE int get_joint_search_iters(int sf_level, BLOCK_SIZE bsize) { |
1897 | 0 | int num_iters = MAX_JOINT_MV_SEARCH_ITERS; // sf_level = 0 |
1898 | 0 | if (sf_level >= 2) |
1899 | 0 | num_iters = 0; |
1900 | 0 | else if (sf_level >= 1) |
1901 | 0 | num_iters = bsize < BLOCK_8X8 |
1902 | 0 | ? 0 |
1903 | 0 | : (bsize <= BLOCK_16X16 ? 2 : MAX_JOINT_MV_SEARCH_ITERS); |
1904 | 0 | return num_iters; |
1905 | 0 | } |
1906 | | |
1907 | | static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, |
1908 | | int_mv *frame_mv, int mi_row, int mi_col, |
1909 | | int_mv single_newmv[MAX_REF_FRAMES], |
1910 | 0 | int *rate_mv, int num_iters) { |
1911 | 0 | const VP9_COMMON *const cm = &cpi->common; |
1912 | 0 | const int pw = 4 * num_4x4_blocks_wide_lookup[bsize]; |
1913 | 0 | const int ph = 4 * num_4x4_blocks_high_lookup[bsize]; |
1914 | 0 | MACROBLOCKD *xd = &x->e_mbd; |
1915 | 0 | MODE_INFO *mi = xd->mi[0]; |
1916 | 0 | const int refs[2] = { mi->ref_frame[0], |
1917 | 0 | mi->ref_frame[1] < 0 ? 0 : mi->ref_frame[1] }; |
1918 | 0 | int_mv ref_mv[2]; |
1919 | 0 | int_mv iter_mvs[MAX_JOINT_MV_SEARCH_ITERS][2]; |
1920 | 0 | int ite, ref; |
1921 | 0 | const InterpKernel *kernel = vp9_filter_kernels[mi->interp_filter]; |
1922 | 0 | struct scale_factors sf; |
1923 | | |
1924 | | // Do joint motion search in compound mode to get more accurate mv. |
1925 | 0 | struct buf_2d backup_yv12[2][MAX_MB_PLANE]; |
1926 | 0 | uint32_t last_besterr[2] = { UINT_MAX, UINT_MAX }; |
1927 | 0 | const YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = { |
1928 | 0 | vp9_get_scaled_ref_frame(cpi, mi->ref_frame[0]), |
1929 | 0 | vp9_get_scaled_ref_frame(cpi, mi->ref_frame[1]) |
1930 | 0 | }; |
1931 | | |
1932 | | // Prediction buffer from second frame. |
1933 | 0 | #if CONFIG_VP9_HIGHBITDEPTH |
1934 | 0 | DECLARE_ALIGNED(32, uint16_t, second_pred_alloc_16[64 * 64]); |
1935 | 0 | uint8_t *second_pred; |
1936 | | #else |
1937 | | DECLARE_ALIGNED(32, uint8_t, second_pred[64 * 64]); |
1938 | | #endif // CONFIG_VP9_HIGHBITDEPTH |
1939 | | |
1940 | | // Check that the number of iterations does not exceed the max |
1941 | 0 | assert(num_iters <= MAX_JOINT_MV_SEARCH_ITERS); |
1942 | |
1943 | 0 | for (ref = 0; ref < 2; ++ref) { |
1944 | 0 | ref_mv[ref] = x->mbmi_ext->ref_mvs[refs[ref]][0]; |
1945 | |
1946 | 0 | if (scaled_ref_frame[ref]) { |
1947 | 0 | int i; |
1948 | | // Swap out the reference frame for a version that's been scaled to |
1949 | | // match the resolution of the current frame, allowing the existing |
1950 | | // motion search code to be used without additional modifications. |
1951 | 0 | for (i = 0; i < MAX_MB_PLANE; i++) |
1952 | 0 | backup_yv12[ref][i] = xd->plane[i].pre[ref]; |
1953 | 0 | vp9_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col, |
1954 | 0 | NULL); |
1955 | 0 | } |
1956 | |
1957 | 0 | frame_mv[refs[ref]].as_int = single_newmv[refs[ref]].as_int; |
1958 | 0 | iter_mvs[0][ref].as_int = single_newmv[refs[ref]].as_int; |
1959 | 0 | } |
1960 | | |
1961 | | // Since we have scaled the reference frames to match the size of the current |
1962 | | // frame we must use a unit scaling factor during mode selection. |
1963 | 0 | #if CONFIG_VP9_HIGHBITDEPTH |
1964 | 0 | vp9_setup_scale_factors_for_frame(&sf, cm->width, cm->height, cm->width, |
1965 | 0 | cm->height, cm->use_highbitdepth); |
1966 | | #else |
1967 | | vp9_setup_scale_factors_for_frame(&sf, cm->width, cm->height, cm->width, |
1968 | | cm->height); |
1969 | | #endif // CONFIG_VP9_HIGHBITDEPTH |
1970 | | |
1971 | | // Allow joint search multiple times iteratively for each reference frame |
1972 | | // and break out of the search loop if it couldn't find a better mv. |
1973 | 0 | for (ite = 0; ite < num_iters; ite++) { |
1974 | 0 | struct buf_2d ref_yv12[2]; |
1975 | 0 | uint32_t bestsme = UINT_MAX; |
1976 | 0 | int sadpb = x->sadperbit16; |
1977 | 0 | MV tmp_mv; |
1978 | 0 | int search_range = 3; |
1979 | |
1980 | 0 | const MvLimits tmp_mv_limits = x->mv_limits; |
1981 | 0 | int id = ite % 2; // Even iterations search in the first reference frame, |
1982 | | // odd iterations search in the second. The predictor |
1983 | | // found for the 'other' reference frame is factored in. |
1984 | | |
1985 | | // Skip further iterations of the search if, in the previous iteration, the |
1986 | | // motion vector of the searched ref frame is unchanged, and the other ref |
1987 | | // frame's full-pixel mv is unchanged. |
1988 | 0 | if (skip_iters(iter_mvs, ite, id)) break; |
1989 | | |
1990 | | // Initialized here because of compiler problem in Visual Studio. |
1991 | 0 | ref_yv12[0] = xd->plane[0].pre[0]; |
1992 | 0 | ref_yv12[1] = xd->plane[0].pre[1]; |
1993 | | |
1994 | | // Get the prediction block from the 'other' reference frame. |
1995 | 0 | #if CONFIG_VP9_HIGHBITDEPTH |
1996 | 0 | if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { |
1997 | 0 | second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc_16); |
1998 | 0 | vp9_highbd_build_inter_predictor( |
1999 | 0 | CONVERT_TO_SHORTPTR(ref_yv12[!id].buf), ref_yv12[!id].stride, |
2000 | 0 | second_pred_alloc_16, pw, &frame_mv[refs[!id]].as_mv, &sf, pw, ph, 0, |
2001 | 0 | kernel, MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE, xd->bd); |
2002 | 0 | } else { |
2003 | 0 | second_pred = (uint8_t *)second_pred_alloc_16; |
2004 | 0 | vp9_build_inter_predictor(ref_yv12[!id].buf, ref_yv12[!id].stride, |
2005 | 0 | second_pred, pw, &frame_mv[refs[!id]].as_mv, |
2006 | 0 | &sf, pw, ph, 0, kernel, MV_PRECISION_Q3, |
2007 | 0 | mi_col * MI_SIZE, mi_row * MI_SIZE); |
2008 | 0 | } |
2009 | | #else |
2010 | | vp9_build_inter_predictor(ref_yv12[!id].buf, ref_yv12[!id].stride, |
2011 | | second_pred, pw, &frame_mv[refs[!id]].as_mv, &sf, |
2012 | | pw, ph, 0, kernel, MV_PRECISION_Q3, |
2013 | | mi_col * MI_SIZE, mi_row * MI_SIZE); |
2014 | | #endif // CONFIG_VP9_HIGHBITDEPTH |
2015 | | |
2016 | | // Do compound motion search on the current reference frame. |
2017 | 0 | if (id) xd->plane[0].pre[0] = ref_yv12[id]; |
2018 | 0 | vp9_set_mv_search_range(&x->mv_limits, &ref_mv[id].as_mv); |
2019 | | |
2020 | | // Use the mv result from the single mode as mv predictor. |
2021 | 0 | tmp_mv = frame_mv[refs[id]].as_mv; |
2022 | |
2023 | 0 | tmp_mv.col >>= 3; |
2024 | 0 | tmp_mv.row >>= 3; |
2025 | | |
2026 | | // Small-range full-pixel motion search. |
2027 | 0 | bestsme = vp9_refining_search_8p_c(x, &tmp_mv, sadpb, search_range, |
2028 | 0 | &cpi->fn_ptr[bsize], &ref_mv[id].as_mv, |
2029 | 0 | second_pred); |
2030 | 0 | if (bestsme < UINT_MAX) |
2031 | 0 | bestsme = vp9_get_mvpred_av_var(x, &tmp_mv, &ref_mv[id].as_mv, |
2032 | 0 | second_pred, &cpi->fn_ptr[bsize], 1); |
2033 | |
2034 | 0 | x->mv_limits = tmp_mv_limits; |
2035 | |
2036 | 0 | if (bestsme < UINT_MAX) { |
2037 | 0 | uint32_t dis; /* TODO: use dis in distortion calculation later. */ |
2038 | 0 | uint32_t sse; |
2039 | 0 | bestsme = cpi->find_fractional_mv_step( |
2040 | 0 | x, &tmp_mv, &ref_mv[id].as_mv, cpi->common.allow_high_precision_mv, |
2041 | 0 | x->errorperbit, &cpi->fn_ptr[bsize], 0, |
2042 | 0 | cpi->sf.mv.subpel_search_level, NULL, x->nmvjointcost, x->mvcost, |
2043 | 0 | &dis, &sse, second_pred, pw, ph, cpi->sf.use_accurate_subpel_search); |
2044 | 0 | } |
2045 | | |
2046 | | // Restore the pointer to the first (possibly scaled) prediction buffer. |
2047 | 0 | if (id) xd->plane[0].pre[0] = ref_yv12[0]; |
2048 | |
2049 | 0 | if (bestsme < last_besterr[id]) { |
2050 | 0 | frame_mv[refs[id]].as_mv = tmp_mv; |
2051 | 0 | last_besterr[id] = bestsme; |
2052 | 0 | } else { |
2053 | 0 | break; |
2054 | 0 | } |
2055 | 0 | if (ite < num_iters - 1) { |
2056 | 0 | iter_mvs[ite + 1][0].as_int = frame_mv[refs[0]].as_int; |
2057 | 0 | iter_mvs[ite + 1][1].as_int = frame_mv[refs[1]].as_int; |
2058 | 0 | } |
2059 | 0 | } |
2060 | |
2061 | 0 | *rate_mv = 0; |
2062 | |
2063 | 0 | for (ref = 0; ref < 2; ++ref) { |
2064 | 0 | if (scaled_ref_frame[ref]) { |
2065 | | // Restore the prediction frame pointers to their unscaled versions. |
2066 | 0 | int i; |
2067 | 0 | for (i = 0; i < MAX_MB_PLANE; i++) |
2068 | 0 | xd->plane[i].pre[ref] = backup_yv12[ref][i]; |
2069 | 0 | } |
2070 | |
2071 | 0 | *rate_mv += vp9_mv_bit_cost(&frame_mv[refs[ref]].as_mv, |
2072 | 0 | &x->mbmi_ext->ref_mvs[refs[ref]][0].as_mv, |
2073 | 0 | x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); |
2074 | 0 | } |
2075 | 0 | } |
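joint_motion_search() is coordinate descent over the two motion vectors of a compound prediction: hold one mv fixed, rebuild the predictor of the "other" reference, refine the current mv against it, then alternate, stopping as soon as an iteration fails to improve. A schematic sketch of that loop shape; ToyMV and the refine callback are invented stand-ins for the real search and error machinery.

#include <stdint.h>

typedef struct { int row, col; } ToyMV;                            /* invented */
typedef uint32_t (*RefineFn)(ToyMV *mv, const ToyMV *other_fixed); /* invented */

static void alternate_refine(ToyMV mv[2], RefineFn refine, int max_iters) {
  uint32_t best_err[2] = { UINT32_MAX, UINT32_MAX };
  for (int ite = 0; ite < max_iters; ++ite) {
    const int id = ite & 1;                 /* even: first ref, odd: second */
    const uint32_t err = refine(&mv[id], &mv[!id]);
    if (err >= best_err[id]) break;         /* no improvement: stop early */
    best_err[id] = err;
  }
}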
2076 | | |
2077 | | static int64_t rd_pick_best_sub8x8_mode( |
2078 | | VP9_COMP *cpi, MACROBLOCK *x, int_mv *best_ref_mv, |
2079 | | int_mv *second_best_ref_mv, int64_t best_rd_so_far, int *returntotrate, |
2080 | | int *returnyrate, int64_t *returndistortion, int *skippable, int64_t *psse, |
2081 | | int mvthresh, int_mv seg_mvs[4][MAX_REF_FRAMES], BEST_SEG_INFO *bsi_buf, |
2082 | 6.16M | int filter_idx, int mi_row, int mi_col) { |
2083 | 6.16M | int i; |
2084 | 6.16M | BEST_SEG_INFO *bsi = bsi_buf + filter_idx; |
2085 | 6.16M | MACROBLOCKD *xd = &x->e_mbd; |
2086 | 6.16M | MODE_INFO *mi = xd->mi[0]; |
2087 | 6.16M | int mode_idx; |
2088 | 6.16M | int k, br = 0, idx, idy; |
2089 | 6.16M | int64_t bd = 0, block_sse = 0; |
2090 | 6.16M | PREDICTION_MODE this_mode; |
2091 | 6.16M | VP9_COMMON *cm = &cpi->common; |
2092 | 6.16M | struct macroblock_plane *const p = &x->plane[0]; |
2093 | 6.16M | struct macroblockd_plane *const pd = &xd->plane[0]; |
2094 | 6.16M | const int label_count = 4; |
2095 | 6.16M | int64_t this_segment_rd = 0; |
2096 | 6.16M | int label_mv_thresh; |
2097 | 6.16M | int segmentyrate = 0; |
2098 | 6.16M | const BLOCK_SIZE bsize = mi->sb_type; |
2099 | 6.16M | const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; |
2100 | 6.16M | const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; |
2101 | 6.16M | const int pw = num_4x4_blocks_wide << 2; |
2102 | 6.16M | const int ph = num_4x4_blocks_high << 2; |
2103 | 6.16M | ENTROPY_CONTEXT t_above[2], t_left[2]; |
2104 | 6.16M | int subpelmv = 1, have_ref = 0; |
2105 | 6.16M | SPEED_FEATURES *const sf = &cpi->sf; |
2106 | 6.16M | const int has_second_rf = has_second_ref(mi); |
2107 | 6.16M | const int inter_mode_mask = sf->inter_mode_mask[bsize]; |
2108 | 6.16M | MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; |
2109 | | |
2110 | 6.16M | vp9_zero(*bsi); |
2111 | | |
2112 | 6.16M | bsi->segment_rd = best_rd_so_far; |
2113 | 6.16M | bsi->ref_mv[0] = best_ref_mv; |
2114 | 6.16M | bsi->ref_mv[1] = second_best_ref_mv; |
2115 | 6.16M | bsi->mvp.as_int = best_ref_mv->as_int; |
2116 | 6.16M | bsi->mvthresh = mvthresh; |
2117 | | |
2118 | 30.8M | for (i = 0; i < 4; i++) bsi->modes[i] = ZEROMV; |
2119 | | |
2120 | 6.16M | memcpy(t_above, pd->above_context, sizeof(t_above)); |
2121 | 6.16M | memcpy(t_left, pd->left_context, sizeof(t_left)); |
2122 | | |
2123 | | // A value of 64 makes this threshold effectively very large, |
2124 | | // so that we very rarely check mvs on segments. Setting this |
2125 | | // to 1 would make the mv threshold roughly equal to what it |
2126 | | // is for macroblocks. |
2127 | 6.16M | label_mv_thresh = 1 * bsi->mvthresh / label_count; |
2128 | | |
2129 | | // Segmentation method overheads |
2130 | 14.8M | for (idy = 0; idy < 2; idy += num_4x4_blocks_high) { |
2131 | 27.5M | for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) { |
2132 | | // TODO(jingning,rbultje): rewrite the rate-distortion optimization |
2133 | | // loop for 4x4/4x8/8x4 block coding, to be replaced with a new rd loop. |
2134 | 18.8M | int_mv mode_mv[MB_MODE_COUNT][2]; |
2135 | 18.8M | int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]; |
2136 | 18.8M | PREDICTION_MODE mode_selected = ZEROMV; |
2137 | 18.8M | int64_t best_rd = INT64_MAX; |
2138 | 18.8M | const int block = idy * 2 + idx; |
2139 | 18.8M | int ref; |
2140 | | |
2141 | 37.7M | for (ref = 0; ref < 1 + has_second_rf; ++ref) { |
2142 | 18.8M | const MV_REFERENCE_FRAME frame = mi->ref_frame[ref]; |
2143 | 18.8M | frame_mv[ZEROMV][frame].as_int = 0; |
2144 | 18.8M | vp9_append_sub8x8_mvs_for_idx( |
2145 | 18.8M | cm, xd, block, ref, mi_row, mi_col, &frame_mv[NEARESTMV][frame], |
2146 | 18.8M | &frame_mv[NEARMV][frame], mbmi_ext->mode_context); |
2147 | 18.8M | } |
2148 | | |
2149 | | // search for the best motion vector on this segment |
2150 | 93.2M | for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) { |
2151 | 75.4M | const struct buf_2d orig_src = x->plane[0].src; |
2152 | 75.4M | struct buf_2d orig_pre[2]; |
2153 | | |
2154 | 75.4M | mode_idx = INTER_OFFSET(this_mode); |
2155 | 75.4M | bsi->rdstat[block][mode_idx].brdcost = INT64_MAX; |
2156 | 75.4M | if (!(inter_mode_mask & (1 << this_mode))) continue; |
2157 | | |
2158 | 75.4M | if (!check_best_zero_mv(cpi, mbmi_ext->mode_context, frame_mv, |
2159 | 75.4M | this_mode, mi->ref_frame)) |
2160 | 14.4M | continue; |
2161 | | |
2162 | 61.0M | memcpy(orig_pre, pd->pre, sizeof(orig_pre)); |
2163 | 61.0M | memcpy(bsi->rdstat[block][mode_idx].ta, t_above, |
2164 | 61.0M | sizeof(bsi->rdstat[block][mode_idx].ta)); |
2165 | 61.0M | memcpy(bsi->rdstat[block][mode_idx].tl, t_left, |
2166 | 61.0M | sizeof(bsi->rdstat[block][mode_idx].tl)); |
2167 | | |
2168 | | // motion search for newmv (single predictor case only) |
2169 | 61.0M | if (!has_second_rf && this_mode == NEWMV && |
2170 | 61.0M | seg_mvs[block][mi->ref_frame[0]].as_int == INVALID_MV) { |
2171 | 17.1M | MV *const new_mv = &mode_mv[NEWMV][0].as_mv; |
2172 | 17.1M | int step_param = 0; |
2173 | 17.1M | uint32_t bestsme = UINT_MAX; |
2174 | 17.1M | int sadpb = x->sadperbit4; |
2175 | 17.1M | MV mvp_full; |
2176 | 17.1M | int max_mv; |
2177 | 17.1M | int cost_list[5]; |
2178 | 17.1M | const MvLimits tmp_mv_limits = x->mv_limits; |
2179 | | |
2180 | | /* Is the best so far sufficiently good that we can't justify doing |
2181 | | * a new motion search. */ |
2182 | 17.1M | if (best_rd < label_mv_thresh) break; |
2183 | | |
2184 | 16.0M | if (cpi->oxcf.mode != BEST) { |
2185 | | // use previous block's result as next block's MV predictor. |
2186 | 16.0M | if (block > 0) { |
2187 | 10.6M | bsi->mvp.as_int = mi->bmi[block - 1].as_mv[0].as_int; |
2188 | 10.6M | if (block == 2) |
2189 | 3.92M | bsi->mvp.as_int = mi->bmi[block - 2].as_mv[0].as_int; |
2190 | 10.6M | } |
2191 | 16.0M | } |
2192 | 16.0M | if (block == 0) |
2193 | 5.45M | max_mv = x->max_mv_context[mi->ref_frame[0]]; |
2194 | 10.6M | else |
2195 | 10.6M | max_mv = |
2196 | 10.6M | VPXMAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3; |
2197 | | |
2198 | 16.0M | if (sf->mv.auto_mv_step_size && cm->show_frame) { |
2199 | | // Take a weighted average of the step_params based on the last frame's |
2200 | | // max mv magnitude and the best ref mvs of the current block for |
2201 | | // the given reference. |
2202 | 16.0M | step_param = |
2203 | 16.0M | (vp9_init_search_range(max_mv) + cpi->mv_step_param) / 2; |
2204 | 16.0M | } else { |
2205 | 0 | step_param = cpi->mv_step_param; |
2206 | 0 | } |
2207 | | |
2208 | 16.0M | mvp_full.row = bsi->mvp.as_mv.row >> 3; |
2209 | 16.0M | mvp_full.col = bsi->mvp.as_mv.col >> 3; |
2210 | | |
2211 | 16.0M | if (sf->adaptive_motion_search) { |
2212 | 16.0M | if (x->pred_mv[mi->ref_frame[0]].row != INT16_MAX && |
2213 | 16.0M | x->pred_mv[mi->ref_frame[0]].col != INT16_MAX) { |
2214 | 15.6M | mvp_full.row = x->pred_mv[mi->ref_frame[0]].row >> 3; |
2215 | 15.6M | mvp_full.col = x->pred_mv[mi->ref_frame[0]].col >> 3; |
2216 | 15.6M | } |
2217 | 16.0M | step_param = VPXMAX(step_param, 8); |
2218 | 16.0M | } |
2219 | | |
2220 | | // adjust src pointer for this block |
2221 | 16.0M | mi_buf_shift(x, block); |
2222 | | |
2223 | 16.0M | vp9_set_mv_search_range(&x->mv_limits, &bsi->ref_mv[0]->as_mv); |
2224 | | |
2225 | 16.0M | bestsme = vp9_full_pixel_search( |
2226 | 16.0M | cpi, x, bsize, &mvp_full, step_param, cpi->sf.mv.search_method, |
2227 | 16.0M | sadpb, |
2228 | 16.0M | sf->mv.subpel_search_method != SUBPEL_TREE ? cost_list : NULL, |
2229 | 16.0M | &bsi->ref_mv[0]->as_mv, new_mv, INT_MAX, 1); |
2230 | | |
2231 | 16.0M | x->mv_limits = tmp_mv_limits; |
2232 | | |
2233 | 16.0M | if (bestsme < UINT_MAX) { |
2234 | 16.0M | uint32_t distortion; |
2235 | 16.0M | cpi->find_fractional_mv_step( |
2236 | 16.0M | x, new_mv, &bsi->ref_mv[0]->as_mv, cm->allow_high_precision_mv, |
2237 | 16.0M | x->errorperbit, &cpi->fn_ptr[bsize], sf->mv.subpel_force_stop, |
2238 | 16.0M | sf->mv.subpel_search_level, cond_cost_list(cpi, cost_list), |
2239 | 16.0M | x->nmvjointcost, x->mvcost, &distortion, |
2240 | 16.0M | &x->pred_sse[mi->ref_frame[0]], NULL, pw, ph, |
2241 | 16.0M | cpi->sf.use_accurate_subpel_search); |
2242 | | |
2243 | | // save motion search result for use in compound prediction |
2244 | 16.0M | seg_mvs[block][mi->ref_frame[0]].as_mv = *new_mv; |
2245 | 16.0M | } |
2246 | | |
2247 | 16.0M | x->pred_mv[mi->ref_frame[0]] = *new_mv; |
2248 | | |
2249 | | // restore src pointers |
2250 | 16.0M | mi_buf_restore(x, orig_src, orig_pre); |
2251 | 16.0M | } |
2252 | | |
2253 | 59.9M | if (has_second_rf) { |
2254 | 0 | if (seg_mvs[block][mi->ref_frame[1]].as_int == INVALID_MV || |
2255 | 0 | seg_mvs[block][mi->ref_frame[0]].as_int == INVALID_MV) |
2256 | 0 | continue; |
2257 | 0 | } |
2258 | | |
2259 | 59.9M | if (has_second_rf && this_mode == NEWMV && |
2260 | 59.9M | mi->interp_filter == EIGHTTAP) { |
2261 | | // Decide number of joint motion search iterations |
2262 | 0 | const int num_joint_search_iters = get_joint_search_iters( |
2263 | 0 | cpi->sf.comp_inter_joint_search_iter_level, bsize); |
2264 | | // adjust src pointers |
2265 | 0 | mi_buf_shift(x, block); |
2266 | 0 | if (num_joint_search_iters) { |
2267 | 0 | int rate_mv; |
2268 | 0 | joint_motion_search(cpi, x, bsize, frame_mv[this_mode], mi_row, |
2269 | 0 | mi_col, seg_mvs[block], &rate_mv, |
2270 | 0 | num_joint_search_iters); |
2271 | 0 | seg_mvs[block][mi->ref_frame[0]].as_int = |
2272 | 0 | frame_mv[this_mode][mi->ref_frame[0]].as_int; |
2273 | 0 | seg_mvs[block][mi->ref_frame[1]].as_int = |
2274 | 0 | frame_mv[this_mode][mi->ref_frame[1]].as_int; |
2275 | 0 | } |
2276 | | // restore src pointers |
2277 | 0 | mi_buf_restore(x, orig_src, orig_pre); |
2278 | 0 | } |
2279 | | |
2280 | 59.9M | bsi->rdstat[block][mode_idx].brate = set_and_cost_bmi_mvs( |
2281 | 59.9M | cpi, x, xd, block, this_mode, mode_mv[this_mode], frame_mv, |
2282 | 59.9M | seg_mvs[block], bsi->ref_mv, x->nmvjointcost, x->mvcost); |
2283 | | |
2284 | 119M | for (ref = 0; ref < 1 + has_second_rf; ++ref) { |
2285 | 59.9M | bsi->rdstat[block][mode_idx].mvs[ref].as_int = |
2286 | 59.9M | mode_mv[this_mode][ref].as_int; |
2287 | 59.9M | if (num_4x4_blocks_wide > 1) |
2288 | 7.04M | bsi->rdstat[block + 1][mode_idx].mvs[ref].as_int = |
2289 | 7.04M | mode_mv[this_mode][ref].as_int; |
2290 | 59.9M | if (num_4x4_blocks_high > 1) |
2291 | 7.09M | bsi->rdstat[block + 2][mode_idx].mvs[ref].as_int = |
2292 | 7.09M | mode_mv[this_mode][ref].as_int; |
2293 | 59.9M | } |
2294 | | |
2295 | | // Trap vectors that reach beyond the UMV borders |
2296 | 59.9M | if (mv_check_bounds(&x->mv_limits, &mode_mv[this_mode][0].as_mv) || |
2297 | 59.9M | (has_second_rf && |
2298 | 58.5M | mv_check_bounds(&x->mv_limits, &mode_mv[this_mode][1].as_mv))) |
2299 | 1.42M | continue; |
2300 | | |
2301 | 58.5M | if (filter_idx > 0) { |
2302 | 5.02M | BEST_SEG_INFO *ref_bsi = bsi_buf; |
2303 | 5.02M | subpelmv = 0; |
2304 | 5.02M | have_ref = 1; |
2305 | | |
2306 | 10.0M | for (ref = 0; ref < 1 + has_second_rf; ++ref) { |
2307 | 5.02M | subpelmv |= mv_has_subpel(&mode_mv[this_mode][ref].as_mv); |
2308 | 5.02M | have_ref &= mode_mv[this_mode][ref].as_int == |
2309 | 5.02M | ref_bsi->rdstat[block][mode_idx].mvs[ref].as_int; |
2310 | 5.02M | } |
2311 | | |
2312 | 5.02M | if (filter_idx > 1 && !subpelmv && !have_ref) { |
2313 | 17.8k | ref_bsi = bsi_buf + 1; |
2314 | 17.8k | have_ref = 1; |
2315 | 35.6k | for (ref = 0; ref < 1 + has_second_rf; ++ref) |
2316 | 17.8k | have_ref &= mode_mv[this_mode][ref].as_int == |
2317 | 17.8k | ref_bsi->rdstat[block][mode_idx].mvs[ref].as_int; |
2318 | 17.8k | } |
2319 | | |
2320 | 5.02M | if (!subpelmv && have_ref && |
2321 | 5.02M | ref_bsi->rdstat[block][mode_idx].brdcost < INT64_MAX) { |
2322 | 2.08M | memcpy(&bsi->rdstat[block][mode_idx], |
2323 | 2.08M | &ref_bsi->rdstat[block][mode_idx], sizeof(SEG_RDSTAT)); |
2324 | 2.08M | if (num_4x4_blocks_wide > 1) |
2325 | 0 | bsi->rdstat[block + 1][mode_idx].eobs = |
2326 | 0 | ref_bsi->rdstat[block + 1][mode_idx].eobs; |
2327 | 2.08M | if (num_4x4_blocks_high > 1) |
2328 | 0 | bsi->rdstat[block + 2][mode_idx].eobs = |
2329 | 0 | ref_bsi->rdstat[block + 2][mode_idx].eobs; |
2330 | | |
2331 | 2.08M | if (bsi->rdstat[block][mode_idx].brdcost < best_rd) { |
2332 | 1.09M | mode_selected = this_mode; |
2333 | 1.09M | best_rd = bsi->rdstat[block][mode_idx].brdcost; |
2334 | 1.09M | } |
2335 | 2.08M | continue; |
2336 | 2.08M | } |
2337 | 5.02M | } |
2338 | | |
2339 | 56.4M | bsi->rdstat[block][mode_idx].brdcost = encode_inter_mb_segment( |
2340 | 56.4M | cpi, x, bsi->segment_rd - this_segment_rd, block, |
2341 | 56.4M | &bsi->rdstat[block][mode_idx].byrate, |
2342 | 56.4M | &bsi->rdstat[block][mode_idx].bdist, |
2343 | 56.4M | &bsi->rdstat[block][mode_idx].bsse, bsi->rdstat[block][mode_idx].ta, |
2344 | 56.4M | bsi->rdstat[block][mode_idx].tl, mi_row, mi_col); |
2345 | 56.4M | if (bsi->rdstat[block][mode_idx].brdcost < INT64_MAX) { |
2346 | 47.6M | bsi->rdstat[block][mode_idx].brdcost += RDCOST( |
2347 | 47.6M | x->rdmult, x->rddiv, bsi->rdstat[block][mode_idx].brate, 0); |
2348 | 47.6M | bsi->rdstat[block][mode_idx].brate += |
2349 | 47.6M | bsi->rdstat[block][mode_idx].byrate; |
2350 | 47.6M | bsi->rdstat[block][mode_idx].eobs = p->eobs[block]; |
2351 | 47.6M | if (num_4x4_blocks_wide > 1) |
2352 | 4.69M | bsi->rdstat[block + 1][mode_idx].eobs = p->eobs[block + 1]; |
2353 | 47.6M | if (num_4x4_blocks_high > 1) |
2354 | 4.64M | bsi->rdstat[block + 2][mode_idx].eobs = p->eobs[block + 2]; |
2355 | 47.6M | } |
2356 | | |
2357 | 56.4M | if (bsi->rdstat[block][mode_idx].brdcost < best_rd) { |
2358 | 23.5M | mode_selected = this_mode; |
2359 | 23.5M | best_rd = bsi->rdstat[block][mode_idx].brdcost; |
2360 | 23.5M | } |
2361 | 56.4M | } /*for each 4x4 mode*/ |
2362 | | |
2363 | 18.8M | if (best_rd == INT64_MAX) { |
2364 | 1.61M | int iy, midx; |
2365 | 3.72M | for (iy = block + 1; iy < 4; ++iy) |
2366 | 10.5M | for (midx = 0; midx < INTER_MODES; ++midx) |
2367 | 8.42M | bsi->rdstat[iy][midx].brdcost = INT64_MAX; |
2368 | 1.61M | bsi->segment_rd = INT64_MAX; |
2369 | 1.61M | return INT64_MAX; |
2370 | 1.61M | } |
2371 | | |
2372 | 17.2M | mode_idx = INTER_OFFSET(mode_selected); |
2373 | 17.2M | memcpy(t_above, bsi->rdstat[block][mode_idx].ta, sizeof(t_above)); |
2374 | 17.2M | memcpy(t_left, bsi->rdstat[block][mode_idx].tl, sizeof(t_left)); |
2375 | | |
2376 | 17.2M | set_and_cost_bmi_mvs(cpi, x, xd, block, mode_selected, |
2377 | 17.2M | mode_mv[mode_selected], frame_mv, seg_mvs[block], |
2378 | 17.2M | bsi->ref_mv, x->nmvjointcost, x->mvcost); |
2379 | | |
2380 | 17.2M | br += bsi->rdstat[block][mode_idx].brate; |
2381 | 17.2M | bd += bsi->rdstat[block][mode_idx].bdist; |
2382 | 17.2M | block_sse += bsi->rdstat[block][mode_idx].bsse; |
2383 | 17.2M | segmentyrate += bsi->rdstat[block][mode_idx].byrate; |
2384 | 17.2M | this_segment_rd += bsi->rdstat[block][mode_idx].brdcost; |
2385 | | |
2386 | 17.2M | if (this_segment_rd > bsi->segment_rd) { |
2387 | 487k | int iy, midx; |
2388 | 1.16M | for (iy = block + 1; iy < 4; ++iy) |
2389 | 3.41M | for (midx = 0; midx < INTER_MODES; ++midx) |
2390 | 2.72M | bsi->rdstat[iy][midx].brdcost = INT64_MAX; |
2391 | 487k | bsi->segment_rd = INT64_MAX; |
2392 | 487k | return INT64_MAX; |
2393 | 487k | } |
2394 | 17.2M | } |
2395 | 10.7M | } /* for each label */ |
2396 | | |
2397 | 4.06M | bsi->r = br; |
2398 | 4.06M | bsi->d = bd; |
2399 | 4.06M | bsi->segment_yrate = segmentyrate; |
2400 | 4.06M | bsi->segment_rd = this_segment_rd; |
2401 | 4.06M | bsi->sse = block_sse; |
2402 | | |
2403 | | // update the coding decisions |
2404 | 20.3M | for (k = 0; k < 4; ++k) bsi->modes[k] = mi->bmi[k].as_mode; |
2405 | | |
2406 | 4.06M | if (bsi->segment_rd > best_rd_so_far) return INT64_MAX; |
2407 | | /* set it to the best */ |
2408 | 20.3M | for (i = 0; i < 4; i++) { |
2409 | 16.2M | mode_idx = INTER_OFFSET(bsi->modes[i]); |
2410 | 16.2M | mi->bmi[i].as_mv[0].as_int = bsi->rdstat[i][mode_idx].mvs[0].as_int; |
2411 | 16.2M | if (has_second_ref(mi)) |
2412 | 0 | mi->bmi[i].as_mv[1].as_int = bsi->rdstat[i][mode_idx].mvs[1].as_int; |
2413 | 16.2M | x->plane[0].eobs[i] = bsi->rdstat[i][mode_idx].eobs; |
2414 | 16.2M | mi->bmi[i].as_mode = bsi->modes[i]; |
2415 | 16.2M | } |
2416 | | |
2417 | | /* |
2418 | | * used to set mbmi->mv.as_int |
2419 | | */ |
2420 | 4.06M | *returntotrate = bsi->r; |
2421 | 4.06M | *returndistortion = bsi->d; |
2422 | 4.06M | *returnyrate = bsi->segment_yrate; |
2423 | 4.06M | *skippable = vp9_is_skippable_in_plane(x, BLOCK_8X8, 0); |
2424 | 4.06M | *psse = bsi->sse; |
2425 | 4.06M | mi->mode = bsi->modes[3]; |
2426 | | |
2427 | 4.06M | return bsi->segment_rd; |
2428 | 4.06M | } |
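For filter_idx > 0, rd_pick_best_sub8x8_mode() reuses the per-block RD statistics computed with an earlier interpolation filter whenever the chosen mv has no sub-pel component and matches the earlier pass, because integer-pel prediction is identical for every filter. A condensed sketch of that memoization condition; the has_subpel/same_mv flags abstract the mv comparisons done above, and ToyRDStat is invented.

#include <limits.h>

typedef struct { long long brdcost; } ToyRDStat; /* invented */

/* Reuse is safe only when the prediction is guaranteed bit-identical to the
 * earlier filter pass and that pass actually produced a valid cost. */
static int can_reuse_stats(int has_subpel, int same_mv_as_prev_pass,
                           const ToyRDStat *prev) {
  return !has_subpel && same_mv_as_prev_pass && prev->brdcost < LLONG_MAX;
}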
2429 | | |
2430 | | static void estimate_ref_frame_costs(const VP9_COMMON *cm, |
2431 | | const MACROBLOCKD *xd, int segment_id, |
2432 | | unsigned int *ref_costs_single, |
2433 | | unsigned int *ref_costs_comp, |
2434 | 5.99M | vpx_prob *comp_mode_p) { |
2435 | 5.99M | int seg_ref_active = |
2436 | 5.99M | segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME); |
2437 | 5.99M | if (seg_ref_active) { |
2438 | 0 | memset(ref_costs_single, 0, MAX_REF_FRAMES * sizeof(*ref_costs_single)); |
2439 | 0 | memset(ref_costs_comp, 0, MAX_REF_FRAMES * sizeof(*ref_costs_comp)); |
2440 | 0 | *comp_mode_p = 128; |
2441 | 5.99M | } else { |
2442 | 5.99M | vpx_prob intra_inter_p = vp9_get_intra_inter_prob(cm, xd); |
2443 | 5.99M | vpx_prob comp_inter_p = 128; |
2444 | | |
2445 | 5.99M | if (cm->reference_mode == REFERENCE_MODE_SELECT) { |
2446 | 0 | comp_inter_p = vp9_get_reference_mode_prob(cm, xd); |
2447 | 0 | *comp_mode_p = comp_inter_p; |
2448 | 5.99M | } else { |
2449 | 5.99M | *comp_mode_p = 128; |
2450 | 5.99M | } |
2451 | | |
2452 | 5.99M | ref_costs_single[INTRA_FRAME] = vp9_cost_bit(intra_inter_p, 0); |
2453 | | |
2454 | 5.99M | if (cm->reference_mode != COMPOUND_REFERENCE) { |
2455 | 5.99M | vpx_prob ref_single_p1 = vp9_get_pred_prob_single_ref_p1(cm, xd); |
2456 | 5.99M | vpx_prob ref_single_p2 = vp9_get_pred_prob_single_ref_p2(cm, xd); |
2457 | 5.99M | unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1); |
2458 | | |
2459 | 5.99M | if (cm->reference_mode == REFERENCE_MODE_SELECT) |
2460 | 0 | base_cost += vp9_cost_bit(comp_inter_p, 0); |
2461 | | |
2462 | 5.99M | ref_costs_single[LAST_FRAME] = ref_costs_single[GOLDEN_FRAME] = |
2463 | 5.99M | ref_costs_single[ALTREF_FRAME] = base_cost; |
2464 | 5.99M | ref_costs_single[LAST_FRAME] += vp9_cost_bit(ref_single_p1, 0); |
2465 | 5.99M | ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p1, 1); |
2466 | 5.99M | ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p1, 1); |
2467 | 5.99M | ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p2, 0); |
2468 | 5.99M | ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p2, 1); |
2469 | 5.99M | } else { |
2470 | 0 | ref_costs_single[LAST_FRAME] = 512; |
2471 | 0 | ref_costs_single[GOLDEN_FRAME] = 512; |
2472 | 0 | ref_costs_single[ALTREF_FRAME] = 512; |
2473 | 0 | } |
2474 | 5.99M | if (cm->reference_mode != SINGLE_REFERENCE) { |
2475 | 0 | vpx_prob ref_comp_p = vp9_get_pred_prob_comp_ref_p(cm, xd); |
2476 | 0 | unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1); |
2477 | |
2478 | 0 | if (cm->reference_mode == REFERENCE_MODE_SELECT) |
2479 | 0 | base_cost += vp9_cost_bit(comp_inter_p, 1); |
2480 | |
2481 | 0 | ref_costs_comp[LAST_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 0); |
2482 | 0 | ref_costs_comp[GOLDEN_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 1); |
2483 | 5.99M | } else { |
2484 | 5.99M | ref_costs_comp[LAST_FRAME] = 512; |
2485 | 5.99M | ref_costs_comp[GOLDEN_FRAME] = 512; |
2486 | 5.99M | } |
2487 | 5.99M | } |
2488 | 5.99M | } |
2489 | | |
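/*
 * An illustrative, self-contained sketch of the single-reference costing
 * pattern used in estimate_ref_frame_costs() above: a shared "this block is
 * inter" base cost plus one bit cost per branch of the LAST / GOLDEN / ALTREF
 * decision tree.  cost_bit() and the probabilities in main() are invented
 * stand-ins for vp9_cost_bit() and the context-derived probabilities; the
 * real encoder uses precomputed tables rather than log2().
 */
#include <math.h>
#include <stdio.h>

/* Approximate cost, in 1/256th-bit style units, of coding `bit` when
 * P(bit == 0) is prob/256. */
static unsigned int cost_bit(int prob, int bit) {
  const double p = (bit ? 256 - prob : prob) / 256.0;
  return (unsigned int)(-log2(p) * 256.0 + 0.5);
}

int main(void) {
  const int intra_inter_p = 100; /* P(intra), illustrative value */
  const int ref_single_p1 = 128; /* LAST vs {GOLDEN, ALTREF}, illustrative */
  const int ref_single_p2 = 128; /* GOLDEN vs ALTREF, illustrative */

  const unsigned int base_cost = cost_bit(intra_inter_p, 1); /* "inter" bit */
  const unsigned int cost_last = base_cost + cost_bit(ref_single_p1, 0);
  const unsigned int cost_golden =
      base_cost + cost_bit(ref_single_p1, 1) + cost_bit(ref_single_p2, 0);
  const unsigned int cost_altref =
      base_cost + cost_bit(ref_single_p1, 1) + cost_bit(ref_single_p2, 1);

  printf("LAST=%u GOLDEN=%u ALTREF=%u\n", cost_last, cost_golden, cost_altref);
  return 0;
}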
2490 | | static void store_coding_context( |
2491 | | MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int mode_index, |
2492 | | int64_t comp_pred_diff[REFERENCE_MODES], |
2493 | 3.75M | int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS], int skippable) { |
2494 | 3.75M | MACROBLOCKD *const xd = &x->e_mbd; |
2495 | | |
2496 | | // Take a snapshot of the coding context so it can be |
2497 | | // restored if we decide to encode this way |
2498 | 3.75M | ctx->skip = x->skip; |
2499 | 3.75M | ctx->skippable = skippable; |
2500 | 3.75M | ctx->best_mode_index = mode_index; |
2501 | 3.75M | ctx->mic = *xd->mi[0]; |
2502 | 3.75M | ctx->mbmi_ext = *x->mbmi_ext; |
2503 | 3.75M | ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE]; |
2504 | 3.75M | ctx->comp_pred_diff = (int)comp_pred_diff[COMPOUND_REFERENCE]; |
2505 | 3.75M | ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT]; |
2506 | | |
2507 | 3.75M | memcpy(ctx->best_filter_diff, best_filter_diff, |
2508 | 3.75M | sizeof(*best_filter_diff) * SWITCHABLE_FILTER_CONTEXTS); |
2509 | 3.75M | } |
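/*
 * An illustrative sketch of the snapshot idea behind store_coding_context():
 * the winning decision state is copied by value into a context object so it
 * can be replayed later if this block size is ultimately chosen.  ToyContext
 * and snapshot() are invented stand-ins, not encoder types.
 */
#include <stdio.h>
#include <string.h>

typedef struct {
  int skip;
  int best_mode_index;
  int pred_diff[3];
} ToyContext;

static void snapshot(ToyContext *ctx, int skip, int mode_index,
                     const int pred_diff[3]) {
  ctx->skip = skip;
  ctx->best_mode_index = mode_index;
  memcpy(ctx->pred_diff, pred_diff, sizeof(ctx->pred_diff));
}

int main(void) {
  const int diff[3] = { -5, 12, 0 };
  ToyContext ctx;
  snapshot(&ctx, 1, 7, diff);
  printf("mode=%d skip=%d\n", ctx.best_mode_index, ctx.skip);
  return 0;
}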
2510 | | |
2511 | | static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, |
2512 | | MV_REFERENCE_FRAME ref_frame, |
2513 | | BLOCK_SIZE block_size, int mi_row, int mi_col, |
2514 | | int_mv frame_nearest_mv[MAX_REF_FRAMES], |
2515 | | int_mv frame_near_mv[MAX_REF_FRAMES], |
2516 | 13.3M | struct buf_2d yv12_mb[4][MAX_MB_PLANE]) { |
2517 | 13.3M | const VP9_COMMON *cm = &cpi->common; |
2518 | 13.3M | const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame); |
2519 | 13.3M | MACROBLOCKD *const xd = &x->e_mbd; |
2520 | 13.3M | MODE_INFO *const mi = xd->mi[0]; |
2521 | 13.3M | int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame]; |
2522 | 13.3M | const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf; |
2523 | 13.3M | MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; |
2524 | | |
2525 | 13.3M | assert(yv12 != NULL); |
2526 | | |
2527 | | // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this |
2528 | | // use the UV scaling factors. |
2529 | 13.3M | vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf); |
2530 | | |
2531 | | // Gets an initial list of candidate vectors from neighbours and orders them |
2532 | 13.3M | vp9_find_mv_refs(cm, xd, mi, ref_frame, candidates, mi_row, mi_col, |
2533 | 13.3M | mbmi_ext->mode_context); |
2534 | | |
2535 | | // Candidate refinement carried out at encoder and decoder |
2536 | 13.3M | vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates, |
2537 | 13.3M | &frame_nearest_mv[ref_frame], |
2538 | 13.3M | &frame_near_mv[ref_frame]); |
2539 | | |
2540 | | // Further refinement that is encode side only to test the top few candidates |
2541 | | // in full and choose the best as the centre point for subsequent searches. |
2542 | | // The current implementation doesn't support scaling. |
2543 | 13.3M | if (!vp9_is_scaled(sf) && block_size >= BLOCK_8X8) |
2544 | 7.08M | vp9_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride, ref_frame, |
2545 | 7.08M | block_size); |
2546 | 13.3M | } |
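/*
 * An illustrative sketch of the candidate-MV step that setup_buffer_inter()
 * drives: spatial/temporal candidates are collected, duplicates dropped, and
 * the first two survivors become the NEARESTMV and NEARMV predictors.  ToyMv
 * and the candidate list are invented for the example; the real work is done
 * by vp9_find_mv_refs() and vp9_find_best_ref_mvs().
 */
#include <stdio.h>

typedef struct {
  int row, col;
} ToyMv;

static void pick_nearest_near(const ToyMv *cand, int n, ToyMv *nearest_mv,
                              ToyMv *near_mv) {
  ToyMv uniq[2] = { { 0, 0 }, { 0, 0 } };
  int n_uniq = 0;
  for (int i = 0; i < n && n_uniq < 2; ++i) {
    int dup = 0;
    for (int j = 0; j < n_uniq; ++j)
      if (uniq[j].row == cand[i].row && uniq[j].col == cand[i].col) dup = 1;
    if (!dup) uniq[n_uniq++] = cand[i];
  }
  *nearest_mv = uniq[0];
  *near_mv = uniq[1];
}

int main(void) {
  const ToyMv cand[] = { { 4, -8 }, { 4, -8 }, { 0, 0 }, { 12, 4 } };
  ToyMv nearest_mv, near_mv;
  pick_nearest_near(cand, 4, &nearest_mv, &near_mv);
  printf("nearest=(%d,%d) near=(%d,%d)\n", nearest_mv.row, nearest_mv.col,
         near_mv.row, near_mv.col);
  return 0;
}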
2547 | | |
2548 | | #if CONFIG_NON_GREEDY_MV |
2549 | | static int ref_frame_to_gf_rf_idx(int ref_frame) { |
2550 | | if (ref_frame == GOLDEN_FRAME) { |
2551 | | return 0; |
2552 | | } |
2553 | | if (ref_frame == LAST_FRAME) { |
2554 | | return 1; |
2555 | | } |
2556 | | if (ref_frame == ALTREF_FRAME) { |
2557 | | return 2; |
2558 | | } |
2559 | | assert(0); |
2560 | | return -1; |
2561 | | } |
2562 | | #endif |
2563 | | |
2564 | | static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, |
2565 | | int mi_row, int mi_col, int_mv *tmp_mv, |
2566 | 6.96M | int *rate_mv) { |
2567 | 6.96M | MACROBLOCKD *xd = &x->e_mbd; |
2568 | 6.96M | const VP9_COMMON *cm = &cpi->common; |
2569 | 6.96M | MODE_INFO *mi = xd->mi[0]; |
2570 | 6.96M | struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0 } }; |
2571 | 6.96M | int step_param; |
2572 | 6.96M | MV mvp_full; |
2573 | 6.96M | int ref = mi->ref_frame[0]; |
2574 | 6.96M | MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv; |
2575 | 6.96M | const MvLimits tmp_mv_limits = x->mv_limits; |
2576 | 6.96M | int cost_list[5]; |
2577 | 6.96M | const int best_predmv_idx = x->mv_best_ref_index[ref]; |
2578 | 6.96M | const YV12_BUFFER_CONFIG *scaled_ref_frame = |
2579 | 6.96M | vp9_get_scaled_ref_frame(cpi, ref); |
2580 | 6.96M | const int pw = num_4x4_blocks_wide_lookup[bsize] << 2; |
2581 | 6.96M | const int ph = num_4x4_blocks_high_lookup[bsize] << 2; |
2582 | 6.96M | MV pred_mv[3]; |
2583 | | |
2584 | 6.96M | int bestsme = INT_MAX; |
2585 | | #if CONFIG_NON_GREEDY_MV |
2586 | | int gf_group_idx = cpi->twopass.gf_group.index; |
2587 | | int gf_rf_idx = ref_frame_to_gf_rf_idx(ref); |
2588 | | BLOCK_SIZE square_bsize = get_square_block_size(bsize); |
2589 | | int_mv nb_full_mvs[NB_MVS_NUM] = { 0 }; |
2590 | | MotionField *motion_field = vp9_motion_field_info_get_motion_field( |
2591 | | &cpi->motion_field_info, gf_group_idx, gf_rf_idx, square_bsize); |
2592 | | const int nb_full_mv_num = |
2593 | | vp9_prepare_nb_full_mvs(motion_field, mi_row, mi_col, nb_full_mvs); |
2594 | | const int lambda = (pw * ph) / 4; |
2595 | | assert(pw * ph == lambda << 2); |
2596 | | #else // CONFIG_NON_GREEDY_MV |
2597 | 6.96M | int sadpb = x->sadperbit16; |
2598 | 6.96M | #endif // CONFIG_NON_GREEDY_MV |
2599 | | |
2600 | 6.96M | pred_mv[0] = x->mbmi_ext->ref_mvs[ref][0].as_mv; |
2601 | 6.96M | pred_mv[1] = x->mbmi_ext->ref_mvs[ref][1].as_mv; |
2602 | 6.96M | pred_mv[2] = x->pred_mv[ref]; |
2603 | | |
2604 | 6.96M | if (scaled_ref_frame) { |
2605 | 0 | int i; |
2606 | | // Swap out the reference frame for a version that's been scaled to |
2607 | | // match the resolution of the current frame, allowing the existing |
2608 | | // motion search code to be used without additional modifications. |
2609 | 0 | for (i = 0; i < MAX_MB_PLANE; i++) backup_yv12[i] = xd->plane[i].pre[0]; |
2610 | |
2611 | 0 | vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL); |
2612 | 0 | } |
2613 | | |
2614 | | // Work out the size of the first step in the mv step search. |
2615 | | // 0 here is maximum length first step. 1 is VPXMAX >> 1 etc. |
2616 | 6.96M | if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) { |
2617 | | // Take wtd average of the step_params based on the last frame's |
2618 | | // max mv magnitude and that based on the best ref mvs of the current |
2619 | | // block for the given reference. |
2620 | 6.96M | step_param = |
2621 | 6.96M | (vp9_init_search_range(x->max_mv_context[ref]) + cpi->mv_step_param) / |
2622 | 6.96M | 2; |
2623 | 6.96M | } else { |
2624 | 0 | step_param = cpi->mv_step_param; |
2625 | 0 | } |
2626 | | |
2627 | 6.96M | if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64) { |
2628 | 6.92M | const int boffset = |
2629 | 6.92M | 2 * (b_width_log2_lookup[BLOCK_64X64] - |
2630 | 6.92M | VPXMIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize])); |
2631 | 6.92M | step_param = VPXMAX(step_param, boffset); |
2632 | 6.92M | } |
2633 | | |
2634 | 6.96M | if (cpi->sf.adaptive_motion_search) { |
2635 | 6.96M | int bwl = b_width_log2_lookup[bsize]; |
2636 | 6.96M | int bhl = b_height_log2_lookup[bsize]; |
2637 | 6.96M | int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4); |
2638 | | |
2639 | 6.96M | if (tlevel < 5) step_param += 2; |
2640 | | |
2641 | | // prev_mv_sad is not set up for dynamically scaled frames.
2642 | 6.96M | if (cpi->oxcf.resize_mode != RESIZE_DYNAMIC) { |
2643 | 6.96M | int i; |
2644 | 25.8M | for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) { |
2645 | 19.6M | if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) { |
2646 | 785k | x->pred_mv[ref].row = INT16_MAX; |
2647 | 785k | x->pred_mv[ref].col = INT16_MAX; |
2648 | 785k | tmp_mv->as_int = INVALID_MV; |
2649 | | |
2650 | 785k | if (scaled_ref_frame) { |
2651 | 0 | int j; |
2652 | 0 | for (j = 0; j < MAX_MB_PLANE; ++j) |
2653 | 0 | xd->plane[j].pre[0] = backup_yv12[j]; |
2654 | 0 | } |
2655 | 785k | return; |
2656 | 785k | } |
2657 | 19.6M | } |
2658 | 6.96M | } |
2659 | 6.96M | } |
2660 | | |
2661 | | // Note: MV limits are modified here. Always restore the original values |
2662 | | // after full-pixel motion search. |
2663 | 6.18M | vp9_set_mv_search_range(&x->mv_limits, &ref_mv); |
2664 | | |
2665 | 6.18M | mvp_full = pred_mv[best_predmv_idx]; |
2666 | 6.18M | mvp_full.col >>= 3; |
2667 | 6.18M | mvp_full.row >>= 3; |
2668 | | |
2669 | | #if CONFIG_NON_GREEDY_MV |
2670 | | bestsme = vp9_full_pixel_diamond_new(cpi, x, bsize, &mvp_full, step_param, |
2671 | | lambda, 1, nb_full_mvs, nb_full_mv_num, |
2672 | | &tmp_mv->as_mv); |
2673 | | #else // CONFIG_NON_GREEDY_MV |
2674 | 6.18M | bestsme = vp9_full_pixel_search( |
2675 | 6.18M | cpi, x, bsize, &mvp_full, step_param, cpi->sf.mv.search_method, sadpb, |
2676 | 6.18M | cond_cost_list(cpi, cost_list), &ref_mv, &tmp_mv->as_mv, INT_MAX, 1); |
2677 | 6.18M | #endif // CONFIG_NON_GREEDY_MV |
2678 | | |
2679 | 6.18M | if (cpi->sf.enhanced_full_pixel_motion_search) { |
2680 | 2.46M | int i; |
2681 | 9.85M | for (i = 0; i < 3; ++i) { |
2682 | 7.39M | int this_me; |
2683 | 7.39M | MV this_mv; |
2684 | 7.39M | int diff_row; |
2685 | 7.39M | int diff_col; |
2686 | 7.39M | int step; |
2687 | | |
2688 | 7.39M | if (pred_mv[i].row == INT16_MAX || pred_mv[i].col == INT16_MAX) continue; |
2689 | 7.28M | if (i == best_predmv_idx) continue; |
2690 | | |
2691 | 4.82M | diff_row = ((int)pred_mv[i].row - |
2692 | 4.82M | pred_mv[i > 0 ? (i - 1) : best_predmv_idx].row) >> |
2693 | 4.82M | 3; |
2694 | 4.82M | diff_col = ((int)pred_mv[i].col - |
2695 | 4.82M | pred_mv[i > 0 ? (i - 1) : best_predmv_idx].col) >> |
2696 | 4.82M | 3; |
2697 | 4.82M | if (diff_row == 0 && diff_col == 0) continue; |
2698 | 3.89M | if (diff_row < 0) diff_row = -diff_row; |
2699 | 3.89M | if (diff_col < 0) diff_col = -diff_col; |
2700 | 3.89M | step = get_msb((diff_row + diff_col + 1) >> 1); |
2701 | 3.89M | if (step <= 0) continue; |
2702 | | |
2703 | 3.71M | mvp_full = pred_mv[i]; |
2704 | 3.71M | mvp_full.col >>= 3; |
2705 | 3.71M | mvp_full.row >>= 3; |
2706 | | #if CONFIG_NON_GREEDY_MV |
2707 | | this_me = vp9_full_pixel_diamond_new( |
2708 | | cpi, x, bsize, &mvp_full, |
2709 | | VPXMAX(step_param, MAX_MVSEARCH_STEPS - step), lambda, 1, nb_full_mvs, |
2710 | | nb_full_mv_num, &this_mv); |
2711 | | #else // CONFIG_NON_GREEDY_MV |
2712 | 3.71M | this_me = vp9_full_pixel_search( |
2713 | 3.71M | cpi, x, bsize, &mvp_full, |
2714 | 3.71M | VPXMAX(step_param, MAX_MVSEARCH_STEPS - step), |
2715 | 3.71M | cpi->sf.mv.search_method, sadpb, cond_cost_list(cpi, cost_list), |
2716 | 3.71M | &ref_mv, &this_mv, INT_MAX, 1); |
2717 | 3.71M | #endif // CONFIG_NON_GREEDY_MV |
2718 | 3.71M | if (this_me < bestsme) { |
2719 | 576k | tmp_mv->as_mv = this_mv; |
2720 | 576k | bestsme = this_me; |
2721 | 576k | } |
2722 | 3.71M | } |
2723 | 2.46M | } |
2724 | | |
2725 | 6.18M | x->mv_limits = tmp_mv_limits; |
2726 | | |
2727 | 6.18M | if (bestsme < INT_MAX) { |
2728 | 6.18M | uint32_t dis; /* TODO: use dis in distortion calculation later. */ |
2729 | 6.18M | cpi->find_fractional_mv_step( |
2730 | 6.18M | x, &tmp_mv->as_mv, &ref_mv, cm->allow_high_precision_mv, x->errorperbit, |
2731 | 6.18M | &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop, |
2732 | 6.18M | cpi->sf.mv.subpel_search_level, cond_cost_list(cpi, cost_list), |
2733 | 6.18M | x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, pw, ph, |
2734 | 6.18M | cpi->sf.use_accurate_subpel_search); |
2735 | 6.18M | } |
2736 | 6.18M | *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv, x->nmvjointcost, |
2737 | 6.18M | x->mvcost, MV_COST_WEIGHT); |
2738 | | |
2739 | 6.18M | x->pred_mv[ref] = tmp_mv->as_mv; |
2740 | | |
2741 | 6.18M | if (scaled_ref_frame) { |
2742 | 0 | int i; |
2743 | 0 | for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i]; |
2744 | 0 | } |
2745 | 6.18M | } |
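/*
 * An illustrative sketch of how single_motion_search() above picks its
 * initial full-pel search step: average a frame-level step with one derived
 * from the block's expected motion range, then allow a coarser first step
 * when the predicted-MV SAD is already small.  init_search_range() is a
 * simplified stand-in for vp9_init_search_range(), and the inputs in main()
 * are invented.
 */
#include <stdio.h>

/* Larger expected motion -> the search should start with a larger range,
 * i.e. fewer skipped first steps (stand-in scale, not the real table). */
static int init_search_range(int max_mv_magnitude) {
  int bits = 0;
  while ((1 << (bits + 1)) <= max_mv_magnitude && bits < 10) ++bits;
  return 10 - bits;
}

static int pick_step_param(int max_mv_context, int frame_step_param,
                           int pred_mv_sad, int bwl, int bhl) {
  /* Average of the frame-level and block-level estimates. */
  int step_param = (init_search_range(max_mv_context) + frame_step_param) / 2;

  /* A small prediction SAD suggests the MV predictor is already close, so a
   * coarser first step is acceptable (mirrors the tlevel < 5 test above). */
  const int tlevel = pred_mv_sad >> (bwl + bhl + 4);
  if (tlevel < 5) step_param += 2;
  return step_param;
}

int main(void) {
  printf("step_param=%d\n", pick_step_param(64, 3, 4000, 2, 2));
  return 0;
}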
2746 | | |
2747 | | static INLINE void restore_dst_buf(MACROBLOCKD *xd, |
2748 | | uint8_t *orig_dst[MAX_MB_PLANE], |
2749 | 46.6M | int orig_dst_stride[MAX_MB_PLANE]) { |
2750 | 46.6M | int i; |
2751 | 186M | for (i = 0; i < MAX_MB_PLANE; i++) { |
2752 | 139M | xd->plane[i].dst.buf = orig_dst[i]; |
2753 | 139M | xd->plane[i].dst.stride = orig_dst_stride[i]; |
2754 | 139M | } |
2755 | 46.6M | } |
2756 | | |
2757 | | // In some situations we want to discount the apparent cost of a new motion
2758 | | // vector. Where there is a subtle motion field and especially where there is |
2759 | | // low spatial complexity then it can be hard to cover the cost of a new motion |
2760 | | // vector in a single block, even if that motion vector reduces distortion. |
2761 | | // However, once established that vector may be usable through the nearest and |
2762 | | // near mv modes to reduce distortion in subsequent blocks and also improve |
2763 | | // visual quality. |
2764 | | static int discount_newmv_test(VP9_COMP *cpi, int this_mode, int_mv this_mv, |
2765 | | int_mv (*mode_mv)[MAX_REF_FRAMES], int ref_frame, |
2766 | 24.9M | int mi_row, int mi_col, BLOCK_SIZE bsize) { |
2767 | | #if CONFIG_NON_GREEDY_MV |
2768 | | (void)mode_mv; |
2769 | | (void)this_mv; |
2770 | | if (this_mode == NEWMV && bsize >= BLOCK_8X8 && cpi->tpl_ready) { |
2771 | | const int gf_group_idx = cpi->twopass.gf_group.index; |
2772 | | const int gf_rf_idx = ref_frame_to_gf_rf_idx(ref_frame); |
2773 | | const TplDepFrame tpl_frame = cpi->tpl_stats[gf_group_idx]; |
2774 | | const MotionField *motion_field = vp9_motion_field_info_get_motion_field( |
2775 | | &cpi->motion_field_info, gf_group_idx, gf_rf_idx, cpi->tpl_bsize); |
2776 | | const int tpl_block_mi_h = num_8x8_blocks_high_lookup[cpi->tpl_bsize]; |
2777 | | const int tpl_block_mi_w = num_8x8_blocks_wide_lookup[cpi->tpl_bsize]; |
2778 | | const int tpl_mi_row = mi_row - (mi_row % tpl_block_mi_h); |
2779 | | const int tpl_mi_col = mi_col - (mi_col % tpl_block_mi_w); |
2780 | | const int mv_mode = |
2781 | | tpl_frame |
2782 | | .mv_mode_arr[gf_rf_idx][tpl_mi_row * tpl_frame.stride + tpl_mi_col]; |
2783 | | if (mv_mode == NEW_MV_MODE) { |
2784 | | int_mv tpl_new_mv = |
2785 | | vp9_motion_field_mi_get_mv(motion_field, tpl_mi_row, tpl_mi_col); |
2786 | | int row_diff = abs(tpl_new_mv.as_mv.row - this_mv.as_mv.row); |
2787 | | int col_diff = abs(tpl_new_mv.as_mv.col - this_mv.as_mv.col); |
2788 | | if (VPXMAX(row_diff, col_diff) <= 8) { |
2789 | | return 1; |
2790 | | } else { |
2791 | | return 0; |
2792 | | } |
2793 | | } else { |
2794 | | return 0; |
2795 | | } |
2796 | | } else { |
2797 | | return 0; |
2798 | | } |
2799 | | #else |
2800 | 24.9M | (void)mi_row; |
2801 | 24.9M | (void)mi_col; |
2802 | 24.9M | (void)bsize; |
2803 | 24.9M | return (!cpi->rc.is_src_frame_alt_ref && (this_mode == NEWMV) && |
2804 | 24.9M | (this_mv.as_int != 0) && |
2805 | 24.9M | ((mode_mv[NEARESTMV][ref_frame].as_int == 0) || |
2806 | 11.5M | (mode_mv[NEARESTMV][ref_frame].as_int == INVALID_MV)) && |
2807 | 24.9M | ((mode_mv[NEARMV][ref_frame].as_int == 0) || |
2808 | 4.15M | (mode_mv[NEARMV][ref_frame].as_int == INVALID_MV))); |
2809 | 24.9M | #endif |
2810 | 24.9M | } |
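/*
 * An illustrative sketch of how the NEWMV discount chosen by
 * discount_newmv_test() is applied by its callers: when the discount is
 * active, only a fraction of the motion-vector rate is charged, but never
 * less than one unit.  The factor mirrors NEW_MV_DISCOUNT_FACTOR (8) defined
 * near the top of this file.
 */
#include <stdio.h>

static int discounted_mv_rate(int rate_mv, int discount_active) {
  const int discount_factor = 8; /* mirrors NEW_MV_DISCOUNT_FACTOR */
  if (!discount_active) return rate_mv;
  return (rate_mv / discount_factor) > 1 ? (rate_mv / discount_factor) : 1;
}

int main(void) {
  printf("full=%d discounted=%d\n", discounted_mv_rate(96, 0),
         discounted_mv_rate(96, 1));
  return 0;
}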
2811 | | |
2812 | | static int64_t handle_inter_mode( |
2813 | | VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int *rate2, |
2814 | | int64_t *distortion, int *skippable, int *rate_y, int *rate_uv, |
2815 | | struct buf_2d *recon, int *disable_skip, int_mv (*mode_mv)[MAX_REF_FRAMES], |
2816 | | int mi_row, int mi_col, int_mv single_newmv[MAX_REF_FRAMES], |
2817 | | INTERP_FILTER (*single_filter)[MAX_REF_FRAMES], |
2818 | | int (*single_skippable)[MAX_REF_FRAMES], int *single_mode_rate, |
2819 | | int64_t *psse, const int64_t ref_best_rd, int64_t *mask_filter, |
2820 | 20.0M | int64_t filter_cache[], int best_mode_index) { |
2821 | 20.0M | VP9_COMMON *cm = &cpi->common; |
2822 | 20.0M | MACROBLOCKD *xd = &x->e_mbd; |
2823 | 20.0M | MODE_INFO *mi = xd->mi[0]; |
2824 | 20.0M | MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; |
2825 | 20.0M | const int is_comp_pred = has_second_ref(mi); |
2826 | 20.0M | const int this_mode = mi->mode; |
2827 | 20.0M | int_mv *frame_mv = mode_mv[this_mode]; |
2828 | 20.0M | int i; |
2829 | 20.0M | int refs[2] = { mi->ref_frame[0], |
2830 | 20.0M | (mi->ref_frame[1] < 0 ? 0 : mi->ref_frame[1]) }; |
2831 | 20.0M | int_mv cur_mv[2]; |
2832 | 20.0M | #if CONFIG_VP9_HIGHBITDEPTH |
2833 | 20.0M | DECLARE_ALIGNED(16, uint16_t, tmp_buf16[MAX_MB_PLANE * 64 * 64]); |
2834 | 20.0M | uint8_t *tmp_buf; |
2835 | | #else |
2836 | | DECLARE_ALIGNED(16, uint8_t, tmp_buf[MAX_MB_PLANE * 64 * 64]); |
2837 | | #endif // CONFIG_VP9_HIGHBITDEPTH |
2838 | 20.0M | int intpel_mv; |
2839 | 20.0M | int64_t rd, tmp_rd = INT64_MAX, best_rd = INT64_MAX; |
2840 | 20.0M | int best_needs_copy = 0; |
2841 | 20.0M | uint8_t *orig_dst[MAX_MB_PLANE]; |
2842 | 20.0M | int orig_dst_stride[MAX_MB_PLANE]; |
2843 | 20.0M | int rs = 0; |
2844 | 20.0M | INTERP_FILTER best_filter = SWITCHABLE; |
2845 | 20.0M | uint8_t skip_txfm[MAX_MB_PLANE << 2] = { 0 }; |
2846 | 20.0M | int64_t bsse[MAX_MB_PLANE << 2] = { 0 }; |
2847 | | |
2848 | 20.0M | const int bsl = mi_width_log2_lookup[bsize]; |
2849 | 20.0M | const int blk_parity = (((mi_row + mi_col) >> bsl) + |
2850 | 20.0M | get_chessboard_index(cm->current_video_frame)) & |
2851 | 20.0M | 0x1; |
2852 | 20.0M | const int pred_filter_search = |
2853 | 20.0M | (cpi->sf.cb_pred_filter_search >= 2) && blk_parity; |
2854 | | |
2855 | 20.0M | int skip_txfm_sb = 0; |
2856 | 20.0M | int64_t skip_sse_sb = INT64_MAX; |
2857 | 20.0M | int64_t distortion_y = 0, distortion_uv = 0; |
2858 | | |
2859 | 20.0M | #if CONFIG_VP9_HIGHBITDEPTH |
2860 | 20.0M | if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { |
2861 | 0 | tmp_buf = CONVERT_TO_BYTEPTR(tmp_buf16); |
2862 | 20.0M | } else { |
2863 | 20.0M | tmp_buf = (uint8_t *)tmp_buf16; |
2864 | 20.0M | } |
2865 | 20.0M | #endif // CONFIG_VP9_HIGHBITDEPTH |
2866 | | |
2867 | 20.0M | if (pred_filter_search) { |
2868 | 0 | INTERP_FILTER af = SWITCHABLE, lf = SWITCHABLE; |
2869 | 0 | if (xd->above_mi && is_inter_block(xd->above_mi)) |
2870 | 0 | af = xd->above_mi->interp_filter; |
2871 | 0 | if (xd->left_mi && is_inter_block(xd->left_mi)) |
2872 | 0 | lf = xd->left_mi->interp_filter; |
2873 | |
2874 | 0 | if ((this_mode != NEWMV) || (af == lf)) best_filter = af; |
2875 | 0 | } |
2876 | | |
2877 | 20.0M | if (is_comp_pred) { |
2878 | 0 | if (frame_mv[refs[0]].as_int == INVALID_MV || |
2879 | 0 | frame_mv[refs[1]].as_int == INVALID_MV) |
2880 | 0 | return INT64_MAX; |
2881 | | |
2882 | 0 | if (cpi->sf.adaptive_mode_search) { |
2883 | 0 | if (single_filter[this_mode][refs[0]] == |
2884 | 0 | single_filter[this_mode][refs[1]]) |
2885 | 0 | best_filter = single_filter[this_mode][refs[0]]; |
2886 | 0 | } |
2887 | 0 | } |
2888 | | |
2889 | 20.0M | if (this_mode == NEWMV) { |
2890 | 6.96M | int rate_mv; |
2891 | 6.96M | if (is_comp_pred) { |
2892 | | // Decide number of joint motion search iterations |
2893 | 0 | const int num_joint_search_iters = get_joint_search_iters( |
2894 | 0 | cpi->sf.comp_inter_joint_search_iter_level, bsize); |
2895 | | |
2896 | | // Initialize mv using single prediction mode result. |
2897 | 0 | frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int; |
2898 | 0 | frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int; |
2899 | |
2900 | 0 | if (num_joint_search_iters) { |
2901 | | #if CONFIG_COLLECT_COMPONENT_TIMING |
2902 | | start_timing(cpi, joint_motion_search_time); |
2903 | | #endif |
2904 | 0 | joint_motion_search(cpi, x, bsize, frame_mv, mi_row, mi_col, |
2905 | 0 | single_newmv, &rate_mv, num_joint_search_iters); |
2906 | | #if CONFIG_COLLECT_COMPONENT_TIMING |
2907 | | end_timing(cpi, joint_motion_search_time); |
2908 | | #endif |
2909 | 0 | } else { |
2910 | 0 | rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]].as_mv, |
2911 | 0 | &x->mbmi_ext->ref_mvs[refs[0]][0].as_mv, |
2912 | 0 | x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); |
2913 | 0 | rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]].as_mv, |
2914 | 0 | &x->mbmi_ext->ref_mvs[refs[1]][0].as_mv, |
2915 | 0 | x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); |
2916 | 0 | } |
2917 | 0 | *rate2 += rate_mv; |
2918 | 6.96M | } else { |
2919 | 6.96M | int_mv tmp_mv; |
2920 | | #if CONFIG_COLLECT_COMPONENT_TIMING |
2921 | | start_timing(cpi, single_motion_search_time); |
2922 | | #endif |
2923 | 6.96M | single_motion_search(cpi, x, bsize, mi_row, mi_col, &tmp_mv, &rate_mv); |
2924 | | #if CONFIG_COLLECT_COMPONENT_TIMING |
2925 | | end_timing(cpi, single_motion_search_time); |
2926 | | #endif |
2927 | 6.96M | if (tmp_mv.as_int == INVALID_MV) return INT64_MAX; |
2928 | | |
2929 | 6.18M | frame_mv[refs[0]].as_int = xd->mi[0]->bmi[0].as_mv[0].as_int = |
2930 | 6.18M | tmp_mv.as_int; |
2931 | 6.18M | single_newmv[refs[0]].as_int = tmp_mv.as_int; |
2932 | | |
2933 | | // Estimate the rate implications of a new mv but discount this |
2934 | | // under certain circumstances where we want to help initiate a weak |
2935 | | // motion field, where the distortion gain for a single block may not |
2936 | | // be enough to overcome the cost of a new mv. |
2937 | 6.18M | if (discount_newmv_test(cpi, this_mode, tmp_mv, mode_mv, refs[0], mi_row, |
2938 | 6.18M | mi_col, bsize)) { |
2939 | 1.94M | *rate2 += VPXMAX((rate_mv / NEW_MV_DISCOUNT_FACTOR), 1); |
2940 | 4.24M | } else { |
2941 | 4.24M | *rate2 += rate_mv; |
2942 | 4.24M | } |
2943 | 6.18M | } |
2944 | 6.96M | } |
2945 | | |
2946 | 38.0M | for (i = 0; i < is_comp_pred + 1; ++i) { |
2947 | 19.2M | cur_mv[i] = frame_mv[refs[i]]; |
2948 | | // Clip "next_nearest" so that it does not extend too far out of the image
2949 | 19.2M | if (this_mode != NEWMV) clamp_mv2(&cur_mv[i].as_mv, xd); |
2950 | | |
2951 | 19.2M | if (mv_check_bounds(&x->mv_limits, &cur_mv[i].as_mv)) return INT64_MAX; |
2952 | 18.8M | mi->mv[i].as_int = cur_mv[i].as_int; |
2953 | 18.8M | } |
2954 | | |
2955 | | // do first prediction into the destination buffer. Do the next |
2956 | | // prediction into a temporary buffer. Then keep track of which one |
2957 | | // of these currently holds the best predictor, and use the other |
2958 | | // one for future predictions. In the end, copy from tmp_buf to |
2959 | | // dst if necessary. |
2960 | 75.2M | for (i = 0; i < MAX_MB_PLANE; i++) { |
2961 | 56.4M | orig_dst[i] = xd->plane[i].dst.buf; |
2962 | 56.4M | orig_dst_stride[i] = xd->plane[i].dst.stride; |
2963 | 56.4M | } |
2964 | | |
2965 | | // We don't include the cost of the second reference here, because there
2966 | | // are only two options: Last/ARF or Golden/ARF; the second reference is
2967 | | // always known to be ARF.
2968 | | // |
2969 | | // Under some circumstances we discount the cost of new mv mode to encourage |
2970 | | // initiation of a motion field. |
2971 | 18.8M | if (discount_newmv_test(cpi, this_mode, frame_mv[refs[0]], mode_mv, refs[0], |
2972 | 18.8M | mi_row, mi_col, bsize)) { |
2973 | 1.94M | *rate2 += |
2974 | 1.94M | VPXMIN(cost_mv_ref(cpi, this_mode, mbmi_ext->mode_context[refs[0]]), |
2975 | 1.94M | cost_mv_ref(cpi, NEARESTMV, mbmi_ext->mode_context[refs[0]])); |
2976 | 16.8M | } else { |
2977 | 16.8M | *rate2 += cost_mv_ref(cpi, this_mode, mbmi_ext->mode_context[refs[0]]); |
2978 | 16.8M | } |
2979 | | |
2980 | 18.8M | if (!is_comp_pred && cpi->sf.prune_single_mode_based_on_mv_diff_mode_rate) { |
2981 | 7.21M | single_mode_rate[INTER_OFFSET(this_mode)] = *rate2; |
2982 | | // Prune NEARMV and ZEROMV modes based on motion vector difference and mode |
2983 | | // rate. |
2984 | 7.21M | if (skip_single_mode_based_on_mode_rate(mode_mv, single_mode_rate, |
2985 | 7.21M | this_mode, refs[0], *rate2, |
2986 | 7.21M | best_mode_index)) { |
2987 | | // Check that when a single inter mode is pruned, the NEARESTMV or NEWMV
2988 | | // modes are not early terminated. This ensures that not all single modes
2989 | | // are skipped when the speed feature is enabled.
2990 | 218k | assert(single_mode_rate[INTER_OFFSET(NEARESTMV)] != INT_MAX || |
2991 | 218k | single_mode_rate[INTER_OFFSET(NEWMV)] != INT_MAX); |
2992 | 218k | return INT64_MAX; |
2993 | 218k | } |
2994 | 7.21M | } |
2995 | 18.5M | if (RDCOST(x->rdmult, x->rddiv, *rate2, 0) > ref_best_rd && |
2996 | 18.5M | mi->mode != NEARESTMV) |
2997 | 176k | return INT64_MAX; |
2998 | | |
2999 | | // Are all MVs integer pel for Y and UV |
3000 | 18.4M | intpel_mv = !mv_has_subpel(&mi->mv[0].as_mv); |
3001 | 18.4M | if (is_comp_pred) intpel_mv &= !mv_has_subpel(&mi->mv[1].as_mv); |
3002 | | |
3003 | | #if CONFIG_COLLECT_COMPONENT_TIMING |
3004 | | start_timing(cpi, interp_filter_time); |
3005 | | #endif |
3006 | | // Search for best switchable filter by checking the variance of |
3007 | | // pred error irrespective of whether the filter will be used |
3008 | 92.0M | for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) filter_cache[i] = INT64_MAX; |
3009 | | |
3010 | 18.4M | if (cm->interp_filter != BILINEAR) { |
3011 | | // Use cb pattern for filter eval when filter is not switchable |
3012 | 18.4M | const int enable_interp_search = |
3013 | 18.4M | (cpi->sf.cb_pred_filter_search && cm->interp_filter != SWITCHABLE) |
3014 | 18.4M | ? blk_parity |
3015 | 18.4M | : 1; |
3016 | 18.4M | if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) { |
3017 | 0 | best_filter = EIGHTTAP; |
3018 | 18.4M | } else if (best_filter == SWITCHABLE && enable_interp_search) { |
3019 | 17.2M | int newbest; |
3020 | 17.2M | int tmp_rate_sum = 0; |
3021 | 17.2M | int64_t tmp_dist_sum = 0; |
3022 | | |
3023 | 47.9M | for (i = 0; i < SWITCHABLE_FILTERS; ++i) { |
3024 | 37.7M | int j; |
3025 | 37.7M | int64_t rs_rd; |
3026 | 37.7M | int tmp_skip_sb = 0; |
3027 | 37.7M | int64_t tmp_skip_sse = INT64_MAX; |
3028 | 37.7M | const int enable_earlyterm = |
3029 | 37.7M | cpi->sf.early_term_interp_search_plane_rd && cm->interp_filter != i; |
3030 | 37.7M | int64_t filt_best_rd; |
3031 | | |
3032 | 37.7M | mi->interp_filter = i; |
3033 | 37.7M | rs = vp9_get_switchable_rate(cpi, xd); |
3034 | 37.7M | rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0); |
3035 | | |
3036 | 37.7M | if (i > 0 && intpel_mv) { |
3037 | 10.1M | rd = RDCOST(x->rdmult, x->rddiv, tmp_rate_sum, tmp_dist_sum); |
3038 | 10.1M | filter_cache[i] = rd; |
3039 | 10.1M | filter_cache[SWITCHABLE_FILTERS] = |
3040 | 10.1M | VPXMIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd); |
3041 | 10.1M | if (cm->interp_filter == SWITCHABLE) rd += rs_rd; |
3042 | 10.1M | *mask_filter = VPXMAX(*mask_filter, rd); |
3043 | 27.5M | } else { |
3044 | 27.5M | int rate_sum = 0; |
3045 | 27.5M | int64_t dist_sum = 0; |
3046 | 27.5M | if (i > 0 && cpi->sf.adaptive_interp_filter_search && |
3047 | 27.5M | (cpi->sf.interp_filter_search_mask & (1 << i))) { |
3048 | 0 | rate_sum = INT_MAX; |
3049 | 0 | dist_sum = INT64_MAX; |
3050 | 0 | continue; |
3051 | 0 | } |
3052 | | |
3053 | 27.5M | if ((cm->interp_filter == SWITCHABLE && (!i || best_needs_copy)) || |
3054 | 27.5M | (cm->interp_filter != SWITCHABLE && |
3055 | 17.4M | (cm->interp_filter == mi->interp_filter || |
3056 | 18.0M | (i == 0 && intpel_mv)))) { |
3057 | 18.0M | restore_dst_buf(xd, orig_dst, orig_dst_stride); |
3058 | 18.0M | } else { |
3059 | 38.1M | for (j = 0; j < MAX_MB_PLANE; j++) { |
3060 | 28.5M | xd->plane[j].dst.buf = tmp_buf + j * 64 * 64; |
3061 | 28.5M | xd->plane[j].dst.stride = 64; |
3062 | 28.5M | } |
3063 | 9.53M | } |
3064 | | |
3065 | 27.5M | filt_best_rd = |
3066 | 27.5M | cm->interp_filter == SWITCHABLE ? (best_rd - rs_rd) : best_rd; |
3067 | 27.5M | if (build_inter_pred_model_rd_earlyterm( |
3068 | 27.5M | cpi, mi_row, mi_col, bsize, x, xd, &rate_sum, &dist_sum, |
3069 | 27.5M | &tmp_skip_sb, &tmp_skip_sse, enable_earlyterm, |
3070 | 27.5M | filt_best_rd)) { |
3071 | 2.33M | filter_cache[i] = INT64_MAX; |
3072 | 2.33M | continue; |
3073 | 2.33M | } |
3074 | | |
3075 | 25.2M | rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum); |
3076 | 25.2M | filter_cache[i] = rd; |
3077 | 25.2M | filter_cache[SWITCHABLE_FILTERS] = |
3078 | 25.2M | VPXMIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd); |
3079 | 25.2M | if (cm->interp_filter == SWITCHABLE) rd += rs_rd; |
3080 | 25.2M | *mask_filter = VPXMAX(*mask_filter, rd); |
3081 | | |
3082 | 25.2M | if (i == 0 && intpel_mv) { |
3083 | 9.31M | tmp_rate_sum = rate_sum; |
3084 | 9.31M | tmp_dist_sum = dist_sum; |
3085 | 9.31M | } |
3086 | 25.2M | } |
3087 | | |
3088 | 35.3M | if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) { |
3089 | 17.0M | if (rd / 2 > ref_best_rd) { |
3090 | 7.07M | restore_dst_buf(xd, orig_dst, orig_dst_stride); |
3091 | 7.07M | return INT64_MAX; |
3092 | 7.07M | } |
3093 | 17.0M | } |
3094 | 28.3M | newbest = i == 0 || rd < best_rd; |
3095 | | |
3096 | 28.3M | if (newbest) { |
3097 | 14.2M | best_rd = rd; |
3098 | 14.2M | best_filter = mi->interp_filter; |
3099 | 14.2M | if (cm->interp_filter == SWITCHABLE && i && !intpel_mv) |
3100 | 1.65M | best_needs_copy = !best_needs_copy; |
3101 | 14.2M | } |
3102 | | |
3103 | 28.3M | if ((cm->interp_filter == SWITCHABLE && newbest) || |
3104 | 28.3M | (cm->interp_filter != SWITCHABLE && |
3105 | 21.5M | cm->interp_filter == mi->interp_filter)) { |
3106 | 12.1M | tmp_rd = best_rd; |
3107 | | |
3108 | 12.1M | skip_txfm_sb = tmp_skip_sb; |
3109 | 12.1M | skip_sse_sb = tmp_skip_sse; |
3110 | 12.1M | memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm)); |
3111 | 12.1M | memcpy(bsse, x->bsse, sizeof(bsse)); |
3112 | 12.1M | } |
3113 | 28.3M | } |
3114 | 10.2M | restore_dst_buf(xd, orig_dst, orig_dst_stride); |
3115 | 10.2M | } |
3116 | 18.4M | } |
3117 | | #if CONFIG_COLLECT_COMPONENT_TIMING |
3118 | | end_timing(cpi, interp_filter_time); |
3119 | | #endif |
3120 | | // Set the appropriate filter |
3121 | 11.3M | mi->interp_filter = |
3122 | 11.3M | cm->interp_filter != SWITCHABLE ? cm->interp_filter : best_filter; |
3123 | 11.3M | rs = cm->interp_filter == SWITCHABLE ? vp9_get_switchable_rate(cpi, xd) : 0; |
3124 | | |
3125 | 11.3M | if (tmp_rd != INT64_MAX) { |
3126 | 10.2M | if (best_needs_copy) { |
3127 | | // again temporarily set the buffers to local memory to prevent a memcpy |
3128 | 6.48M | for (i = 0; i < MAX_MB_PLANE; i++) { |
3129 | 4.86M | xd->plane[i].dst.buf = tmp_buf + i * 64 * 64; |
3130 | 4.86M | xd->plane[i].dst.stride = 64; |
3131 | 4.86M | } |
3132 | 1.62M | } |
3133 | 10.2M | rd = tmp_rd + RDCOST(x->rdmult, x->rddiv, rs, 0); |
3134 | 10.2M | } else { |
3135 | 1.11M | int tmp_rate; |
3136 | 1.11M | int64_t tmp_dist; |
3137 | | // Handles the special case when a filter that is not in the |
3138 | | // switchable list (ex. bilinear) is indicated at the frame level, or |
3139 | | // skip condition holds. |
3140 | 1.11M | build_inter_pred_model_rd_earlyterm( |
3141 | 1.11M | cpi, mi_row, mi_col, bsize, x, xd, &tmp_rate, &tmp_dist, &skip_txfm_sb, |
3142 | 1.11M | &skip_sse_sb, 0 /*do_earlyterm*/, INT64_MAX); |
3143 | 1.11M | rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist); |
3144 | 1.11M | memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm)); |
3145 | 1.11M | memcpy(bsse, x->bsse, sizeof(bsse)); |
3146 | 1.11M | } |
3147 | | |
3148 | 11.3M | if (!is_comp_pred) single_filter[this_mode][refs[0]] = mi->interp_filter; |
3149 | | |
3150 | 11.3M | if (cpi->sf.adaptive_mode_search) |
3151 | 0 | if (is_comp_pred) |
3152 | 0 | if (single_skippable[this_mode][refs[0]] && |
3153 | 0 | single_skippable[this_mode][refs[1]]) |
3154 | 0 | memset(skip_txfm, SKIP_TXFM_AC_DC, sizeof(skip_txfm)); |
3155 | | |
3156 | 11.3M | if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) { |
3157 | | // if current pred_error modeled rd is substantially more than the best |
3158 | | // so far, do not bother doing full rd |
3159 | 11.0M | if (rd / 2 > ref_best_rd) { |
3160 | 552k | restore_dst_buf(xd, orig_dst, orig_dst_stride); |
3161 | 552k | return INT64_MAX; |
3162 | 552k | } |
3163 | 11.0M | } |
3164 | | |
3165 | 10.7M | if (cm->interp_filter == SWITCHABLE) *rate2 += rs; |
3166 | | |
3167 | 10.7M | memcpy(x->skip_txfm, skip_txfm, sizeof(skip_txfm)); |
3168 | 10.7M | memcpy(x->bsse, bsse, sizeof(bsse)); |
3169 | | |
3170 | 10.7M | if (!skip_txfm_sb || xd->lossless) { |
3171 | 10.7M | int skippable_y, skippable_uv; |
3172 | 10.7M | int64_t sseuv = INT64_MAX; |
3173 | 10.7M | int64_t rdcosty = INT64_MAX; |
3174 | | |
3175 | | // Y cost and distortion |
3176 | 10.7M | vp9_subtract_plane(x, bsize, 0); |
3177 | 10.7M | super_block_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse, bsize, |
3178 | 10.7M | ref_best_rd, recon); |
3179 | | |
3180 | 10.7M | if (*rate_y == INT_MAX) { |
3181 | 1.78M | *rate2 = INT_MAX; |
3182 | 1.78M | *distortion = INT64_MAX; |
3183 | 1.78M | restore_dst_buf(xd, orig_dst, orig_dst_stride); |
3184 | 1.78M | return INT64_MAX; |
3185 | 1.78M | } |
3186 | | |
3187 | 8.95M | *rate2 += *rate_y; |
3188 | 8.95M | *distortion += distortion_y; |
3189 | | |
3190 | 8.95M | rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion); |
3191 | 8.95M | rdcosty = VPXMIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse)); |
3192 | | |
3193 | 8.95M | if (!super_block_uvrd(cpi, x, rate_uv, &distortion_uv, &skippable_uv, |
3194 | 8.95M | &sseuv, bsize, ref_best_rd - rdcosty)) { |
3195 | 2.41M | *rate2 = INT_MAX; |
3196 | 2.41M | *distortion = INT64_MAX; |
3197 | 2.41M | restore_dst_buf(xd, orig_dst, orig_dst_stride); |
3198 | 2.41M | return INT64_MAX; |
3199 | 2.41M | } |
3200 | | |
3201 | 6.53M | *psse += sseuv; |
3202 | 6.53M | *rate2 += *rate_uv; |
3203 | 6.53M | *distortion += distortion_uv; |
3204 | 6.53M | *skippable = skippable_y && skippable_uv; |
3205 | 6.53M | } else { |
3206 | 41.5k | x->skip = 1; |
3207 | 41.5k | *disable_skip = 1; |
3208 | | |
3209 | | // The cost of skip bit needs to be added. |
3210 | 41.5k | *rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1); |
3211 | | |
3212 | 41.5k | *distortion = skip_sse_sb; |
3213 | 41.5k | } |
3214 | | |
3215 | 6.57M | if (!is_comp_pred) single_skippable[this_mode][refs[0]] = *skippable; |
3216 | | |
3217 | 6.57M | restore_dst_buf(xd, orig_dst, orig_dst_stride); |
3218 | 6.57M | return 0; // The rate-distortion cost will be re-calculated by caller. |
3219 | 10.7M | } |
3220 | | #endif // !CONFIG_REALTIME_ONLY |
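/*
 * An illustrative sketch of the switchable-filter loop inside
 * handle_inter_mode() above: each candidate filter gets a modelled RD cost,
 * the per-filter costs are cached, and the search bails out early when even
 * half of the first modelled cost exceeds the best RD seen so far.
 * model_rd_for_filter() and its numbers are invented stand-ins for the
 * prediction-and-model step in the real code.
 */
#include <stdint.h>
#include <stdio.h>

#define N_FILTERS 3

static int64_t model_rd_for_filter(int filter) {
  static const int64_t fake_rd[N_FILTERS] = { 1200, 1100, 1350 };
  return fake_rd[filter];
}

/* Returns the best filter index, or -1 if the RD breakout fired. */
static int search_filters(int64_t ref_best_rd,
                          int64_t filter_cache[N_FILTERS],
                          int64_t *best_rd_out) {
  int best_filter = -1;
  int64_t best_rd = INT64_MAX;
  for (int f = 0; f < N_FILTERS; ++f) {
    const int64_t rd = model_rd_for_filter(f);
    filter_cache[f] = rd;
    /* Breakout: a modelled cost this far above the reference best cannot win
     * after the full transform search, so stop working on this block. */
    if (f == 0 && ref_best_rd != INT64_MAX && rd / 2 > ref_best_rd) return -1;
    if (rd < best_rd) {
      best_rd = rd;
      best_filter = f;
    }
  }
  *best_rd_out = best_rd;
  return best_filter;
}

int main(void) {
  int64_t cache[N_FILTERS], best_rd;
  const int best = search_filters(5000, cache, &best_rd);
  if (best >= 0)
    printf("best filter %d, rd %lld\n", best, (long long)best_rd);
  else
    printf("rd breakout\n");
  return 0;
}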
3221 | | |
3222 | | void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *rd_cost, |
3223 | | BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, |
3224 | 2.85M | int64_t best_rd) { |
3225 | 2.85M | VP9_COMMON *const cm = &cpi->common; |
3226 | 2.85M | MACROBLOCKD *const xd = &x->e_mbd; |
3227 | 2.85M | struct macroblockd_plane *const pd = xd->plane; |
3228 | 2.85M | int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0; |
3229 | 2.85M | int y_skip = 0, uv_skip = 0; |
3230 | 2.85M | int64_t dist_y = 0, dist_uv = 0; |
3231 | 2.85M | TX_SIZE max_uv_tx_size; |
3232 | 2.85M | x->skip_encode = 0; |
3233 | 2.85M | ctx->skip = 0; |
3234 | 2.85M | xd->mi[0]->ref_frame[0] = INTRA_FRAME; |
3235 | 2.85M | xd->mi[0]->ref_frame[1] = NO_REF_FRAME; |
3236 | | // Initialize interp_filter here so we do not have to check for inter block |
3237 | | // modes in get_pred_context_switchable_interp() |
3238 | 2.85M | xd->mi[0]->interp_filter = SWITCHABLE_FILTERS; |
3239 | | |
3240 | 2.85M | if (bsize >= BLOCK_8X8) { |
3241 | 1.53M | if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, &dist_y, |
3242 | 1.53M | &y_skip, bsize, best_rd) >= best_rd) { |
3243 | 230k | rd_cost->rate = INT_MAX; |
3244 | 230k | return; |
3245 | 230k | } |
3246 | 1.53M | } else { |
3247 | 1.31M | y_skip = 0; |
3248 | 1.31M | if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly, |
3249 | 1.31M | &dist_y, best_rd) >= best_rd) { |
3250 | 288k | rd_cost->rate = INT_MAX; |
3251 | 288k | return; |
3252 | 288k | } |
3253 | 1.31M | } |
3254 | 2.33M | max_uv_tx_size = uv_txsize_lookup[bsize][xd->mi[0]->tx_size] |
3255 | 2.33M | [pd[1].subsampling_x][pd[1].subsampling_y]; |
3256 | 2.33M | rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly, &dist_uv, |
3257 | 2.33M | &uv_skip, VPXMAX(BLOCK_8X8, bsize), max_uv_tx_size); |
3258 | | |
3259 | 2.33M | if (y_skip && uv_skip) { |
3260 | 206k | rd_cost->rate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly + |
3261 | 206k | vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1); |
3262 | 206k | rd_cost->dist = dist_y + dist_uv; |
3263 | 2.13M | } else { |
3264 | 2.13M | rd_cost->rate = |
3265 | 2.13M | rate_y + rate_uv + vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0); |
3266 | 2.13M | rd_cost->dist = dist_y + dist_uv; |
3267 | 2.13M | } |
3268 | | |
3269 | 2.33M | ctx->mic = *xd->mi[0]; |
3270 | 2.33M | ctx->mbmi_ext = *x->mbmi_ext; |
3271 | 2.33M | rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist); |
3272 | 2.33M | } |
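/*
 * An illustrative sketch of the rate combination at the end of
 * vp9_rd_pick_intra_mode_sb() above: when both the luma and chroma planes can
 * be coded as skipped, the token costs are backed out and only the skip flag
 * is paid for; otherwise the full rates plus a non-skip flag are charged.
 * cost_skip_flag() is an invented stand-in for
 * vp9_cost_bit(vp9_get_skip_prob(cm, xd), ...).
 */
#include <stdio.h>

static int cost_skip_flag(int skipped) { return skipped ? 30 : 6; } /* stand-in */

static int combined_intra_rate(int rate_y, int rate_y_tokenonly, int rate_uv,
                               int rate_uv_tokenonly, int y_skip, int uv_skip) {
  if (y_skip && uv_skip)
    return rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
           cost_skip_flag(1);
  return rate_y + rate_uv + cost_skip_flag(0);
}

int main(void) {
  printf("skip=%d noskip=%d\n", combined_intra_rate(500, 420, 200, 150, 1, 1),
         combined_intra_rate(500, 420, 200, 150, 1, 0));
  return 0;
}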
3273 | | |
3274 | | #if !CONFIG_REALTIME_ONLY |
3275 | | // This function is designed to apply a bias or adjustment to an rd value based |
3276 | | // on the relative variance of the source and reconstruction. |
3277 | 0 | #define LOW_VAR_THRESH 250 |
3278 | 0 | #define VAR_MULT 250 |
3279 | | static unsigned int max_var_adjust[VP9E_CONTENT_INVALID] = { 16, 16, 250 }; |
3280 | | |
3281 | | static void rd_variance_adjustment(VP9_COMP *cpi, MACROBLOCK *x, |
3282 | | BLOCK_SIZE bsize, int64_t *this_rd, |
3283 | | struct buf_2d *recon, |
3284 | | MV_REFERENCE_FRAME ref_frame, |
3285 | | MV_REFERENCE_FRAME second_ref_frame, |
3286 | 0 | PREDICTION_MODE this_mode) { |
3287 | 0 | MACROBLOCKD *const xd = &x->e_mbd; |
3288 | 0 | unsigned int rec_variance; |
3289 | 0 | unsigned int src_variance; |
3290 | 0 | unsigned int src_rec_min; |
3291 | 0 | unsigned int var_diff = 0; |
3292 | 0 | unsigned int var_factor = 0; |
3293 | 0 | unsigned int adj_max; |
3294 | 0 | unsigned int low_var_thresh = LOW_VAR_THRESH; |
3295 | 0 | const int bw = num_8x8_blocks_wide_lookup[bsize]; |
3296 | 0 | const int bh = num_8x8_blocks_high_lookup[bsize]; |
3297 | 0 | vp9e_tune_content content_type = cpi->oxcf.content; |
3298 | |
3299 | 0 | if (*this_rd == INT64_MAX) return; |
3300 | | |
3301 | 0 | #if CONFIG_VP9_HIGHBITDEPTH |
3302 | 0 | if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { |
3303 | 0 | rec_variance = vp9_high_get_sby_variance(cpi, recon, bsize, xd->bd); |
3304 | 0 | src_variance = |
3305 | 0 | vp9_high_get_sby_variance(cpi, &x->plane[0].src, bsize, xd->bd); |
3306 | 0 | } else { |
3307 | 0 | rec_variance = vp9_get_sby_variance(cpi, recon, bsize); |
3308 | 0 | src_variance = vp9_get_sby_variance(cpi, &x->plane[0].src, bsize); |
3309 | 0 | } |
3310 | | #else |
3311 | | rec_variance = vp9_get_sby_variance(cpi, recon, bsize); |
3312 | | src_variance = vp9_get_sby_variance(cpi, &x->plane[0].src, bsize); |
3313 | | #endif // CONFIG_VP9_HIGHBITDEPTH |
3314 | | |
3315 | | // Scale based on area in 8x8 blocks |
3316 | 0 | rec_variance /= (bw * bh); |
3317 | 0 | src_variance /= (bw * bh); |
3318 | |
3319 | 0 | if (content_type == VP9E_CONTENT_FILM) { |
3320 | 0 | if (cpi->oxcf.pass == 2) { |
3321 | | // Adjust low variance threshold based on estimated group noise energy.
3322 | 0 | double noise_factor = |
3323 | 0 | (double)cpi->twopass.gf_group.group_noise_energy / SECTION_NOISE_DEF; |
3324 | 0 | low_var_thresh = (unsigned int)(low_var_thresh * noise_factor); |
3325 | |
3326 | 0 | if (ref_frame == INTRA_FRAME) { |
3327 | 0 | low_var_thresh *= 2; |
3328 | 0 | if (this_mode == DC_PRED) low_var_thresh *= 5; |
3329 | 0 | } else if (second_ref_frame > INTRA_FRAME) { |
3330 | 0 | low_var_thresh *= 2; |
3331 | 0 | } |
3332 | 0 | } |
3333 | 0 | } else { |
3334 | 0 | low_var_thresh = LOW_VAR_THRESH / 2; |
3335 | 0 | } |
3336 | | |
3337 | | // Lower of source (raw per pixel value) and recon variance. Note that |
3338 | | // if the source per pixel is 0 then the recon value here will not be per |
3339 | | // pixel (see above) so will likely be much larger. |
3340 | 0 | src_rec_min = VPXMIN(src_variance, rec_variance); |
3341 | |
3342 | 0 | if (src_rec_min > low_var_thresh) return; |
3343 | | |
3344 | | // We care more when the reconstruction has lower variance so give this case |
3345 | | // a stronger weighting. |
3346 | 0 | var_diff = (src_variance > rec_variance) ? (src_variance - rec_variance) * 2 |
3347 | 0 | : (rec_variance - src_variance) / 2; |
3348 | |
3349 | 0 | adj_max = max_var_adjust[content_type]; |
3350 | |
3351 | 0 | var_factor = |
3352 | 0 | (unsigned int)((int64_t)VAR_MULT * var_diff) / VPXMAX(1, src_variance); |
3353 | 0 | var_factor = VPXMIN(adj_max, var_factor); |
3354 | |
3355 | 0 | if ((content_type == VP9E_CONTENT_FILM) && |
3356 | 0 | ((ref_frame == INTRA_FRAME) || (second_ref_frame > INTRA_FRAME))) { |
3357 | 0 | var_factor *= 2; |
3358 | 0 | } |
3359 | |
3360 | 0 | *this_rd += (*this_rd * var_factor) / 100; |
3361 | |
3362 | 0 | (void)xd; |
3363 | 0 | } |
3364 | | #endif // !CONFIG_REALTIME_ONLY |
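/*
 * An illustrative, self-contained version of the core arithmetic in
 * rd_variance_adjustment() above, with the thresholds passed in rather than
 * derived from encoder state.  A reconstruction that is much flatter than the
 * source (detail lost) is weighted twice as heavily as one that is busier,
 * and the resulting percentage bias is capped at adj_max.
 */
#include <stdint.h>
#include <stdio.h>

static int64_t variance_biased_rd(int64_t rd, unsigned int src_var,
                                  unsigned int rec_var,
                                  unsigned int low_var_thresh,
                                  unsigned int adj_max) {
  const unsigned int var_mult = 250; /* mirrors VAR_MULT */
  const unsigned int src_rec_min = src_var < rec_var ? src_var : rec_var;
  unsigned int var_diff, var_factor;

  /* Both source and reconstruction are busy: leave the RD cost alone. */
  if (src_rec_min > low_var_thresh) return rd;

  var_diff = (src_var > rec_var) ? (src_var - rec_var) * 2
                                 : (rec_var - src_var) / 2;
  var_factor = (unsigned int)((int64_t)var_mult * var_diff) /
               (src_var > 0 ? src_var : 1);
  if (var_factor > adj_max) var_factor = adj_max;

  return rd + (rd * var_factor) / 100;
}

int main(void) {
  printf("%lld\n", (long long)variance_biased_rd(10000, 200, 40, 250, 250));
  return 0;
}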
3365 | | |
3366 | | // Do we have an internal image edge (e.g. formatting bars). |
3367 | 2.58M | int vp9_internal_image_edge(VP9_COMP *cpi) { |
3368 | 2.58M | return (cpi->oxcf.pass == 2) && |
3369 | 2.58M | ((cpi->twopass.this_frame_stats.inactive_zone_rows > 0) || |
3370 | 0 | (cpi->twopass.this_frame_stats.inactive_zone_cols > 0)); |
3371 | 2.58M | } |
3372 | | |
3373 | | // Checks to see if a super block is on a horizontal image edge. |
3374 | | // In most cases this is the "real" edge unless there are formatting |
3375 | | // bars embedded in the stream. |
3376 | 3.63M | int vp9_active_h_edge(VP9_COMP *cpi, int mi_row, int mi_step) { |
3377 | 3.63M | int top_edge = 0; |
3378 | 3.63M | int bottom_edge = cpi->common.mi_rows; |
3379 | 3.63M | int is_active_h_edge = 0; |
3380 | | |
3381 | | // For two pass account for any formatting bars detected. |
3382 | 3.63M | if (cpi->oxcf.pass == 2) { |
3383 | 0 | TWO_PASS *twopass = &cpi->twopass; |
3384 | 0 | vpx_clear_system_state(); |
3385 | | |
3386 | | // The inactive region is specified in MBs not mi units. |
3387 | | // The image edge is in the following MB row. |
3388 | 0 | top_edge += (int)(twopass->this_frame_stats.inactive_zone_rows * 2); |
3389 | |
3390 | 0 | bottom_edge -= (int)(twopass->this_frame_stats.inactive_zone_rows * 2); |
3391 | 0 | bottom_edge = VPXMAX(top_edge, bottom_edge); |
3392 | 0 | } |
3393 | | |
3394 | 3.63M | if (((top_edge >= mi_row) && (top_edge < (mi_row + mi_step))) || |
3395 | 3.63M | ((bottom_edge >= mi_row) && (bottom_edge < (mi_row + mi_step)))) { |
3396 | 2.56M | is_active_h_edge = 1; |
3397 | 2.56M | } |
3398 | 3.63M | return is_active_h_edge; |
3399 | 3.63M | } |
3400 | | |
3401 | | // Checks to see if a super block is on a vertical image edge. |
3402 | | // In most cases this is the "real" edge unless there are formatting |
3403 | | // bars embedded in the stream. |
3404 | 1.16M | int vp9_active_v_edge(VP9_COMP *cpi, int mi_col, int mi_step) { |
3405 | 1.16M | int left_edge = 0; |
3406 | 1.16M | int right_edge = cpi->common.mi_cols; |
3407 | 1.16M | int is_active_v_edge = 0; |
3408 | | |
3409 | | // For two pass account for any formatting bars detected. |
3410 | 1.16M | if (cpi->oxcf.pass == 2) { |
3411 | 0 | TWO_PASS *twopass = &cpi->twopass; |
3412 | 0 | vpx_clear_system_state(); |
3413 | | |
3414 | | // The inactive region is specified in MBs not mi units. |
3415 | | // The image edge is in the following MB row. |
3416 | 0 | left_edge += (int)(twopass->this_frame_stats.inactive_zone_cols * 2); |
3417 | |
3418 | 0 | right_edge -= (int)(twopass->this_frame_stats.inactive_zone_cols * 2); |
3419 | 0 | right_edge = VPXMAX(left_edge, right_edge); |
3420 | 0 | } |
3421 | | |
3422 | 1.16M | if (((left_edge >= mi_col) && (left_edge < (mi_col + mi_step))) || |
3423 | 1.16M | ((right_edge >= mi_col) && (right_edge < (mi_col + mi_step)))) { |
3424 | 237k | is_active_v_edge = 1; |
3425 | 237k | } |
3426 | 1.16M | return is_active_v_edge; |
3427 | 1.16M | } |
3428 | | |
3429 | | // Checks to see if a super block is at the edge of the active image. |
3430 | | // In most cases this is the "real" edge unless there are formatting |
3431 | | // bars embedded in the stream. |
3432 | 2.74M | int vp9_active_edge_sb(VP9_COMP *cpi, int mi_row, int mi_col) { |
3433 | 2.74M | return vp9_active_h_edge(cpi, mi_row, MI_BLOCK_SIZE) || |
3434 | 2.74M | vp9_active_v_edge(cpi, mi_col, MI_BLOCK_SIZE); |
3435 | 2.74M | } |
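/*
 * An illustrative sketch of the inactive-zone bookkeeping used by
 * vp9_active_h_edge() above: detected formatting bars, measured in 16x16 MB
 * rows, shrink the active picture by two 8x8 mi units per MB row, and a block
 * is "on the edge" when either boundary lands inside it.  The inputs in
 * main() are invented.
 */
#include <stdio.h>

static int is_active_h_edge(int mi_row, int mi_step, int mi_rows,
                            double inactive_zone_rows) {
  int top_edge = (int)(inactive_zone_rows * 2); /* MB rows -> mi units */
  int bottom_edge = mi_rows - (int)(inactive_zone_rows * 2);
  if (bottom_edge < top_edge) bottom_edge = top_edge;
  return (top_edge >= mi_row && top_edge < mi_row + mi_step) ||
         (bottom_edge >= mi_row && bottom_edge < mi_row + mi_step);
}

int main(void) {
  /* Roughly a 1080p frame (135 8x8 mi rows) with 2.5 MB rows of bars. */
  printf("%d\n", is_active_h_edge(0, 8, 135, 2.5));
  return 0;
}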
3436 | | |
3437 | | #if !CONFIG_REALTIME_ONLY |
3438 | 3.25M | static void init_frame_mv(int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]) { |
3439 | 48.7M | for (int mode = 0; mode < MB_MODE_COUNT; ++mode) { |
3440 | 227M | for (int ref_frame = 0; ref_frame < MAX_REF_FRAMES; ++ref_frame) { |
3441 | 182M | frame_mv[mode][ref_frame].as_int = INVALID_MV; |
3442 | 182M | } |
3443 | 45.5M | } |
3444 | 3.25M | } |
3445 | | |
3446 | | void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data, |
3447 | | MACROBLOCK *x, int mi_row, int mi_col, |
3448 | | RD_COST *rd_cost, BLOCK_SIZE bsize, |
3449 | 3.25M | PICK_MODE_CONTEXT *ctx, int64_t best_rd_so_far) { |
3450 | 3.25M | VP9_COMMON *const cm = &cpi->common; |
3451 | 3.25M | TileInfo *const tile_info = &tile_data->tile_info; |
3452 | 3.25M | RD_OPT *const rd_opt = &cpi->rd; |
3453 | 3.25M | SPEED_FEATURES *const sf = &cpi->sf; |
3454 | 3.25M | MACROBLOCKD *const xd = &x->e_mbd; |
3455 | 3.25M | MODE_INFO *const mi = xd->mi[0]; |
3456 | 3.25M | MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; |
3457 | 3.25M | const struct segmentation *const seg = &cm->seg; |
3458 | 3.25M | PREDICTION_MODE this_mode; |
3459 | 3.25M | MV_REFERENCE_FRAME ref_frame, second_ref_frame; |
3460 | 3.25M | unsigned char segment_id = mi->segment_id; |
3461 | 3.25M | int comp_pred, i, k; |
3462 | 3.25M | int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]; |
3463 | 3.25M | struct buf_2d yv12_mb[4][MAX_MB_PLANE] = { 0 }; |
3464 | 3.25M | int_mv single_newmv[MAX_REF_FRAMES] = { { 0 } }; |
3465 | 3.25M | INTERP_FILTER single_inter_filter[MB_MODE_COUNT][MAX_REF_FRAMES]; |
3466 | 3.25M | int single_skippable[MB_MODE_COUNT][MAX_REF_FRAMES]; |
3467 | 3.25M | int single_mode_rate[MAX_REF_FRAMES][INTER_MODES]; |
3468 | 3.25M | int64_t best_rd = best_rd_so_far; |
3469 | 3.25M | int64_t best_pred_diff[REFERENCE_MODES]; |
3470 | 3.25M | int64_t best_pred_rd[REFERENCE_MODES]; |
3471 | 3.25M | int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS]; |
3472 | 3.25M | int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]; |
3473 | 3.25M | MODE_INFO best_mbmode; |
3474 | 3.25M | int best_mode_skippable = 0; |
3475 | 3.25M | int midx, best_mode_index = -1; |
3476 | 3.25M | unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES]; |
3477 | 3.25M | vpx_prob comp_mode_p; |
3478 | 3.25M | int64_t best_intra_rd = INT64_MAX; |
3479 | 3.25M | unsigned int best_pred_sse = UINT_MAX; |
3480 | 3.25M | PREDICTION_MODE best_intra_mode = DC_PRED; |
3481 | 3.25M | int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES]; |
3482 | 3.25M | int64_t dist_uv[TX_SIZES]; |
3483 | 3.25M | int skip_uv[TX_SIZES]; |
3484 | 3.25M | PREDICTION_MODE mode_uv[TX_SIZES]; |
3485 | 3.25M | const int intra_cost_penalty = |
3486 | 3.25M | vp9_get_intra_cost_penalty(cpi, bsize, cm->base_qindex, cm->y_dc_delta_q); |
3487 | 3.25M | int best_skip2 = 0; |
3488 | 3.25M | uint8_t ref_frame_skip_mask[2] = { 0, 1 }; |
3489 | 3.25M | uint16_t mode_skip_mask[MAX_REF_FRAMES] = { 0 }; |
3490 | 3.25M | int mode_skip_start = sf->mode_skip_start + 1; |
3491 | 3.25M | const int *const rd_threshes = rd_opt->threshes[segment_id][bsize]; |
3492 | 3.25M | const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize]; |
3493 | 3.25M | int64_t mode_threshold[MAX_MODES]; |
3494 | 3.25M | int8_t *tile_mode_map = tile_data->mode_map[bsize]; |
3495 | 3.25M | int8_t mode_map[MAX_MODES]; // Maintain mode_map information locally to avoid |
3496 | | // lock mechanism involved with reads from |
3497 | | // tile_mode_map |
3498 | 3.25M | const int mode_search_skip_flags = sf->mode_search_skip_flags; |
3499 | 3.25M | const int is_rect_partition = |
3500 | 3.25M | num_4x4_blocks_wide_lookup[bsize] != num_4x4_blocks_high_lookup[bsize]; |
3501 | 3.25M | int64_t mask_filter = 0; |
3502 | 3.25M | int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS]; |
3503 | | |
3504 | 3.25M | struct buf_2d *recon; |
3505 | 3.25M | struct buf_2d recon_buf; |
3506 | 3.25M | #if CONFIG_VP9_HIGHBITDEPTH |
3507 | 3.25M | DECLARE_ALIGNED(16, uint16_t, recon16[64 * 64]); |
3508 | 3.25M | recon_buf.buf = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH |
3509 | 3.25M | ? CONVERT_TO_BYTEPTR(recon16) |
3510 | 3.25M | : (uint8_t *)recon16; |
3511 | | #else |
3512 | | DECLARE_ALIGNED(16, uint8_t, recon8[64 * 64]); |
3513 | | recon_buf.buf = recon8; |
3514 | | #endif // CONFIG_VP9_HIGHBITDEPTH |
3515 | 3.25M | recon_buf.stride = 64; |
3516 | 3.25M | recon = cpi->oxcf.content == VP9E_CONTENT_FILM ? &recon_buf : 0; |
3517 | | |
3518 | 3.25M | vp9_zero(best_mbmode); |
3519 | | |
3520 | 3.25M | x->skip_encode = sf->skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; |
3521 | | |
3522 | 16.2M | for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) filter_cache[i] = INT64_MAX; |
3523 | | |
3524 | 3.25M | estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp, |
3525 | 3.25M | &comp_mode_p); |
3526 | | |
3527 | 13.0M | for (i = 0; i < REFERENCE_MODES; ++i) best_pred_rd[i] = INT64_MAX; |
3528 | 16.2M | for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) |
3529 | 13.0M | best_filter_rd[i] = INT64_MAX; |
3530 | 16.2M | for (i = 0; i < TX_SIZES; i++) rate_uv_intra[i] = INT_MAX; |
3531 | 16.2M | for (i = 0; i < MAX_REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX; |
3532 | 48.7M | for (i = 0; i < MB_MODE_COUNT; ++i) { |
3533 | 227M | for (k = 0; k < MAX_REF_FRAMES; ++k) { |
3534 | 182M | single_inter_filter[i][k] = SWITCHABLE; |
3535 | 182M | single_skippable[i][k] = 0; |
3536 | 182M | } |
3537 | 45.5M | } |
3538 | | |
3539 | 3.25M | rd_cost->rate = INT_MAX; |
3540 | | |
3541 | 3.25M | init_frame_mv(frame_mv); |
3542 | | |
3543 | 13.0M | for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { |
3544 | 9.75M | x->pred_mv_sad[ref_frame] = INT_MAX; |
3545 | 9.75M | if ((cpi->ref_frame_flags & ref_frame_to_flag(ref_frame)) && |
3546 | 9.75M | !(is_rect_partition && (ctx->skip_ref_frame_mask & (1 << ref_frame)))) { |
3547 | 7.08M | assert(get_ref_frame_buffer(cpi, ref_frame) != NULL); |
3548 | 7.08M | setup_buffer_inter(cpi, x, ref_frame, bsize, mi_row, mi_col, |
3549 | 7.08M | frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb); |
3550 | 7.08M | } |
3551 | 9.75M | frame_mv[NEWMV][ref_frame].as_int = INVALID_MV; |
3552 | 9.75M | frame_mv[ZEROMV][ref_frame].as_int = 0; |
3553 | 9.75M | } |
3554 | | |
3555 | 13.0M | for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { |
3556 | 9.75M | if (!(cpi->ref_frame_flags & ref_frame_to_flag(ref_frame))) { |
3557 | | // Skip checking missing references in both single and compound reference |
3558 | | // modes. Note that a mode will be skipped if both reference frames |
3559 | | // are masked out. |
3560 | 2.26M | ref_frame_skip_mask[0] |= (1 << ref_frame); |
3561 | 2.26M | ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK; |
3562 | 7.48M | } else if (sf->reference_masking) { |
3563 | 11.1M | for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) { |
3564 | | // Skip fixed mv modes for poor references |
3565 | 8.57M | if ((x->pred_mv_sad[ref_frame] >> 2) > x->pred_mv_sad[i]) { |
3566 | 709k | mode_skip_mask[ref_frame] |= INTER_NEAREST_NEAR_ZERO; |
3567 | 709k | break; |
3568 | 709k | } |
3569 | 8.57M | } |
3570 | 3.24M | } |
3571 | | // If the segment reference frame feature is enabled.... |
3572 | | // then do nothing if the current ref frame is not allowed.. |
3573 | 9.75M | if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) && |
3574 | 9.75M | get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) { |
3575 | 0 | ref_frame_skip_mask[0] |= (1 << ref_frame); |
3576 | 0 | ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK; |
3577 | 0 | } |
3578 | 9.75M | } |
3579 | | |
3580 | | // Disable this drop out case if the ref frame |
3581 | | // segment level feature is enabled for this segment. This is to |
3582 | | // prevent the possibility that we end up unable to pick any mode. |
3583 | 3.25M | if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) { |
3584 | | // Only consider ZEROMV/ALTREF_FRAME for alt ref frame, |
3585 | | // unless ARNR filtering is enabled in which case we want |
3586 | | // an unfiltered alternative. We allow near/nearest as well |
3587 | | // because they may result in zero-zero MVs but be cheaper. |
3588 | 3.25M | if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) { |
3589 | 0 | ref_frame_skip_mask[0] = (1 << LAST_FRAME) | (1 << GOLDEN_FRAME); |
3590 | 0 | ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK; |
3591 | 0 | mode_skip_mask[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO; |
3592 | 0 | if (frame_mv[NEARMV][ALTREF_FRAME].as_int != 0) |
3593 | 0 | mode_skip_mask[ALTREF_FRAME] |= (1 << NEARMV); |
3594 | 0 | if (frame_mv[NEARESTMV][ALTREF_FRAME].as_int != 0) |
3595 | 0 | mode_skip_mask[ALTREF_FRAME] |= (1 << NEARESTMV); |
3596 | 0 | } |
3597 | 3.25M | } |
3598 | | |
3599 | 3.25M | if (cpi->rc.is_src_frame_alt_ref) { |
3600 | 0 | if (sf->alt_ref_search_fp) { |
3601 | 0 | mode_skip_mask[ALTREF_FRAME] = 0; |
3602 | 0 | ref_frame_skip_mask[0] = ~(1 << ALTREF_FRAME) & 0xff; |
3603 | 0 | ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK; |
3604 | 0 | } |
3605 | 0 | } |
3606 | | |
3607 | 3.25M | if (sf->alt_ref_search_fp) |
3608 | 0 | if (!cm->show_frame && x->pred_mv_sad[GOLDEN_FRAME] < INT_MAX) |
3609 | 0 | if (x->pred_mv_sad[ALTREF_FRAME] > (x->pred_mv_sad[GOLDEN_FRAME] << 1)) |
3610 | 0 | mode_skip_mask[ALTREF_FRAME] |= INTER_ALL; |
3611 | | |
3612 | 3.25M | if (sf->adaptive_mode_search) { |
3613 | 0 | if (cm->show_frame && !cpi->rc.is_src_frame_alt_ref && |
3614 | 0 | cpi->rc.frames_since_golden >= 3) |
3615 | 0 | if (x->pred_mv_sad[GOLDEN_FRAME] > (x->pred_mv_sad[LAST_FRAME] << 1)) |
3616 | 0 | mode_skip_mask[GOLDEN_FRAME] |= INTER_ALL; |
3617 | 0 | } |
3618 | | |
3619 | 3.25M | if (bsize > sf->max_intra_bsize && cpi->ref_frame_flags != 0) { |
3620 | 0 | ref_frame_skip_mask[0] |= (1 << INTRA_FRAME); |
3621 | 0 | ref_frame_skip_mask[1] |= (1 << INTRA_FRAME); |
3622 | 0 | } |
3623 | | |
3624 | 3.25M | mode_skip_mask[INTRA_FRAME] |= |
3625 | 3.25M | (uint16_t) ~(sf->intra_y_mode_mask[max_txsize_lookup[bsize]]); |
3626 | | |
3627 | 26.0M | for (i = 0; i <= LAST_NEW_MV_INDEX; ++i) mode_threshold[i] = 0; |
3628 | | |
3629 | 78.0M | for (i = LAST_NEW_MV_INDEX + 1; i < MAX_MODES; ++i) |
3630 | 74.7M | mode_threshold[i] = ((int64_t)rd_threshes[i] * rd_thresh_freq_fact[i]) >> 5; |
3631 | | |
3632 | 3.25M | midx = sf->schedule_mode_search ? mode_skip_start : 0; |
3633 | | |
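/*
 * Note on the loop below: it performs bubble-sort passes over the first part
 * of the tile's mode map (entries from index 4 up to the mode_skip_start
 * point), ordering them by ascending mode_threshold and stopping once a pass
 * makes no further swaps.
 */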
3634 | 3.25M | while (midx > 4) { |
3635 | 0 | uint8_t end_pos = 0; |
3636 | 0 | for (i = 5; i < midx; ++i) { |
3637 | 0 | if (mode_threshold[tile_mode_map[i - 1]] > |
3638 | 0 | mode_threshold[tile_mode_map[i]]) { |
3639 | 0 | uint8_t tmp = tile_mode_map[i]; |
3640 | 0 | tile_mode_map[i] = tile_mode_map[i - 1]; |
3641 | 0 | tile_mode_map[i - 1] = tmp; |
3642 | 0 | end_pos = i; |
3643 | 0 | } |
3644 | 0 | } |
3645 | 0 | midx = end_pos; |
3646 | 0 | } |
3647 | | |
3648 | 3.25M | memcpy(mode_map, tile_mode_map, sizeof(mode_map)); |
3649 | | |
3650 | 99.5M | for (midx = 0; midx < MAX_MODES; ++midx) { |
3651 | 96.3M | int mode_index = mode_map[midx]; |
3652 | 96.3M | int mode_excluded = 0; |
3653 | 96.3M | int64_t this_rd = INT64_MAX; |
3654 | 96.3M | int disable_skip = 0; |
3655 | 96.3M | int compmode_cost = 0; |
3656 | 96.3M | int rate2 = 0, rate_y = 0, rate_uv = 0; |
3657 | 96.3M | int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0; |
3658 | 96.3M | int skippable = 0; |
3659 | 96.3M | int this_skip2 = 0; |
3660 | 96.3M | int64_t total_sse = INT64_MAX; |
3661 | 96.3M | int early_term = 0; |
3662 | | |
3663 | 96.3M | this_mode = vp9_mode_order[mode_index].mode; |
3664 | 96.3M | ref_frame = vp9_mode_order[mode_index].ref_frame[0]; |
3665 | 96.3M | second_ref_frame = vp9_mode_order[mode_index].ref_frame[1]; |
3666 | | |
3667 | 96.3M | vp9_zero(x->sum_y_eobs); |
3668 | 96.3M | comp_pred = second_ref_frame > INTRA_FRAME; |
3669 | 96.3M | if (!comp_pred && ref_frame != INTRA_FRAME && |
3670 | 96.3M | sf->prune_single_mode_based_on_mv_diff_mode_rate) |
3671 | 15.8M | single_mode_rate[ref_frame][INTER_OFFSET(this_mode)] = INT_MAX; |
3672 | | |
3673 | 96.3M | if (is_rect_partition) { |
3674 | 33.6M | if (ctx->skip_ref_frame_mask & (1 << ref_frame)) continue; |
3675 | 29.0M | if (second_ref_frame > 0 && |
3676 | 29.0M | (ctx->skip_ref_frame_mask & (1 << second_ref_frame))) |
3677 | 829k | continue; |
3678 | 29.0M | } |
3679 | | |
3680 | | // Look at the reference frame of the best mode so far and set the |
3681 | | // skip mask to look at a subset of the remaining modes. |
3682 | 90.8M | if (midx == mode_skip_start && best_mode_index >= 0) { |
3683 | 1.01M | switch (best_mbmode.ref_frame[0]) { |
3684 | 534k | case INTRA_FRAME: break; |
3685 | 322k | case LAST_FRAME: ref_frame_skip_mask[0] |= LAST_FRAME_MODE_MASK; break; |
3686 | 137k | case GOLDEN_FRAME: |
3687 | 137k | ref_frame_skip_mask[0] |= GOLDEN_FRAME_MODE_MASK; |
3688 | 137k | break; |
3689 | 24.4k | case ALTREF_FRAME: ref_frame_skip_mask[0] |= ALT_REF_MODE_MASK; break; |
3690 | 0 | case NO_REF_FRAME: |
3691 | 0 | case MAX_REF_FRAMES: assert(0 && "Invalid Reference frame"); break; |
3692 | 1.01M | } |
3693 | 1.01M | } |
3694 | | |
3695 | 90.8M | if ((ref_frame_skip_mask[0] & (1 << ref_frame)) && |
3696 | 90.8M | (ref_frame_skip_mask[1] & (1 << VPXMAX(0, second_ref_frame)))) |
3697 | 15.9M | continue; |
3698 | | |
3699 | 74.8M | if (mode_skip_mask[ref_frame] & (1 << this_mode)) continue; |
3700 | | |
3701 | | // Test best rd so far against threshold for trying this mode. |
3702 | 71.2M | if (best_mode_skippable && sf->schedule_mode_search) |
3703 | 0 | mode_threshold[mode_index] <<= 1; |
3704 | | |
3705 | 71.2M | if (best_rd < mode_threshold[mode_index]) continue; |
3706 | | |
3707 | | // This is only used in motion vector unit test. |
3708 | 64.0M | if (cpi->oxcf.motion_vector_unit_test && ref_frame == INTRA_FRAME) continue; |
3709 | | |
3710 | 64.0M | if (sf->motion_field_mode_search) { |
3711 | 0 | const int mi_width = VPXMIN(num_8x8_blocks_wide_lookup[bsize], |
3712 | 0 | tile_info->mi_col_end - mi_col); |
3713 | 0 | const int mi_height = VPXMIN(num_8x8_blocks_high_lookup[bsize], |
3714 | 0 | tile_info->mi_row_end - mi_row); |
3715 | 0 | const int bsl = mi_width_log2_lookup[bsize]; |
3716 | 0 | int cb_partition_search_ctrl = |
3717 | 0 | (((mi_row + mi_col) >> bsl) + |
3718 | 0 | get_chessboard_index(cm->current_video_frame)) & |
3719 | 0 | 0x1; |
3720 | 0 | MODE_INFO *ref_mi; |
3721 | 0 | int const_motion = 1; |
3722 | 0 | int skip_ref_frame = !cb_partition_search_ctrl; |
3723 | 0 | MV_REFERENCE_FRAME rf = NO_REF_FRAME; |
3724 | 0 | int_mv ref_mv; |
3725 | 0 | ref_mv.as_int = INVALID_MV; |
3726 | |
3727 | 0 | if ((mi_row - 1) >= tile_info->mi_row_start) { |
3728 | 0 | ref_mv = xd->mi[-xd->mi_stride]->mv[0]; |
3729 | 0 | rf = xd->mi[-xd->mi_stride]->ref_frame[0]; |
3730 | 0 | for (i = 0; i < mi_width; ++i) { |
3731 | 0 | ref_mi = xd->mi[-xd->mi_stride + i]; |
3732 | 0 | const_motion &= (ref_mv.as_int == ref_mi->mv[0].as_int) && |
3733 | 0 | (ref_frame == ref_mi->ref_frame[0]); |
3734 | 0 | skip_ref_frame &= (rf == ref_mi->ref_frame[0]); |
3735 | 0 | } |
3736 | 0 | } |
3737 | |
3738 | 0 | if ((mi_col - 1) >= tile_info->mi_col_start) { |
3739 | 0 | if (ref_mv.as_int == INVALID_MV) ref_mv = xd->mi[-1]->mv[0]; |
3740 | 0 | if (rf == NO_REF_FRAME) rf = xd->mi[-1]->ref_frame[0]; |
3741 | 0 | for (i = 0; i < mi_height; ++i) { |
3742 | 0 | ref_mi = xd->mi[i * xd->mi_stride - 1]; |
3743 | 0 | const_motion &= (ref_mv.as_int == ref_mi->mv[0].as_int) && |
3744 | 0 | (ref_frame == ref_mi->ref_frame[0]); |
3745 | 0 | skip_ref_frame &= (rf == ref_mi->ref_frame[0]); |
3746 | 0 | } |
3747 | 0 | } |
3748 | |
3749 | 0 | if (skip_ref_frame && this_mode != NEARESTMV && this_mode != NEWMV) |
3750 | 0 | if (rf > INTRA_FRAME) |
3751 | 0 | if (ref_frame != rf) continue; |
3752 | | |
3753 | 0 | if (const_motion) |
3754 | 0 | if (this_mode == NEARMV || this_mode == ZEROMV) continue; |
3755 | 0 | } |
3756 | | |
3757 | 64.0M | if (comp_pred) { |
3758 | 17.5M | if (!cpi->allow_comp_inter_inter) continue; |
3759 | | |
3760 | 0 | if (cm->ref_frame_sign_bias[ref_frame] == |
3761 | 0 | cm->ref_frame_sign_bias[second_ref_frame]) |
3762 | 0 | continue; |
3763 | | |
3764 | | // Skip compound inter modes if ARF is not available. |
3765 | 0 | if (!(cpi->ref_frame_flags & ref_frame_to_flag(second_ref_frame))) |
3766 | 0 | continue; |
3767 | | |
3768 | | // Do not allow compound prediction if the segment level reference frame |
3769 | | // feature is in use as in this case there can only be one reference. |
3770 | 0 | if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) continue; |
3771 | | |
3772 | 0 | if ((mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) && |
3773 | 0 | best_mode_index >= 0 && best_mbmode.ref_frame[0] == INTRA_FRAME) |
3774 | 0 | continue; |
3775 | | |
3776 | 0 | mode_excluded = cm->reference_mode == SINGLE_REFERENCE; |
3777 | 46.4M | } else { |
3778 | 46.4M | if (ref_frame != INTRA_FRAME) |
3779 | 24.8M | mode_excluded = cm->reference_mode == COMPOUND_REFERENCE; |
3780 | 46.4M | } |
3781 | | |
3782 | 46.4M | if (ref_frame == INTRA_FRAME) { |
3783 | 21.6M | if (sf->adaptive_mode_search) |
3784 | 0 | if ((x->source_variance << num_pels_log2_lookup[bsize]) > best_pred_sse) |
3785 | 0 | continue; |
3786 | | |
3787 | 21.6M | if (this_mode != DC_PRED) { |
3788 | | // Disable intra modes other than DC_PRED for blocks with low variance |
3789 | | // Threshold for intra skipping based on source variance |
3790 | | // TODO(debargha): Specialize the threshold for super block sizes |
3791 | 18.5M | const unsigned int skip_intra_var_thresh = |
3792 | 18.5M | (cpi->oxcf.content == VP9E_CONTENT_FILM) ? 0 : 64; |
3793 | 18.5M | if ((mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) && |
3794 | 18.5M | x->source_variance < skip_intra_var_thresh) |
3795 | 0 | continue; |
3796 | | // Only search the oblique modes if the best so far is |
3797 | | // one of the neighboring directional modes |
3798 | 18.5M | if ((mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) && |
3799 | 18.5M | (this_mode >= D45_PRED && this_mode <= TM_PRED)) { |
3800 | 0 | if (best_mode_index >= 0 && best_mbmode.ref_frame[0] > INTRA_FRAME) |
3801 | 0 | continue; |
3802 | 0 | } |
3803 | 18.5M | if (mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) { |
3804 | 0 | if (conditional_skipintra(this_mode, best_intra_mode)) continue; |
3805 | 0 | } |
3806 | 18.5M | } |
3807 | 24.8M | } else { |
3808 | 24.8M | const MV_REFERENCE_FRAME ref_frames[2] = { ref_frame, second_ref_frame }; |
3809 | 24.8M | if (!check_best_zero_mv(cpi, mbmi_ext->mode_context, frame_mv, this_mode, |
3810 | 24.8M | ref_frames)) |
3811 | 4.81M | continue; |
3812 | 24.8M | } |
3813 | | |
3814 | 41.6M | mi->mode = this_mode; |
3815 | 41.6M | mi->uv_mode = DC_PRED; |
3816 | 41.6M | mi->ref_frame[0] = ref_frame; |
3817 | 41.6M | mi->ref_frame[1] = second_ref_frame; |
3818 | | // Evaluate all sub-pel filters irrespective of whether we can use |
3819 | | // them for this frame. |
3820 | 41.6M | mi->interp_filter = |
3821 | 41.6M | cm->interp_filter == SWITCHABLE ? EIGHTTAP : cm->interp_filter; |
3822 | 41.6M | mi->mv[0].as_int = mi->mv[1].as_int = 0; |
3823 | | |
3824 | 41.6M | x->skip = 0; |
3825 | 41.6M | set_ref_ptrs(cm, xd, ref_frame, second_ref_frame); |
3826 | | |
3827 | | // Select prediction reference frames. |
3828 | 166M | for (i = 0; i < MAX_MB_PLANE; i++) { |
3829 | 125M | xd->plane[i].pre[0] = yv12_mb[ref_frame][i]; |
3830 | 125M | if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i]; |
3831 | 125M | } |
3832 | | |
3833 | 41.6M | if (ref_frame == INTRA_FRAME) { |
3834 | 21.6M | TX_SIZE uv_tx; |
3835 | 21.6M | struct macroblockd_plane *const pd = &xd->plane[1]; |
3836 | | #if CONFIG_COLLECT_COMPONENT_TIMING |
3837 | | start_timing(cpi, intra_mode_search_time); |
3838 | | #endif |
3839 | 21.6M | memset(x->skip_txfm, 0, sizeof(x->skip_txfm)); |
3840 | 21.6M | super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL, bsize, |
3841 | 21.6M | best_rd, recon); |
3842 | | #if CONFIG_COLLECT_COMPONENT_TIMING |
3843 | | end_timing(cpi, intra_mode_search_time); |
3844 | | #endif |
3845 | 21.6M | if (rate_y == INT_MAX) continue; |
3846 | | |
3847 | 13.8M | uv_tx = uv_txsize_lookup[bsize][mi->tx_size][pd->subsampling_x] |
3848 | 13.8M | [pd->subsampling_y]; |
3849 | | #if CONFIG_COLLECT_COMPONENT_TIMING |
3850 | | start_timing(cpi, intra_mode_search_time); |
3851 | | #endif |
3852 | 13.8M | if (rate_uv_intra[uv_tx] == INT_MAX) { |
3853 | 2.37M | choose_intra_uv_mode(cpi, x, ctx, bsize, uv_tx, &rate_uv_intra[uv_tx], |
3854 | 2.37M | &rate_uv_tokenonly[uv_tx], &dist_uv[uv_tx], |
3855 | 2.37M | &skip_uv[uv_tx], &mode_uv[uv_tx]); |
3856 | 2.37M | } |
3857 | | #if CONFIG_COLLECT_COMPONENT_TIMING |
3858 | | end_timing(cpi, intra_mode_search_time); |
3859 | | #endif |
3860 | 13.8M | rate_uv = rate_uv_tokenonly[uv_tx]; |
3861 | 13.8M | distortion_uv = dist_uv[uv_tx]; |
3862 | 13.8M | skippable = skippable && skip_uv[uv_tx]; |
3863 | 13.8M | mi->uv_mode = mode_uv[uv_tx]; |
3864 | | |
3865 | 13.8M | rate2 = rate_y + cpi->mbmode_cost[mi->mode] + rate_uv_intra[uv_tx]; |
3866 | 13.8M | if (this_mode != DC_PRED && this_mode != TM_PRED) |
3867 | 10.0M | rate2 += intra_cost_penalty; |
3868 | 13.8M | distortion2 = distortion_y + distortion_uv; |
3869 | 20.0M | } else { |
3870 | | #if CONFIG_COLLECT_COMPONENT_TIMING |
3871 | | start_timing(cpi, handle_inter_mode_time); |
3872 | | #endif |
3873 | 20.0M | this_rd = handle_inter_mode( |
3874 | 20.0M | cpi, x, bsize, &rate2, &distortion2, &skippable, &rate_y, &rate_uv, |
3875 | 20.0M | recon, &disable_skip, frame_mv, mi_row, mi_col, single_newmv, |
3876 | 20.0M | single_inter_filter, single_skippable, |
3877 | 20.0M | &single_mode_rate[ref_frame][0], &total_sse, best_rd, &mask_filter, |
3878 | 20.0M | filter_cache, best_mode_index); |
3879 | | #if CONFIG_COLLECT_COMPONENT_TIMING |
3880 | | end_timing(cpi, handle_inter_mode_time); |
3881 | | #endif |
3882 | 20.0M | if (this_rd == INT64_MAX) continue; |
3883 | | |
3884 | 6.57M | compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred); |
3885 | | |
3886 | 6.57M | if (cm->reference_mode == REFERENCE_MODE_SELECT) rate2 += compmode_cost; |
3887 | 6.57M | } |
3888 | | |
3889 | | // Estimate the reference frame signaling cost and add it |
3890 | | // to the rolling cost variable. |
3891 | 20.3M | if (comp_pred) { |
3892 | 0 | rate2 += ref_costs_comp[ref_frame]; |
3893 | 20.3M | } else { |
3894 | 20.3M | rate2 += ref_costs_single[ref_frame]; |
3895 | 20.3M | } |
3896 | | |
3897 | 20.3M | if (!disable_skip) { |
3898 | 20.3M | const vpx_prob skip_prob = vp9_get_skip_prob(cm, xd); |
3899 | 20.3M | const int skip_cost0 = vp9_cost_bit(skip_prob, 0); |
3900 | 20.3M | const int skip_cost1 = vp9_cost_bit(skip_prob, 1); |
3901 | | |
3902 | 20.3M | if (skippable) { |
3903 | | // Back out the coefficient coding costs |
3904 | 1.00M | rate2 -= (rate_y + rate_uv); |
3905 | | |
3906 | | // Cost the skip mb case |
3907 | 1.00M | rate2 += skip_cost1; |
3908 | 19.3M | } else if (ref_frame != INTRA_FRAME && !xd->lossless && |
3909 | 19.3M | !cpi->oxcf.sharpness) { |
3910 | 6.04M | if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv + skip_cost0, |
3911 | 6.04M | distortion2) < |
3912 | 6.04M | RDCOST(x->rdmult, x->rddiv, skip_cost1, total_sse)) { |
3913 | | // Add in the cost of the no skip flag. |
3914 | 5.57M | rate2 += skip_cost0; |
3915 | 5.57M | } else { |
3916 | | // FIXME(rbultje) make this work for splitmv also |
3917 | 471k | assert(total_sse >= 0); |
3918 | | |
3919 | 471k | rate2 += skip_cost1; |
3920 | 471k | distortion2 = total_sse; |
3921 | 471k | rate2 -= (rate_y + rate_uv); |
3922 | 471k | this_skip2 = 1; |
3923 | 471k | } |
3924 | 13.2M | } else { |
3925 | | // Add in the cost of the no skip flag. |
3926 | 13.2M | rate2 += skip_cost0; |
3927 | 13.2M | } |
3928 | | |
3929 | | // Calculate the final RD estimate for this mode. |
3930 | 20.3M | this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); |
3931 | 20.3M | } |
3932 | | |
3933 | 20.3M | if (recon) { |
3934 | | // In film mode bias against DC pred and other intra if there is a |
3935 | | // significant difference between the variance of the sub blocks in
3936 | | // the source. Also apply some bias against compound modes which also
3937 | | // tend to blur fine texture such as film grain over time. |
3938 | | // |
3939 | | // The sub block test here acts in the case where one or more sub |
3940 | | // blocks have relatively high variance but others relatively low
3941 | | // variance. Here the high variance sub blocks may push the |
3942 | | // total variance for the current block size over the thresholds |
3943 | | // used in rd_variance_adjustment() below. |
3944 | 0 | if (cpi->oxcf.content == VP9E_CONTENT_FILM) { |
3945 | 0 | if (bsize >= BLOCK_16X16) { |
3946 | 0 | int min_energy, max_energy; |
3947 | 0 | vp9_get_sub_block_energy(cpi, x, mi_row, mi_col, bsize, &min_energy, |
3948 | 0 | &max_energy); |
3949 | 0 | if (max_energy > min_energy) { |
3950 | 0 | if (ref_frame == INTRA_FRAME) { |
3951 | 0 | if (this_mode == DC_PRED) |
3952 | 0 | this_rd += (this_rd * (max_energy - min_energy)); |
3953 | 0 | else |
3954 | 0 | this_rd += (this_rd * (max_energy - min_energy)) / 4; |
3955 | 0 | } else if (second_ref_frame > INTRA_FRAME) { |
3956 | 0 | this_rd += this_rd / 4; |
3957 | 0 | } |
3958 | 0 | } |
3959 | 0 | } |
3960 | 0 | } |
3961 | | // Apply an adjustment to the rd value based on the similarity of the |
3962 | | // source variance and reconstructed variance. |
3963 | 0 | rd_variance_adjustment(cpi, x, bsize, &this_rd, recon, ref_frame, |
3964 | 0 | second_ref_frame, this_mode); |
3965 | 0 | } |
3966 | | |
3967 | 20.3M | if (ref_frame == INTRA_FRAME) { |
3968 | | // Keep record of best intra rd |
3969 | 13.8M | if (this_rd < best_intra_rd) { |
3970 | 3.40M | best_intra_rd = this_rd; |
3971 | 3.40M | best_intra_mode = mi->mode; |
3972 | 3.40M | } |
3973 | 13.8M | } |
3974 | | |
3975 | 20.3M | if (!disable_skip && ref_frame == INTRA_FRAME) { |
3976 | 55.2M | for (i = 0; i < REFERENCE_MODES; ++i) |
3977 | 41.4M | best_pred_rd[i] = VPXMIN(best_pred_rd[i], this_rd); |
3978 | 69.0M | for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) |
3979 | 55.2M | best_filter_rd[i] = VPXMIN(best_filter_rd[i], this_rd); |
3980 | 13.8M | } |
3981 | | |
3982 | | // Did this mode help, i.e. is it the new best mode?
3983 | 20.3M | if (this_rd < best_rd || x->skip) { |
3984 | 5.60M | int max_plane = MAX_MB_PLANE; |
3985 | 5.60M | if (!mode_excluded) { |
3986 | | // Note index of best mode so far |
3987 | 5.60M | best_mode_index = mode_index; |
3988 | | |
3989 | 5.60M | if (ref_frame == INTRA_FRAME) { |
3990 | | /* required for left and above block mv */ |
3991 | 2.50M | mi->mv[0].as_int = 0; |
3992 | 2.50M | max_plane = 1; |
3993 | | // Initialize interp_filter here so we do not have to check for |
3994 | | // inter block modes in get_pred_context_switchable_interp() |
3995 | 2.50M | mi->interp_filter = SWITCHABLE_FILTERS; |
3996 | 3.10M | } else { |
3997 | 3.10M | best_pred_sse = x->pred_sse[ref_frame]; |
3998 | 3.10M | } |
3999 | | |
4000 | 5.60M | rd_cost->rate = rate2; |
4001 | 5.60M | rd_cost->dist = distortion2; |
4002 | 5.60M | rd_cost->rdcost = this_rd; |
4003 | 5.60M | best_rd = this_rd; |
4004 | 5.60M | best_mbmode = *mi; |
4005 | 5.60M | best_skip2 = this_skip2; |
4006 | 5.60M | best_mode_skippable = skippable; |
4007 | | |
4008 | 5.60M | if (!x->select_tx_size) swap_block_ptr(x, ctx, 1, 0, 0, max_plane); |
4009 | 5.60M | memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mi->tx_size], |
4010 | 5.60M | sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk); |
4011 | 5.60M | ctx->sum_y_eobs = x->sum_y_eobs[mi->tx_size]; |
4012 | | |
4013 | | // TODO(debargha): enhance this test with a better distortion prediction |
4014 | | // based on qp, activity mask and history |
4015 | 5.60M | if ((mode_search_skip_flags & FLAG_EARLY_TERMINATE) && |
4016 | 5.60M | (mode_index > MIN_EARLY_TERM_INDEX)) { |
4017 | 0 | int qstep = xd->plane[0].dequant[1]; |
4018 | | // TODO(debargha): Enhance this by specializing for each mode_index |
4019 | 0 | int scale = 4; |
4020 | 0 | #if CONFIG_VP9_HIGHBITDEPTH |
4021 | 0 | if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { |
4022 | 0 | qstep >>= (xd->bd - 8); |
4023 | 0 | } |
4024 | 0 | #endif // CONFIG_VP9_HIGHBITDEPTH |
4025 | 0 | if (x->source_variance < UINT_MAX) { |
4026 | 0 | const int var_adjust = (x->source_variance < 16); |
4027 | 0 | scale -= var_adjust; |
4028 | 0 | } |
4029 | 0 | if (ref_frame > INTRA_FRAME && distortion2 * scale < qstep * qstep) { |
4030 | 0 | early_term = 1; |
4031 | 0 | } |
4032 | 0 | } |
4033 | 5.60M | } |
4034 | 5.60M | } |
4035 | | |
4036 | | /* keep record of best compound/single-only prediction */ |
4037 | 20.3M | if (!disable_skip && ref_frame != INTRA_FRAME) { |
4038 | 6.53M | int64_t single_rd, hybrid_rd, single_rate, hybrid_rate; |
4039 | | |
4040 | 6.53M | if (cm->reference_mode == REFERENCE_MODE_SELECT) { |
4041 | 0 | single_rate = rate2 - compmode_cost; |
4042 | 0 | hybrid_rate = rate2; |
4043 | 6.53M | } else { |
4044 | 6.53M | single_rate = rate2; |
4045 | 6.53M | hybrid_rate = rate2 + compmode_cost; |
4046 | 6.53M | } |
4047 | | |
4048 | 6.53M | single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2); |
4049 | 6.53M | hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2); |
4050 | | |
4051 | 6.53M | if (!comp_pred) { |
4052 | 6.53M | if (single_rd < best_pred_rd[SINGLE_REFERENCE]) |
4053 | 3.36M | best_pred_rd[SINGLE_REFERENCE] = single_rd; |
4054 | 6.53M | } else { |
4055 | 0 | if (single_rd < best_pred_rd[COMPOUND_REFERENCE]) |
4056 | 0 | best_pred_rd[COMPOUND_REFERENCE] = single_rd; |
4057 | 0 | } |
4058 | 6.53M | if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT]) |
4059 | 3.35M | best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd; |
4060 | | |
4061 | | /* keep record of best filter type */ |
4062 | 6.53M | if (!mode_excluded && cm->interp_filter != BILINEAR) { |
4063 | 6.53M | int64_t ref = |
4064 | 6.53M | filter_cache[cm->interp_filter == SWITCHABLE ? SWITCHABLE_FILTERS |
4065 | 6.53M | : cm->interp_filter]; |
4066 | | |
4067 | 32.6M | for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) { |
4068 | 26.1M | int64_t adj_rd; |
4069 | 26.1M | if (ref == INT64_MAX) |
4070 | 1.60M | adj_rd = 0; |
4071 | 24.5M | else if (filter_cache[i] == INT64_MAX) |
4072 | | // When early termination is triggered, the encoder does not have
4073 | | // access to the rate-distortion cost. It only knows that the cost
4074 | | // should be above the maximum valid value. Hence it takes the known
4075 | | // maximum plus an arbitrary constant as the rate-distortion cost.
4076 | 1.76M | adj_rd = mask_filter - ref + 10; |
4077 | 22.7M | else |
4078 | 22.7M | adj_rd = filter_cache[i] - ref; |
4079 | | |
4080 | 26.1M | adj_rd += this_rd; |
4081 | 26.1M | best_filter_rd[i] = VPXMIN(best_filter_rd[i], adj_rd); |
4082 | 26.1M | } |
4083 | 6.53M | } |
4084 | 6.53M | } |
4085 | | |
4086 | 20.3M | if (early_term) break; |
4087 | | |
4088 | 20.3M | if (x->skip && !comp_pred) break; |
4089 | 20.3M | } |
4090 | | |
4091 | | // The inter modes' rate costs are not calculated precisely in some cases. |
4092 | | // Therefore, sometimes, NEWMV is chosen instead of NEARESTMV, NEARMV, and |
4093 | | // ZEROMV. Here, checks are added for those cases, and the mode decisions |
4094 | | // are corrected. |
4095 | 3.25M | if (best_mbmode.mode == NEWMV) { |
4096 | 345k | const MV_REFERENCE_FRAME refs[2] = { best_mbmode.ref_frame[0], |
4097 | 345k | best_mbmode.ref_frame[1] }; |
4098 | 345k | int comp_pred_mode = refs[1] > INTRA_FRAME; |
4099 | | |
4100 | 345k | if (frame_mv[NEARESTMV][refs[0]].as_int == best_mbmode.mv[0].as_int && |
4101 | 345k | ((comp_pred_mode && |
4102 | 5.05k | frame_mv[NEARESTMV][refs[1]].as_int == best_mbmode.mv[1].as_int) || |
4103 | 5.05k | !comp_pred_mode)) |
4104 | 5.05k | best_mbmode.mode = NEARESTMV; |
4105 | 340k | else if (frame_mv[NEARMV][refs[0]].as_int == best_mbmode.mv[0].as_int && |
4106 | 340k | ((comp_pred_mode && |
4107 | 4.59k | frame_mv[NEARMV][refs[1]].as_int == best_mbmode.mv[1].as_int) || |
4108 | 4.59k | !comp_pred_mode)) |
4109 | 4.59k | best_mbmode.mode = NEARMV; |
4110 | 335k | else if (best_mbmode.mv[0].as_int == 0 && |
4111 | 335k | ((comp_pred_mode && best_mbmode.mv[1].as_int == 0) || |
4112 | 116 | !comp_pred_mode)) |
4113 | 116 | best_mbmode.mode = ZEROMV; |
4114 | 345k | } |
4115 | | |
4116 | 3.25M | if (best_mode_index < 0 || best_rd >= best_rd_so_far) { |
4117 | | // If adaptive interp filter is enabled, then the current leaf node of 8x8 |
4118 | | // data is needed for sub8x8. Hence preserve the context. |
4119 | 687k | if (bsize == BLOCK_8X8) ctx->mic = *xd->mi[0]; |
4120 | 687k | rd_cost->rate = INT_MAX; |
4121 | 687k | rd_cost->rdcost = INT64_MAX; |
4122 | 687k | return; |
4123 | 687k | } |
4124 | | |
4125 | | // If we used an estimate for the uv intra rd in the loop above... |
4126 | 2.56M | if (sf->use_uv_intra_rd_estimate) { |
4127 | | // Do Intra UV best rd mode selection if best mode choice above was intra. |
4128 | 0 | if (best_mbmode.ref_frame[0] == INTRA_FRAME) { |
4129 | 0 | TX_SIZE uv_tx_size; |
4130 | 0 | *mi = best_mbmode; |
4131 | 0 | uv_tx_size = get_uv_tx_size(mi, &xd->plane[1]); |
4132 | 0 | rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size], |
4133 | 0 | &rate_uv_tokenonly[uv_tx_size], |
4134 | 0 | &dist_uv[uv_tx_size], &skip_uv[uv_tx_size], |
4135 | 0 | bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize, |
4136 | 0 | uv_tx_size); |
4137 | 0 | } |
4138 | 0 | } |
4139 | | |
4140 | 2.56M | assert((cm->interp_filter == SWITCHABLE) || |
4141 | 2.56M | (cm->interp_filter == best_mbmode.interp_filter) || |
4142 | 2.56M | !is_inter_block(&best_mbmode)); |
4143 | | |
4144 | 2.56M | if (!cpi->rc.is_src_frame_alt_ref) |
4145 | 2.56M | vp9_update_rd_thresh_fact(tile_data->thresh_freq_fact, |
4146 | 2.56M | sf->adaptive_rd_thresh, bsize, best_mode_index); |
4147 | | |
4148 | | // macroblock modes |
4149 | 2.56M | *mi = best_mbmode; |
4150 | 2.56M | x->skip |= best_skip2; |
4151 | | |
4152 | 10.2M | for (i = 0; i < REFERENCE_MODES; ++i) { |
4153 | 7.68M | if (best_pred_rd[i] == INT64_MAX) |
4154 | 580k | best_pred_diff[i] = INT_MIN; |
4155 | 7.10M | else |
4156 | 7.10M | best_pred_diff[i] = best_rd - best_pred_rd[i]; |
4157 | 7.68M | } |
4158 | | |
4159 | 2.56M | if (!x->skip) { |
4160 | 12.0M | for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) { |
4161 | 9.65M | if (best_filter_rd[i] == INT64_MAX) |
4162 | 0 | best_filter_diff[i] = 0; |
4163 | 9.65M | else |
4164 | 9.65M | best_filter_diff[i] = best_rd - best_filter_rd[i]; |
4165 | 9.65M | } |
4166 | 2.41M | if (cm->interp_filter == SWITCHABLE) |
4167 | 1.33M | assert(best_filter_diff[SWITCHABLE_FILTERS] == 0); |
4168 | 2.41M | } else { |
4169 | 148k | vp9_zero(best_filter_diff); |
4170 | 148k | } |
4171 | | |
4172 | | // TODO(yunqingwang): Moving this line in front of the above best_filter_diff |
4173 | | // updating code causes PSNR loss. Need to figure out the conflict.
4174 | 2.56M | x->skip |= best_mode_skippable; |
4175 | | |
4176 | 2.56M | if (!x->skip && !x->select_tx_size) { |
4177 | 1.17M | int has_high_freq_coeff = 0; |
4178 | 1.17M | int plane; |
4179 | 1.17M | int max_plane = is_inter_block(xd->mi[0]) ? MAX_MB_PLANE : 1; |
4180 | 3.10M | for (plane = 0; plane < max_plane; ++plane) { |
4181 | 1.92M | x->plane[plane].eobs = ctx->eobs_pbuf[plane][1]; |
4182 | 1.92M | has_high_freq_coeff |= vp9_has_high_freq_in_plane(x, bsize, plane); |
4183 | 1.92M | } |
4184 | | |
4185 | 2.77M | for (plane = max_plane; plane < MAX_MB_PLANE; ++plane) { |
4186 | 1.59M | x->plane[plane].eobs = ctx->eobs_pbuf[plane][2]; |
4187 | 1.59M | has_high_freq_coeff |= vp9_has_high_freq_in_plane(x, bsize, plane); |
4188 | 1.59M | } |
4189 | | |
4190 | 1.17M | best_mode_skippable |= !has_high_freq_coeff; |
4191 | 1.17M | } |
4192 | | |
4193 | 2.56M | assert(best_mode_index >= 0); |
4194 | | |
4195 | 2.56M | store_coding_context(x, ctx, best_mode_index, best_pred_diff, |
4196 | 2.56M | best_filter_diff, best_mode_skippable); |
4197 | 2.56M | } |
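The skip-flag decision inside the mode loop above weighs the cost of coding the residual plus a "no skip" flag against dropping all coefficients and paying the full SSE as distortion. The following is a minimal illustrative sketch of that trade-off in plain C; rd_cost_sketch() and its single lambda parameter are simplified stand-ins for the encoder's RDCOST macro (the fixed-point scaling of x->rdmult and x->rddiv is omitted), and the numbers in main() are made up.

#include <stdint.h>
#include <stdio.h>

/* Illustrative only -- not part of vp9_rdopt.c.  A simplified rate-distortion
 * cost: lambda-scaled rate plus distortion.  The real RDCOST macro uses
 * x->rdmult / x->rddiv with fixed-point shifts. */
static int64_t rd_cost_sketch(int64_t lambda, int rate, int64_t dist) {
  return lambda * rate + dist;
}

/* Returns 1 when signalling "skip" (no coefficients, distortion = total SSE)
 * is cheaper than coding the residual plus the no-skip flag, mirroring the
 * comparison made for non-intra, non-lossless blocks in the loop above. */
static int prefer_skip_sketch(int64_t lambda, int rate_coefs, int skip_cost0,
                              int skip_cost1, int64_t dist_coded,
                              int64_t total_sse) {
  const int64_t rd_coded =
      rd_cost_sketch(lambda, rate_coefs + skip_cost0, dist_coded);
  const int64_t rd_skipped = rd_cost_sketch(lambda, skip_cost1, total_sse);
  return rd_skipped <= rd_coded;
}

int main(void) {
  /* Made-up numbers: coding the residual costs 900 bits for distortion 5000;
   * skipping costs 2 bits but leaves an SSE of 60000. */
  printf("prefer skip: %d\n", prefer_skip_sketch(128, 900, 1, 2, 5000, 60000));
  return 0;
}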
4198 | | |
4199 | | void vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi, TileDataEnc *tile_data, |
4200 | | MACROBLOCK *x, RD_COST *rd_cost, |
4201 | | BLOCK_SIZE bsize, |
4202 | | PICK_MODE_CONTEXT *ctx, |
4203 | 0 | int64_t best_rd_so_far) { |
4204 | 0 | VP9_COMMON *const cm = &cpi->common; |
4205 | 0 | MACROBLOCKD *const xd = &x->e_mbd; |
4206 | 0 | MODE_INFO *const mi = xd->mi[0]; |
4207 | 0 | unsigned char segment_id = mi->segment_id; |
4208 | 0 | const int comp_pred = 0; |
4209 | 0 | int i; |
4210 | 0 | int64_t best_pred_diff[REFERENCE_MODES]; |
4211 | 0 | int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]; |
4212 | 0 | unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES]; |
4213 | 0 | vpx_prob comp_mode_p; |
4214 | 0 | INTERP_FILTER best_filter = SWITCHABLE; |
4215 | 0 | int64_t this_rd = INT64_MAX; |
4216 | 0 | int rate2 = 0; |
4217 | 0 | const int64_t distortion2 = 0; |
4218 | |
4219 | 0 | x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; |
4220 | |
4221 | 0 | estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp, |
4222 | 0 | &comp_mode_p); |
4223 | |
4224 | 0 | for (i = 0; i < MAX_REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX; |
4225 | 0 | for (i = LAST_FRAME; i < MAX_REF_FRAMES; ++i) x->pred_mv_sad[i] = INT_MAX; |
4226 | |
4227 | 0 | rd_cost->rate = INT_MAX; |
4228 | |
4229 | 0 | assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)); |
4230 | |
4231 | 0 | mi->mode = ZEROMV; |
4232 | 0 | mi->uv_mode = DC_PRED; |
4233 | 0 | mi->ref_frame[0] = LAST_FRAME; |
4234 | 0 | mi->ref_frame[1] = NO_REF_FRAME; |
4235 | 0 | mi->mv[0].as_int = 0; |
4236 | 0 | x->skip = 1; |
4237 | |
4238 | 0 | ctx->sum_y_eobs = 0; |
4239 | |
4240 | 0 | if (cm->interp_filter != BILINEAR) { |
4241 | 0 | best_filter = EIGHTTAP; |
4242 | 0 | if (cm->interp_filter == SWITCHABLE && |
4243 | 0 | x->source_variance >= cpi->sf.disable_filter_search_var_thresh) { |
4244 | 0 | int rs; |
4245 | 0 | int best_rs = INT_MAX; |
4246 | 0 | for (i = 0; i < SWITCHABLE_FILTERS; ++i) { |
4247 | 0 | mi->interp_filter = i; |
4248 | 0 | rs = vp9_get_switchable_rate(cpi, xd); |
4249 | 0 | if (rs < best_rs) { |
4250 | 0 | best_rs = rs; |
4251 | 0 | best_filter = mi->interp_filter; |
4252 | 0 | } |
4253 | 0 | } |
4254 | 0 | } |
4255 | 0 | } |
4256 | | // Set the appropriate filter |
4257 | 0 | if (cm->interp_filter == SWITCHABLE) { |
4258 | 0 | mi->interp_filter = best_filter; |
4259 | 0 | rate2 += vp9_get_switchable_rate(cpi, xd); |
4260 | 0 | } else { |
4261 | 0 | mi->interp_filter = cm->interp_filter; |
4262 | 0 | } |
4263 | |
4264 | 0 | if (cm->reference_mode == REFERENCE_MODE_SELECT) |
4265 | 0 | rate2 += vp9_cost_bit(comp_mode_p, comp_pred); |
4266 | | |
4267 | | // Estimate the reference frame signaling cost and add it |
4268 | | // to the rolling cost variable. |
4269 | 0 | rate2 += ref_costs_single[LAST_FRAME]; |
4270 | 0 | this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); |
4271 | |
4272 | 0 | rd_cost->rate = rate2; |
4273 | 0 | rd_cost->dist = distortion2; |
4274 | 0 | rd_cost->rdcost = this_rd; |
4275 | |
4276 | 0 | if (this_rd >= best_rd_so_far) { |
4277 | 0 | rd_cost->rate = INT_MAX; |
4278 | 0 | rd_cost->rdcost = INT64_MAX; |
4279 | 0 | return; |
4280 | 0 | } |
4281 | | |
4282 | 0 | assert((cm->interp_filter == SWITCHABLE) || |
4283 | 0 | (cm->interp_filter == mi->interp_filter)); |
4284 | |
4285 | 0 | vp9_update_rd_thresh_fact(tile_data->thresh_freq_fact, |
4286 | 0 | cpi->sf.adaptive_rd_thresh, bsize, THR_ZEROMV); |
4287 | |
4288 | 0 | vp9_zero(best_pred_diff); |
4289 | 0 | vp9_zero(best_filter_diff); |
4290 | |
4291 | 0 | if (!x->select_tx_size) swap_block_ptr(x, ctx, 1, 0, 0, MAX_MB_PLANE); |
4292 | 0 | store_coding_context(x, ctx, THR_ZEROMV, best_pred_diff, best_filter_diff, 0); |
4293 | 0 | } |
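Both mode-selection loops in this file prune candidates with per-reference-frame and per-mode bitmasks (ref_frame_skip_mask, mode_skip_mask): setting bit (1 << frame) marks a reference as not worth revisiting, and the search simply continues past masked entries. Below is a small self-contained sketch of that bookkeeping; the EX_* enum values are hypothetical stand-ins, not the libvpx MV_REFERENCE_FRAME constants.

#include <stdio.h>

/* Illustrative only -- not part of vp9_rdopt.c.  EX_* are hypothetical
 * stand-ins for the libvpx MV_REFERENCE_FRAME values. */
enum { EX_INTRA = 0, EX_LAST = 1, EX_GOLDEN = 2, EX_ALTREF = 3, EX_NUM = 4 };

int main(void) {
  unsigned ref_skip_mask = 0;
  int ref;

  /* Best mode so far used LAST_FRAME, so mask out the other references --
   * the same effect as the LAST_FRAME_MODE_MASK-style updates above. */
  ref_skip_mask |= (1u << EX_GOLDEN) | (1u << EX_ALTREF) | (1u << EX_INTRA);

  for (ref = 0; ref < EX_NUM; ++ref) {
    if (ref_skip_mask & (1u << ref)) continue; /* pruned candidate */
    printf("still searching reference %d\n", ref);
  }
  return 0;
}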
4294 | | |
4295 | | void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, TileDataEnc *tile_data, |
4296 | | MACROBLOCK *x, int mi_row, int mi_col, |
4297 | | RD_COST *rd_cost, BLOCK_SIZE bsize, |
4298 | | PICK_MODE_CONTEXT *ctx, |
4299 | 2.74M | int64_t best_rd_so_far) { |
4300 | 2.74M | VP9_COMMON *const cm = &cpi->common; |
4301 | 2.74M | RD_OPT *const rd_opt = &cpi->rd; |
4302 | 2.74M | SPEED_FEATURES *const sf = &cpi->sf; |
4303 | 2.74M | MACROBLOCKD *const xd = &x->e_mbd; |
4304 | 2.74M | MODE_INFO *const mi = xd->mi[0]; |
4305 | 2.74M | const struct segmentation *const seg = &cm->seg; |
4306 | 2.74M | MV_REFERENCE_FRAME ref_frame, second_ref_frame; |
4307 | 2.74M | unsigned char segment_id = mi->segment_id; |
4308 | 2.74M | int comp_pred, i; |
4309 | 2.74M | int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]; |
4310 | 2.74M | struct buf_2d yv12_mb[4][MAX_MB_PLANE] = { 0 }; |
4311 | 2.74M | int64_t best_rd = best_rd_so_far; |
4312 | 2.74M | int64_t best_yrd = best_rd_so_far; // FIXME(rbultje) more precise |
4313 | 2.74M | int64_t best_pred_diff[REFERENCE_MODES]; |
4314 | 2.74M | int64_t best_pred_rd[REFERENCE_MODES]; |
4315 | 2.74M | int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS]; |
4316 | 2.74M | int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]; |
4317 | 2.74M | MODE_INFO best_mbmode; |
4318 | 2.74M | int ref_index, best_ref_index = 0; |
4319 | 2.74M | unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES]; |
4320 | 2.74M | vpx_prob comp_mode_p; |
4321 | 2.74M | INTERP_FILTER tmp_best_filter = SWITCHABLE; |
4322 | 2.74M | int rate_uv_intra, rate_uv_tokenonly; |
4323 | 2.74M | int64_t dist_uv; |
4324 | 2.74M | int skip_uv; |
4325 | 2.74M | PREDICTION_MODE mode_uv = DC_PRED; |
4326 | 2.74M | const int intra_cost_penalty = |
4327 | 2.74M | vp9_get_intra_cost_penalty(cpi, bsize, cm->base_qindex, cm->y_dc_delta_q); |
4328 | 2.74M | int_mv seg_mvs[4][MAX_REF_FRAMES]; |
4329 | 2.74M | b_mode_info best_bmodes[4]; |
4330 | 2.74M | int best_skip2 = 0; |
4331 | 2.74M | int ref_frame_skip_mask[2] = { 0 }; |
4332 | 2.74M | int64_t mask_filter = 0; |
4333 | 2.74M | int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS]; |
4334 | 2.74M | int internal_active_edge = |
4335 | 2.74M | vp9_active_edge_sb(cpi, mi_row, mi_col) && vp9_internal_image_edge(cpi); |
4336 | 2.74M | const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize]; |
4337 | | |
4338 | 2.74M | x->skip_encode = sf->skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; |
4339 | 2.74M | memset(x->zcoeff_blk[TX_4X4], 0, 4); |
4340 | 2.74M | vp9_zero(best_mbmode); |
4341 | | |
4342 | 13.7M | for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) filter_cache[i] = INT64_MAX; |
4343 | | |
4344 | 13.7M | for (i = 0; i < 4; i++) { |
4345 | 10.9M | int j; |
4346 | 54.8M | for (j = 0; j < MAX_REF_FRAMES; j++) seg_mvs[i][j].as_int = INVALID_MV; |
4347 | 10.9M | } |
4348 | | |
4349 | 2.74M | estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp, |
4350 | 2.74M | &comp_mode_p); |
4351 | | |
4352 | 10.9M | for (i = 0; i < REFERENCE_MODES; ++i) best_pred_rd[i] = INT64_MAX; |
4353 | 13.7M | for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) |
4354 | 10.9M | best_filter_rd[i] = INT64_MAX; |
4355 | 2.74M | rate_uv_intra = INT_MAX; |
4356 | | |
4357 | 2.74M | rd_cost->rate = INT_MAX; |
4358 | | |
4359 | 10.9M | for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) { |
4360 | 8.23M | if (cpi->ref_frame_flags & ref_frame_to_flag(ref_frame)) { |
4361 | 6.27M | setup_buffer_inter(cpi, x, ref_frame, bsize, mi_row, mi_col, |
4362 | 6.27M | frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb); |
4363 | 6.27M | } else { |
4364 | 1.95M | ref_frame_skip_mask[0] |= (1 << ref_frame); |
4365 | 1.95M | ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK; |
4366 | 1.95M | } |
4367 | 8.23M | frame_mv[NEWMV][ref_frame].as_int = INVALID_MV; |
4368 | 8.23M | frame_mv[ZEROMV][ref_frame].as_int = 0; |
4369 | 8.23M | } |
4370 | | |
4371 | 19.2M | for (ref_index = 0; ref_index < MAX_REFS; ++ref_index) { |
4372 | 16.4M | int mode_excluded = 0; |
4373 | 16.4M | int64_t this_rd = INT64_MAX; |
4374 | 16.4M | int disable_skip = 0; |
4375 | 16.4M | int compmode_cost = 0; |
4376 | 16.4M | int rate2 = 0, rate_y = 0, rate_uv = 0; |
4377 | 16.4M | int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0; |
4378 | 16.4M | int skippable = 0; |
4379 | 16.4M | int this_skip2 = 0; |
4380 | 16.4M | int64_t total_sse = INT_MAX; |
4381 | 16.4M | int early_term = 0; |
4382 | 16.4M | struct buf_2d backup_yv12[2][MAX_MB_PLANE]; |
4383 | | |
4384 | 16.4M | ref_frame = vp9_ref_order[ref_index].ref_frame[0]; |
4385 | 16.4M | second_ref_frame = vp9_ref_order[ref_index].ref_frame[1]; |
4386 | | |
4387 | 16.4M | vp9_zero(x->sum_y_eobs); |
4388 | | |
4389 | | #if CONFIG_BETTER_HW_COMPATIBILITY |
4390 | | // forbid 8X4 and 4X8 partitions if any reference frame is scaled. |
4391 | | if (bsize == BLOCK_8X4 || bsize == BLOCK_4X8) { |
4392 | | int ref_scaled = ref_frame > INTRA_FRAME && |
4393 | | vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf); |
4394 | | if (second_ref_frame > INTRA_FRAME) |
4395 | | ref_scaled += vp9_is_scaled(&cm->frame_refs[second_ref_frame - 1].sf); |
4396 | | if (ref_scaled) continue; |
4397 | | } |
4398 | | #endif |
4399 | | // Look at the reference frame of the best mode so far and set the |
4400 | | // skip mask to look at a subset of the remaining modes. |
4401 | 16.4M | if (ref_index > 2 && sf->mode_skip_start < MAX_MODES) { |
4402 | 2.43M | if (ref_index == 3) { |
4403 | 811k | switch (best_mbmode.ref_frame[0]) { |
4404 | 526k | case INTRA_FRAME: break; |
4405 | 168k | case LAST_FRAME: |
4406 | 168k | ref_frame_skip_mask[0] |= (1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME); |
4407 | 168k | ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK; |
4408 | 168k | break; |
4409 | 82.2k | case GOLDEN_FRAME: |
4410 | 82.2k | ref_frame_skip_mask[0] |= (1 << LAST_FRAME) | (1 << ALTREF_FRAME); |
4411 | 82.2k | ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK; |
4412 | 82.2k | break; |
4413 | 34.4k | case ALTREF_FRAME: |
4414 | 34.4k | ref_frame_skip_mask[0] |= (1 << GOLDEN_FRAME) | (1 << LAST_FRAME); |
4415 | 34.4k | break; |
4416 | 0 | case NO_REF_FRAME: |
4417 | 0 | case MAX_REF_FRAMES: assert(0 && "Invalid Reference frame"); break; |
4418 | 811k | } |
4419 | 811k | } |
4420 | 2.43M | } |
4421 | | |
4422 | 16.4M | if ((ref_frame_skip_mask[0] & (1 << ref_frame)) && |
4423 | 16.4M | (ref_frame_skip_mask[1] & (1 << VPXMAX(0, second_ref_frame)))) |
4424 | 2.72M | continue; |
4425 | | |
4426 | | // Test best rd so far against threshold for trying this mode. |
4427 | 13.7M | if (!internal_active_edge && |
4428 | 13.7M | rd_less_than_thresh(best_rd, |
4429 | 13.7M | rd_opt->threshes[segment_id][bsize][ref_index], |
4430 | 13.7M | &rd_thresh_freq_fact[ref_index])) |
4431 | 5.51M | continue; |
4432 | | |
4433 | | // This is only used in motion vector unit test. |
4434 | 8.22M | if (cpi->oxcf.motion_vector_unit_test && ref_frame == INTRA_FRAME) continue; |
4435 | | |
4436 | 8.22M | comp_pred = second_ref_frame > INTRA_FRAME; |
4437 | 8.22M | if (comp_pred) { |
4438 | 0 | if (!cpi->allow_comp_inter_inter) continue; |
4439 | | |
4440 | 0 | if (cm->ref_frame_sign_bias[ref_frame] == |
4441 | 0 | cm->ref_frame_sign_bias[second_ref_frame]) |
4442 | 0 | continue; |
4443 | | |
4444 | 0 | if (!(cpi->ref_frame_flags & ref_frame_to_flag(second_ref_frame))) |
4445 | 0 | continue; |
4446 | | // Do not allow compound prediction if the segment level reference frame |
4447 | | // feature is in use as in this case there can only be one reference. |
4448 | 0 | if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) continue; |
4449 | | |
4450 | 0 | if ((sf->mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) && |
4451 | 0 | best_mbmode.ref_frame[0] == INTRA_FRAME) |
4452 | 0 | continue; |
4453 | 0 | } |
4454 | | |
4455 | 8.22M | if (comp_pred) |
4456 | 0 | mode_excluded = cm->reference_mode == SINGLE_REFERENCE; |
4457 | 8.22M | else if (ref_frame != INTRA_FRAME) |
4458 | 5.66M | mode_excluded = cm->reference_mode == COMPOUND_REFERENCE; |
4459 | | |
4460 | | // If the segment reference frame feature is enabled,
4461 | | // then do nothing if the current ref frame is not allowed.
4462 | 8.22M | if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) && |
4463 | 8.22M | get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) { |
4464 | 0 | continue; |
4465 | | // Disable this drop out case if the ref frame |
4466 | | // segment level feature is enabled for this segment. This is to |
4467 | | // prevent the possibility that we end up unable to pick any mode. |
4468 | 8.22M | } else if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) { |
4469 | | // Only consider ZEROMV/ALTREF_FRAME for alt ref frame, |
4470 | | // unless ARNR filtering is enabled in which case we want |
4471 | | // an unfiltered alternative. We allow near/nearest as well |
4472 | | // because they may result in zero-zero MVs but be cheaper. |
4473 | 8.22M | if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) |
4474 | 0 | continue; |
4475 | 8.22M | } |
4476 | | |
4477 | 8.22M | mi->tx_size = TX_4X4; |
4478 | 8.22M | mi->uv_mode = DC_PRED; |
4479 | 8.22M | mi->ref_frame[0] = ref_frame; |
4480 | 8.22M | mi->ref_frame[1] = second_ref_frame; |
4481 | | // Evaluate all sub-pel filters irrespective of whether we can use |
4482 | | // them for this frame. |
4483 | 8.22M | mi->interp_filter = |
4484 | 8.22M | cm->interp_filter == SWITCHABLE ? EIGHTTAP : cm->interp_filter; |
4485 | 8.22M | x->skip = 0; |
4486 | 8.22M | set_ref_ptrs(cm, xd, ref_frame, second_ref_frame); |
4487 | | |
4488 | | // Select prediction reference frames. |
4489 | 32.9M | for (i = 0; i < MAX_MB_PLANE; i++) { |
4490 | 24.6M | xd->plane[i].pre[0] = yv12_mb[ref_frame][i]; |
4491 | 24.6M | if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i]; |
4492 | 24.6M | } |
4493 | | |
4494 | 8.22M | if (ref_frame == INTRA_FRAME) { |
4495 | 2.56M | int rate; |
4496 | 2.56M | if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y, &distortion_y, |
4497 | 2.56M | best_rd) >= best_rd) |
4498 | 660k | continue; |
4499 | 1.90M | rate2 += rate; |
4500 | 1.90M | rate2 += intra_cost_penalty; |
4501 | 1.90M | distortion2 += distortion_y; |
4502 | | |
4503 | 1.90M | if (rate_uv_intra == INT_MAX) { |
4504 | 1.90M | choose_intra_uv_mode(cpi, x, ctx, bsize, TX_4X4, &rate_uv_intra, |
4505 | 1.90M | &rate_uv_tokenonly, &dist_uv, &skip_uv, &mode_uv); |
4506 | 1.90M | } |
4507 | 1.90M | rate2 += rate_uv_intra; |
4508 | 1.90M | rate_uv = rate_uv_tokenonly; |
4509 | 1.90M | distortion2 += dist_uv; |
4510 | 1.90M | distortion_uv = dist_uv; |
4511 | 1.90M | mi->uv_mode = mode_uv; |
4512 | 5.66M | } else { |
4513 | 5.66M | int rate; |
4514 | 5.66M | int64_t distortion; |
4515 | 5.66M | int64_t this_rd_thresh; |
4516 | 5.66M | int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX; |
4517 | 5.66M | int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX; |
4518 | 5.66M | int64_t tmp_best_distortion = INT_MAX, tmp_best_sse, uv_sse; |
4519 | 5.66M | int tmp_best_skippable = 0; |
4520 | 5.66M | int switchable_filter_index; |
4521 | 5.66M | int_mv *second_ref = |
4522 | 5.66M | comp_pred ? &x->mbmi_ext->ref_mvs[second_ref_frame][0] : NULL; |
4523 | 5.66M | b_mode_info tmp_best_bmodes[16]; |
4524 | 5.66M | MODE_INFO tmp_best_mbmode; |
4525 | 5.66M | BEST_SEG_INFO bsi[SWITCHABLE_FILTERS]; |
4526 | 5.66M | int pred_exists = 0; |
4527 | 5.66M | int uv_skippable; |
4528 | | |
4529 | 5.66M | YV12_BUFFER_CONFIG *scaled_ref_frame[2] = { NULL, NULL }; |
4530 | 5.66M | int ref; |
4531 | | |
4532 | 16.9M | for (ref = 0; ref < 2; ++ref) { |
4533 | 11.3M | scaled_ref_frame[ref] = |
4534 | 11.3M | mi->ref_frame[ref] > INTRA_FRAME |
4535 | 11.3M | ? vp9_get_scaled_ref_frame(cpi, mi->ref_frame[ref]) |
4536 | 11.3M | : NULL; |
4537 | | |
4538 | 11.3M | if (scaled_ref_frame[ref]) { |
4539 | | // Swap out the reference frame for a version that's been scaled to |
4540 | | // match the resolution of the current frame, allowing the existing |
4541 | | // motion search code to be used without additional modifications. |
4542 | 0 | for (i = 0; i < MAX_MB_PLANE; i++) |
4543 | 0 | backup_yv12[ref][i] = xd->plane[i].pre[ref]; |
4544 | 0 | vp9_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col, |
4545 | 0 | NULL); |
4546 | 0 | } |
4547 | 11.3M | } |
4548 | | |
4549 | 5.66M | this_rd_thresh = (ref_frame == LAST_FRAME) |
4550 | 5.66M | ? rd_opt->threshes[segment_id][bsize][THR_LAST] |
4551 | 5.66M | : rd_opt->threshes[segment_id][bsize][THR_ALTR]; |
4552 | 5.66M | this_rd_thresh = (ref_frame == GOLDEN_FRAME) |
4553 | 5.66M | ? rd_opt->threshes[segment_id][bsize][THR_GOLD] |
4554 | 5.66M | : this_rd_thresh; |
4555 | 28.3M | for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) |
4556 | 22.6M | filter_cache[i] = INT64_MAX; |
4557 | | |
4558 | 5.66M | if (cm->interp_filter != BILINEAR) { |
4559 | 5.66M | tmp_best_filter = EIGHTTAP; |
4560 | 5.66M | if (x->source_variance < sf->disable_filter_search_var_thresh) { |
4561 | 0 | tmp_best_filter = EIGHTTAP; |
4562 | 5.66M | } else if (sf->adaptive_pred_interp_filter == 1 && |
4563 | 5.66M | ctx->pred_interp_filter < SWITCHABLE) { |
4564 | 5.43M | tmp_best_filter = ctx->pred_interp_filter; |
4565 | 5.43M | } else if (sf->adaptive_pred_interp_filter == 2) { |
4566 | 0 | tmp_best_filter = ctx->pred_interp_filter < SWITCHABLE |
4567 | 0 | ? ctx->pred_interp_filter |
4568 | 0 | : 0; |
4569 | 222k | } else { |
4570 | 222k | for (switchable_filter_index = 0; |
4571 | 890k | switchable_filter_index < SWITCHABLE_FILTERS; |
4572 | 667k | ++switchable_filter_index) { |
4573 | 667k | int newbest, rs; |
4574 | 667k | int64_t rs_rd; |
4575 | 667k | MB_MODE_INFO_EXT *mbmi_ext = x->mbmi_ext; |
4576 | 667k | mi->interp_filter = switchable_filter_index; |
4577 | 667k | tmp_rd = rd_pick_best_sub8x8_mode( |
4578 | 667k | cpi, x, &mbmi_ext->ref_mvs[ref_frame][0], second_ref, best_yrd, |
4579 | 667k | &rate, &rate_y, &distortion, &skippable, &total_sse, |
4580 | 667k | (int)this_rd_thresh, seg_mvs, bsi, switchable_filter_index, |
4581 | 667k | mi_row, mi_col); |
4582 | | |
4583 | 667k | if (tmp_rd == INT64_MAX) continue; |
4584 | 486k | rs = vp9_get_switchable_rate(cpi, xd); |
4585 | 486k | rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0); |
4586 | 486k | filter_cache[switchable_filter_index] = tmp_rd; |
4587 | 486k | filter_cache[SWITCHABLE_FILTERS] = |
4588 | 486k | VPXMIN(filter_cache[SWITCHABLE_FILTERS], tmp_rd + rs_rd); |
4589 | 486k | if (cm->interp_filter == SWITCHABLE) tmp_rd += rs_rd; |
4590 | | |
4591 | 486k | mask_filter = VPXMAX(mask_filter, tmp_rd); |
4592 | | |
4593 | 486k | newbest = (tmp_rd < tmp_best_rd); |
4594 | 486k | if (newbest) { |
4595 | 213k | tmp_best_filter = mi->interp_filter; |
4596 | 213k | tmp_best_rd = tmp_rd; |
4597 | 213k | } |
4598 | 486k | if ((newbest && cm->interp_filter == SWITCHABLE) || |
4599 | 486k | (mi->interp_filter == cm->interp_filter && |
4600 | 389k | cm->interp_filter != SWITCHABLE)) { |
4601 | 185k | tmp_best_rdu = tmp_rd; |
4602 | 185k | tmp_best_rate = rate; |
4603 | 185k | tmp_best_ratey = rate_y; |
4604 | 185k | tmp_best_distortion = distortion; |
4605 | 185k | tmp_best_sse = total_sse; |
4606 | 185k | tmp_best_skippable = skippable; |
4607 | 185k | tmp_best_mbmode = *mi; |
4608 | 185k | x->sum_y_eobs[TX_4X4] = 0; |
4609 | 928k | for (i = 0; i < 4; i++) { |
4610 | 742k | tmp_best_bmodes[i] = xd->mi[0]->bmi[i]; |
4611 | 742k | x->zcoeff_blk[TX_4X4][i] = !x->plane[0].eobs[i]; |
4612 | 742k | x->sum_y_eobs[TX_4X4] += x->plane[0].eobs[i]; |
4613 | 742k | } |
4614 | 185k | pred_exists = 1; |
4615 | 185k | if (switchable_filter_index == 0 && sf->use_rd_breakout && |
4616 | 185k | best_rd < INT64_MAX) { |
4617 | 69.8k | if (tmp_best_rdu / 2 > best_rd) { |
4618 | | // skip searching the other filters if the first is |
4619 | | // already substantially larger than the best so far |
4620 | 0 | tmp_best_filter = mi->interp_filter; |
4621 | 0 | tmp_best_rdu = INT64_MAX; |
4622 | 0 | break; |
4623 | 0 | } |
4624 | 69.8k | } |
4625 | 185k | } |
4626 | 486k | } // switchable_filter_index loop |
4627 | 222k | } |
4628 | 5.66M | } |
4629 | | |
4630 | 5.66M | if (tmp_best_rdu == INT64_MAX && pred_exists) continue; |
4631 | | |
4632 | 5.66M | mi->interp_filter = (cm->interp_filter == SWITCHABLE ? tmp_best_filter |
4633 | 5.66M | : cm->interp_filter); |
4634 | 5.66M | if (!pred_exists) { |
4635 | | // Handles the special case when a filter that is not in the |
4636 | | // switchable list (bilinear, 6-tap) is indicated at the frame level |
4637 | 5.49M | tmp_rd = rd_pick_best_sub8x8_mode( |
4638 | 5.49M | cpi, x, &x->mbmi_ext->ref_mvs[ref_frame][0], second_ref, best_yrd, |
4639 | 5.49M | &rate, &rate_y, &distortion, &skippable, &total_sse, |
4640 | 5.49M | (int)this_rd_thresh, seg_mvs, bsi, 0, mi_row, mi_col); |
4641 | 5.49M | if (tmp_rd == INT64_MAX) continue; |
4642 | 3.57M | x->sum_y_eobs[TX_4X4] = 0; |
4643 | 17.8M | for (i = 0; i < 4; i++) { |
4644 | 14.2M | x->zcoeff_blk[TX_4X4][i] = !x->plane[0].eobs[i]; |
4645 | 14.2M | x->sum_y_eobs[TX_4X4] += x->plane[0].eobs[i]; |
4646 | 14.2M | } |
4647 | 3.57M | } else { |
4648 | 164k | total_sse = tmp_best_sse; |
4649 | 164k | rate = tmp_best_rate; |
4650 | 164k | rate_y = tmp_best_ratey; |
4651 | 164k | distortion = tmp_best_distortion; |
4652 | 164k | skippable = tmp_best_skippable; |
4653 | 164k | *mi = tmp_best_mbmode; |
4654 | 823k | for (i = 0; i < 4; i++) xd->mi[0]->bmi[i] = tmp_best_bmodes[i]; |
4655 | 164k | } |
4656 | | |
4657 | 3.73M | rate2 += rate; |
4658 | 3.73M | distortion2 += distortion; |
4659 | | |
4660 | 3.73M | if (cm->interp_filter == SWITCHABLE) |
4661 | 1.70M | rate2 += vp9_get_switchable_rate(cpi, xd); |
4662 | | |
4663 | 3.73M | if (!mode_excluded) |
4664 | 3.73M | mode_excluded = comp_pred ? cm->reference_mode == SINGLE_REFERENCE |
4665 | 3.73M | : cm->reference_mode == COMPOUND_REFERENCE; |
4666 | | |
4667 | 3.73M | compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred); |
4668 | | |
4669 | 3.73M | tmp_best_rdu = |
4670 | 3.73M | best_rd - VPXMIN(RDCOST(x->rdmult, x->rddiv, rate2, distortion2), |
4671 | 3.73M | RDCOST(x->rdmult, x->rddiv, 0, total_sse)); |
4672 | | |
4673 | 3.73M | if (tmp_best_rdu > 0) { |
4674 | | // If even the 'Y' rd value of split is higher than best so far |
4675 | | // then don't bother looking at UV |
4676 | 3.72M | vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col, BLOCK_8X8); |
4677 | 3.72M | memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm)); |
4678 | 3.72M | if (!super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable, |
4679 | 3.72M | &uv_sse, BLOCK_8X8, tmp_best_rdu)) { |
4680 | 3.57M | for (ref = 0; ref < 2; ++ref) { |
4681 | 2.38M | if (scaled_ref_frame[ref]) { |
4682 | 0 | for (i = 0; i < MAX_MB_PLANE; ++i) |
4683 | 0 | xd->plane[i].pre[ref] = backup_yv12[ref][i]; |
4684 | 0 | } |
4685 | 2.38M | } |
4686 | 1.19M | continue; |
4687 | 1.19M | } |
4688 | | |
4689 | 2.53M | rate2 += rate_uv; |
4690 | 2.53M | distortion2 += distortion_uv; |
4691 | 2.53M | skippable = skippable && uv_skippable; |
4692 | 2.53M | total_sse += uv_sse; |
4693 | 2.53M | } |
4694 | | |
4695 | 7.63M | for (ref = 0; ref < 2; ++ref) { |
4696 | 5.09M | if (scaled_ref_frame[ref]) { |
4697 | | // Restore the prediction frame pointers to their unscaled versions. |
4698 | 0 | for (i = 0; i < MAX_MB_PLANE; ++i) |
4699 | 0 | xd->plane[i].pre[ref] = backup_yv12[ref][i]; |
4700 | 0 | } |
4701 | 5.09M | } |
4702 | 2.54M | } |
4703 | | |
4704 | 4.44M | if (cm->reference_mode == REFERENCE_MODE_SELECT) rate2 += compmode_cost; |
4705 | | |
4706 | | // Estimate the reference frame signaling cost and add it |
4707 | | // to the rolling cost variable. |
4708 | 4.44M | if (second_ref_frame > INTRA_FRAME) { |
4709 | 0 | rate2 += ref_costs_comp[ref_frame]; |
4710 | 4.44M | } else { |
4711 | 4.44M | rate2 += ref_costs_single[ref_frame]; |
4712 | 4.44M | } |
4713 | | |
4714 | 4.44M | if (!disable_skip) { |
4715 | 4.44M | const vpx_prob skip_prob = vp9_get_skip_prob(cm, xd); |
4716 | 4.44M | const int skip_cost0 = vp9_cost_bit(skip_prob, 0); |
4717 | 4.44M | const int skip_cost1 = vp9_cost_bit(skip_prob, 1); |
4718 | | |
4719 | | // Skip is never coded at the segment level for sub8x8 blocks and instead |
4720 | | // always coded in the bitstream at the mode info level. |
4721 | 4.44M | if (ref_frame != INTRA_FRAME && !xd->lossless) { |
4722 | 2.20M | if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv + skip_cost0, |
4723 | 2.20M | distortion2) < |
4724 | 2.20M | RDCOST(x->rdmult, x->rddiv, skip_cost1, total_sse)) { |
4725 | | // Add in the cost of the no skip flag. |
4726 | 1.99M | rate2 += skip_cost0; |
4727 | 1.99M | } else { |
4728 | | // FIXME(rbultje) make this work for splitmv also |
4729 | 212k | rate2 += skip_cost1; |
4730 | 212k | distortion2 = total_sse; |
4731 | 212k | assert(total_sse >= 0); |
4732 | 212k | rate2 -= (rate_y + rate_uv); |
4733 | 212k | rate_y = 0; |
4734 | 212k | rate_uv = 0; |
4735 | 212k | this_skip2 = 1; |
4736 | 212k | } |
4737 | 2.24M | } else { |
4738 | | // Add in the cost of the no skip flag. |
4739 | 2.24M | rate2 += skip_cost0; |
4740 | 2.24M | } |
4741 | | |
4742 | | // Calculate the final RD estimate for this mode. |
4743 | 4.44M | this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); |
4744 | 4.44M | } |
4745 | | |
4746 | 4.44M | if (!disable_skip && ref_frame == INTRA_FRAME) { |
4747 | 7.61M | for (i = 0; i < REFERENCE_MODES; ++i) |
4748 | 5.70M | best_pred_rd[i] = VPXMIN(best_pred_rd[i], this_rd); |
4749 | 9.51M | for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) |
4750 | 7.61M | best_filter_rd[i] = VPXMIN(best_filter_rd[i], this_rd); |
4751 | 1.90M | } |
4752 | | |
4753 | | // Did this mode help, i.e. is it the new best mode?
4754 | 4.44M | if (this_rd < best_rd || x->skip) { |
4755 | 1.43M | if (!mode_excluded) { |
4756 | 1.43M | int max_plane = MAX_MB_PLANE; |
4757 | | // Note index of best mode so far |
4758 | 1.43M | best_ref_index = ref_index; |
4759 | | |
4760 | 1.43M | if (ref_frame == INTRA_FRAME) { |
4761 | | /* required for left and above block mv */ |
4762 | 788k | mi->mv[0].as_int = 0; |
4763 | 788k | max_plane = 1; |
4764 | | // Initialize interp_filter here so we do not have to check for |
4765 | | // inter block modes in get_pred_context_switchable_interp() |
4766 | 788k | mi->interp_filter = SWITCHABLE_FILTERS; |
4767 | 788k | } |
4768 | | |
4769 | 1.43M | rd_cost->rate = rate2; |
4770 | 1.43M | rd_cost->dist = distortion2; |
4771 | 1.43M | rd_cost->rdcost = this_rd; |
4772 | 1.43M | best_rd = this_rd; |
4773 | 1.43M | best_yrd = |
4774 | 1.43M | best_rd - RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv); |
4775 | 1.43M | best_mbmode = *mi; |
4776 | 1.43M | best_skip2 = this_skip2; |
4777 | 1.43M | if (!x->select_tx_size) swap_block_ptr(x, ctx, 1, 0, 0, max_plane); |
4778 | 1.43M | memcpy(ctx->zcoeff_blk, x->zcoeff_blk[TX_4X4], |
4779 | 1.43M | sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk); |
4780 | 1.43M | ctx->sum_y_eobs = x->sum_y_eobs[TX_4X4]; |
4781 | | |
4782 | 7.17M | for (i = 0; i < 4; i++) best_bmodes[i] = xd->mi[0]->bmi[i]; |
4783 | | |
4784 | | // TODO(debargha): enhance this test with a better distortion prediction |
4785 | | // based on qp, activity mask and history |
4786 | 1.43M | if ((sf->mode_search_skip_flags & FLAG_EARLY_TERMINATE) && |
4787 | 1.43M | (ref_index > MIN_EARLY_TERM_INDEX)) { |
4788 | 0 | int qstep = xd->plane[0].dequant[1]; |
4789 | | // TODO(debargha): Enhance this by specializing for each mode_index |
4790 | 0 | int scale = 4; |
4791 | 0 | #if CONFIG_VP9_HIGHBITDEPTH |
4792 | 0 | if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { |
4793 | 0 | qstep >>= (xd->bd - 8); |
4794 | 0 | } |
4795 | 0 | #endif // CONFIG_VP9_HIGHBITDEPTH |
4796 | 0 | if (x->source_variance < UINT_MAX) { |
4797 | 0 | const int var_adjust = (x->source_variance < 16); |
4798 | 0 | scale -= var_adjust; |
4799 | 0 | } |
4800 | 0 | if (ref_frame > INTRA_FRAME && distortion2 * scale < qstep * qstep) { |
4801 | 0 | early_term = 1; |
4802 | 0 | } |
4803 | 0 | } |
4804 | 1.43M | } |
4805 | 1.43M | } |
4806 | | |
4807 | | /* keep record of best compound/single-only prediction */ |
4808 | 4.44M | if (!disable_skip && ref_frame != INTRA_FRAME) { |
4809 | 2.54M | int64_t single_rd, hybrid_rd, single_rate, hybrid_rate; |
4810 | | |
4811 | 2.54M | if (cm->reference_mode == REFERENCE_MODE_SELECT) { |
4812 | 0 | single_rate = rate2 - compmode_cost; |
4813 | 0 | hybrid_rate = rate2; |
4814 | 2.54M | } else { |
4815 | 2.54M | single_rate = rate2; |
4816 | 2.54M | hybrid_rate = rate2 + compmode_cost; |
4817 | 2.54M | } |
4818 | | |
4819 | 2.54M | single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2); |
4820 | 2.54M | hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2); |
4821 | | |
4822 | 2.54M | if (!comp_pred && single_rd < best_pred_rd[SINGLE_REFERENCE]) |
4823 | 2.01M | best_pred_rd[SINGLE_REFERENCE] = single_rd; |
4824 | 530k | else if (comp_pred && single_rd < best_pred_rd[COMPOUND_REFERENCE]) |
4825 | 0 | best_pred_rd[COMPOUND_REFERENCE] = single_rd; |
4826 | | |
4827 | 2.54M | if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT]) |
4828 | 2.01M | best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd; |
4829 | 2.54M | } |
4830 | | |
4831 | | /* keep record of best filter type */ |
4832 | 4.44M | if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME && |
4833 | 4.44M | cm->interp_filter != BILINEAR) { |
4834 | 2.54M | int64_t ref = |
4835 | 2.54M | filter_cache[cm->interp_filter == SWITCHABLE ? SWITCHABLE_FILTERS |
4836 | 2.54M | : cm->interp_filter]; |
4837 | 2.54M | int64_t adj_rd; |
4838 | 12.7M | for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) { |
4839 | 10.1M | if (ref == INT64_MAX) |
4840 | 9.53M | adj_rd = 0; |
4841 | 651k | else if (filter_cache[i] == INT64_MAX) |
4842 | | // When early termination is triggered, the encoder does not have
4843 | | // access to the rate-distortion cost. It only knows that the cost
4844 | | // should be above the maximum valid value. Hence it takes the known
4845 | | // maximum plus an arbitrary constant as the rate-distortion cost.
4846 | 10.2k | adj_rd = mask_filter - ref + 10; |
4847 | 641k | else |
4848 | 641k | adj_rd = filter_cache[i] - ref; |
4849 | | |
4850 | 10.1M | adj_rd += this_rd; |
4851 | 10.1M | best_filter_rd[i] = VPXMIN(best_filter_rd[i], adj_rd); |
4852 | 10.1M | } |
4853 | 2.54M | } |
4854 | | |
4855 | 4.44M | if (early_term) break; |
4856 | | |
4857 | 4.44M | if (x->skip && !comp_pred) break; |
4858 | 4.44M | } |
4859 | | |
4860 | 2.74M | if (best_rd >= best_rd_so_far) { |
4861 | 1.55M | rd_cost->rate = INT_MAX; |
4862 | 1.55M | rd_cost->rdcost = INT64_MAX; |
4863 | 1.55M | return; |
4864 | 1.55M | } |
4865 | | |
4866 | | // If we used an estimate for the uv intra rd in the loop above... |
4867 | 1.19M | if (sf->use_uv_intra_rd_estimate) { |
4868 | | // Do Intra UV best rd mode selection if best mode choice above was intra. |
4869 | 0 | if (best_mbmode.ref_frame[0] == INTRA_FRAME) { |
4870 | 0 | *mi = best_mbmode; |
4871 | 0 | rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra, &rate_uv_tokenonly, |
4872 | 0 | &dist_uv, &skip_uv, BLOCK_8X8, TX_4X4); |
4873 | 0 | } |
4874 | 0 | } |
4875 | | |
4876 | 1.19M | if (best_rd == INT64_MAX) { |
4877 | 0 | rd_cost->rate = INT_MAX; |
4878 | 0 | rd_cost->dist = INT64_MAX; |
4879 | 0 | rd_cost->rdcost = INT64_MAX; |
4880 | 0 | return; |
4881 | 0 | } |
4882 | | |
4883 | 1.19M | assert((cm->interp_filter == SWITCHABLE) || |
4884 | 1.19M | (cm->interp_filter == best_mbmode.interp_filter) || |
4885 | 1.19M | !is_inter_block(&best_mbmode)); |
4886 | | |
4887 | 1.19M | vp9_update_rd_thresh_fact(tile_data->thresh_freq_fact, sf->adaptive_rd_thresh, |
4888 | 1.19M | bsize, best_ref_index); |
4889 | | |
4890 | | // macroblock modes |
4891 | 1.19M | *mi = best_mbmode; |
4892 | 1.19M | x->skip |= best_skip2; |
4893 | 1.19M | if (!is_inter_block(&best_mbmode)) { |
4894 | 3.94M | for (i = 0; i < 4; i++) xd->mi[0]->bmi[i].as_mode = best_bmodes[i].as_mode; |
4895 | 788k | } else { |
4896 | 2.01M | for (i = 0; i < 4; ++i) |
4897 | 1.61M | memcpy(&xd->mi[0]->bmi[i], &best_bmodes[i], sizeof(b_mode_info)); |
4898 | | |
4899 | 403k | mi->mv[0].as_int = xd->mi[0]->bmi[3].as_mv[0].as_int; |
4900 | 403k | mi->mv[1].as_int = xd->mi[0]->bmi[3].as_mv[1].as_int; |
4901 | 403k | } |
4902 | | // If the second reference does not exist, set the corresponding mv to zero. |
4903 | 1.19M | if (mi->ref_frame[1] == NO_REF_FRAME) { |
4904 | 1.19M | mi->mv[1].as_int = 0; |
4905 | 5.95M | for (i = 0; i < 4; ++i) { |
4906 | 4.76M | mi->bmi[i].as_mv[1].as_int = 0; |
4907 | 4.76M | } |
4908 | 1.19M | } |
4909 | | |
4910 | 4.76M | for (i = 0; i < REFERENCE_MODES; ++i) { |
4911 | 3.57M | if (best_pred_rd[i] == INT64_MAX) |
4912 | 223k | best_pred_diff[i] = INT_MIN; |
4913 | 3.35M | else |
4914 | 3.35M | best_pred_diff[i] = best_rd - best_pred_rd[i]; |
4915 | 3.57M | } |
4916 | | |
4917 | 1.19M | if (!x->skip) { |
4918 | 5.87M | for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) { |
4919 | 4.69M | if (best_filter_rd[i] == INT64_MAX) |
4920 | 0 | best_filter_diff[i] = 0; |
4921 | 4.69M | else |
4922 | 4.69M | best_filter_diff[i] = best_rd - best_filter_rd[i]; |
4923 | 4.69M | } |
4924 | 1.17M | if (cm->interp_filter == SWITCHABLE) |
4925 | 673k | assert(best_filter_diff[SWITCHABLE_FILTERS] == 0); |
4926 | 1.17M | } else { |
4927 | 17.4k | vp9_zero(best_filter_diff); |
4928 | 17.4k | } |
4929 | | |
4930 | 1.19M | store_coding_context(x, ctx, best_ref_index, best_pred_diff, best_filter_diff, |
4931 | 1.19M | 0); |
4932 | 1.19M | } |
4933 | | #endif // !CONFIG_REALTIME_ONLY |
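The per-filter bookkeeping near the end of both inter-mode loops converts the cached per-filter RD estimates into the best_filter_rd[] minima that later become best_filter_diff[]: each cache entry is expressed as a delta against the cost of the filter actually in use, with a pessimistic bound (the known maximum plus a small constant) substituted when early termination left an entry unset. A compact sketch of that update follows; the names, array size, and the numbers in main() are local stand-ins rather than the libvpx definitions.

#include <stdint.h>
#include <stdio.h>

#define EX_FILTER_CTX 4 /* stand-in for SWITCHABLE_FILTER_CONTEXTS */

/* Illustrative only -- not part of vp9_rdopt.c. */
static int64_t ex_min64(int64_t a, int64_t b) { return a < b ? a : b; }

/* Mirrors the adj_rd loop above: ref_cost is the cached RD of the filter in
 * use, filter_cache[] holds per-context estimates (INT64_MAX when early
 * termination left an entry unset), and mask_filter is the largest valid
 * estimate seen so far. */
static void update_best_filter_rd(int64_t best_filter_rd[EX_FILTER_CTX],
                                  const int64_t filter_cache[EX_FILTER_CTX],
                                  int64_t ref_cost, int64_t mask_filter,
                                  int64_t this_rd) {
  int i;
  for (i = 0; i < EX_FILTER_CTX; ++i) {
    int64_t adj_rd;
    if (ref_cost == INT64_MAX)
      adj_rd = 0; /* nothing usable to compare against */
    else if (filter_cache[i] == INT64_MAX)
      adj_rd = mask_filter - ref_cost + 10; /* known maximum plus a constant */
    else
      adj_rd = filter_cache[i] - ref_cost; /* delta vs. the filter in use */
    best_filter_rd[i] = ex_min64(best_filter_rd[i], this_rd + adj_rd);
  }
}

int main(void) {
  int64_t best[EX_FILTER_CTX] = { INT64_MAX, INT64_MAX, INT64_MAX, INT64_MAX };
  const int64_t cache[EX_FILTER_CTX] = { 1000, 1200, INT64_MAX, 900 };
  int i;
  update_best_filter_rd(best, cache, 1000, 1200, 5000);
  for (i = 0; i < EX_FILTER_CTX; ++i)
    printf("best_filter_rd[%d] = %lld\n", i, (long long)best[i]);
  return 0;
}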