/src/libvpx/vp8/encoder/rdopt.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 | | * |
4 | | * Use of this source code is governed by a BSD-style license |
5 | | * that can be found in the LICENSE file in the root of the source |
6 | | * tree. An additional intellectual property rights grant can be found |
7 | | * in the file PATENTS. All contributing project authors may |
8 | | * be found in the AUTHORS file in the root of the source tree. |
9 | | */ |
10 | | |
11 | | #include <assert.h> |
12 | | #include <stdio.h> |
13 | | #include <math.h> |
14 | | #include <limits.h> |
15 | | #include <assert.h> |
16 | | #include "vpx_config.h" |
17 | | #include "vp8_rtcd.h" |
18 | | #include "./vpx_dsp_rtcd.h" |
19 | | #include "encodeframe.h" |
20 | | #include "tokenize.h" |
21 | | #include "treewriter.h" |
22 | | #include "onyx_int.h" |
23 | | #include "modecosts.h" |
24 | | #include "encodeintra.h" |
25 | | #include "pickinter.h" |
26 | | #include "vp8/common/common.h" |
27 | | #include "vp8/common/entropymode.h" |
28 | | #include "vp8/common/reconinter.h" |
29 | | #include "vp8/common/reconintra.h" |
30 | | #include "vp8/common/reconintra4x4.h" |
31 | | #include "vp8/common/findnearmv.h" |
32 | | #include "vp8/common/quant_common.h" |
33 | | #include "encodemb.h" |
34 | | #include "vp8/encoder/quantize.h" |
35 | | #include "vpx_dsp/variance.h" |
36 | | #include "vpx_ports/system_state.h" |
37 | | #include "mcomp.h" |
38 | | #include "rdopt.h" |
39 | | #include "vpx_mem/vpx_mem.h" |
40 | | #include "vp8/common/systemdependent.h" |
41 | | #if CONFIG_TEMPORAL_DENOISING |
42 | | #include "denoising.h" |
43 | | #endif |
44 | | extern void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x); |
45 | | |
/* Larger of a and b. Function-like macro: the selected argument is evaluated
 * twice, so do not pass expressions with side effects.
 */
#define MAXF(a, b) (((b) < (a)) ? (a) : (b))
47 | | |
/* Bundle of integer rate and distortion values.
 * NOTE(review): the code that fills and reads these fields is outside this
 * section; the per-field notes below are inferred from the names only and
 * should be confirmed against the users of this struct.
 */
typedef struct rate_distortion_struct {
  int rate2;         /* presumably the combined rate - TODO confirm */
  int rate_y;        /* presumably the luma rate - TODO confirm */
  int rate_uv;       /* presumably the chroma rate - TODO confirm */
  int distortion2;   /* presumably the combined distortion - TODO confirm */
  int distortion_uv; /* presumably the chroma distortion - TODO confirm */
} RATE_DISTORTION;
55 | | |
/* Snapshot of a candidate mode: three integer RD figures plus the macroblock
 * mode info, the 16 per-block mode entries and the partition info.
 * NOTE(review): the code that fills this struct is outside this section;
 * confirm the exact meaning of yrd / rd / intra_rd against it.
 */
typedef struct best_mode_struct {
  int yrd;
  int rd;
  int intra_rd;
  MB_MODE_INFO mbmode;
  union b_mode_info bmodes[16];
  PARTITION_INFO partition;
} BEST_MODE;
64 | | |
/* Per-speed thresholds used by vp8_auto_select_speed(): entry [Speed] is
 * multiplied by the average encode time and compared against 100 times the
 * per-frame time budget when deciding whether to lower the speed by one.
 */
static const int auto_speed_thresh[17] = {
  /*  0 ..  3 */ 1000, 200, 150, 130,
  /*  4 ..  7 */ 150,  125, 120, 115,
  /*  8 .. 11 */ 115,  115, 115, 115,
  /* 12 .. 15 */ 115,  115, 115, 115,
  /* 16       */ 105
};
68 | | |
/* Prediction mode for each of the MAX_MODES search positions.
 * vp8_ref_frame_order has the same length.
 * NOTE(review): entry i here presumably pairs with entry i of
 * vp8_ref_frame_order; the mode search loop that consumes both tables is
 * outside this section - confirm there.
 */
const MB_PREDICTION_MODE vp8_mode_order[MAX_MODES] = {
  ZEROMV,    DC_PRED,

  NEARESTMV, NEARMV,

  ZEROMV,    NEARESTMV,

  ZEROMV,    NEARESTMV,

  NEARMV,    NEARMV,

  V_PRED,    H_PRED,    TM_PRED,

  NEWMV,     NEWMV,     NEWMV,

  SPLITMV,   SPLITMV,   SPLITMV,

  B_PRED,
};
88 | | |
/* This table determines the search order in reference frame priority order,
 * which may not necessarily match INTRA,LAST,GOLDEN,ARF.
 * It has one entry per search position (MAX_MODES), the same length as
 * vp8_mode_order.
 * NOTE(review): the values 0..3 look like priority slots rather than frame
 * identifiers; the code that maps them to actual reference frames is outside
 * this section - confirm there.
 */
const int vp8_ref_frame_order[MAX_MODES] = {
  1, 0,

  1, 1,

  2, 2,

  3, 3,

  2, 3,

  0, 0, 0,

  1, 2, 3,

  1, 2, 3,

  0,
};
111 | | |
112 | | static void fill_token_costs( |
113 | | int c[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS], |
114 | | const vp8_prob p[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS] |
115 | 88.0k | [ENTROPY_NODES]) { |
116 | 88.0k | int i, j, k; |
117 | | |
118 | 440k | for (i = 0; i < BLOCK_TYPES; ++i) { |
119 | 3.17M | for (j = 0; j < COEF_BANDS; ++j) { |
120 | 11.2M | for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { |
121 | | /* check for pt=0 and band > 1 if block type 0 |
122 | | * and 0 if blocktype 1 |
123 | | */ |
124 | 8.45M | if (k == 0 && j > (i == 0)) { |
125 | 2.37M | vp8_cost_tokens2(c[i][j][k], p[i][j][k], vp8_coef_tree, 2); |
126 | 6.07M | } else { |
127 | 6.07M | vp8_cost_tokens(c[i][j][k], p[i][j][k], vp8_coef_tree); |
128 | 6.07M | } |
129 | 8.45M | } |
130 | 2.81M | } |
131 | 352k | } |
132 | 88.0k | } |
133 | | |
/* RDMULT boost in sixteenths, indexed by twopass.next_iiratio (clamped to 31
 * by vp8_initialize_rd_consts). Only the first five entries are non-zero;
 * the remaining 27 are zero through implicit initialization.
 */
static const int rd_iifactor[32] = { 4, 4, 3, 2, 1 };
137 | | |
/* values are now correlated to quantizer */
/* Indexed by QIndex in vp8cx_initialize_me_consts(), which stores the selected
 * entry in cpi->mb.sadperbit16.
 */
static const int sad_per_bit16lut[QINDEX_RANGE] = {
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6,
  6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9,
  9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11,
  11, 11, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 14, 14
};
/* Indexed by QIndex in vp8cx_initialize_me_consts(), which stores the selected
 * entry in cpi->mb.sadperbit4.
 */
static const int sad_per_bit4lut[QINDEX_RANGE] = {
  2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6,
  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  7, 7, 7, 7, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10,
  10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12,
  12, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16,
  16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 19, 20, 20, 20,
};
157 | | |
158 | 88.0k | void vp8cx_initialize_me_consts(VP8_COMP *cpi, int QIndex) { |
159 | 88.0k | cpi->mb.sadperbit16 = sad_per_bit16lut[QIndex]; |
160 | 88.0k | cpi->mb.sadperbit4 = sad_per_bit4lut[QIndex]; |
161 | 88.0k | } |
162 | | |
163 | 88.0k | void vp8_initialize_rd_consts(VP8_COMP *cpi, MACROBLOCK *x, int Qvalue) { |
164 | 88.0k | int q; |
165 | 88.0k | int i; |
166 | 88.0k | double capped_q = (Qvalue < 160) ? (double)Qvalue : 160.0; |
167 | 88.0k | double rdconst = 2.80; |
168 | | |
169 | 88.0k | vpx_clear_system_state(); |
170 | | |
171 | | /* Further tests required to see if optimum is different |
172 | | * for key frames, golden frames and arf frames. |
173 | | */ |
174 | 88.0k | cpi->RDMULT = (int)(rdconst * (capped_q * capped_q)); |
175 | | |
176 | | /* Extend rate multiplier along side quantizer zbin increases */ |
177 | 88.0k | if (cpi->mb.zbin_over_quant > 0) { |
178 | 21.4k | double oq_factor; |
179 | 21.4k | double modq; |
180 | | |
181 | | /* Experimental code using the same basic equation as used for Q above |
182 | | * The units of cpi->mb.zbin_over_quant are 1/128 of Q bin size |
183 | | */ |
184 | 21.4k | oq_factor = 1.0 + ((double)0.0015625 * cpi->mb.zbin_over_quant); |
185 | 21.4k | modq = (int)((double)capped_q * oq_factor); |
186 | 21.4k | cpi->RDMULT = (int)(rdconst * (modq * modq)); |
187 | 21.4k | } |
188 | | |
189 | 88.0k | if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) { |
190 | 0 | if (cpi->twopass.next_iiratio > 31) { |
191 | 0 | cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4; |
192 | 0 | } else { |
193 | 0 | cpi->RDMULT += |
194 | 0 | (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4; |
195 | 0 | } |
196 | 0 | } |
197 | | |
198 | 88.0k | cpi->mb.errorperbit = (cpi->RDMULT / 110); |
199 | 88.0k | cpi->mb.errorperbit += (cpi->mb.errorperbit == 0); |
200 | | |
201 | 88.0k | vp8_set_speed_features(cpi); |
202 | | |
203 | 1.84M | for (i = 0; i < MAX_MODES; ++i) { |
204 | 1.76M | x->mode_test_hit_counts[i] = 0; |
205 | 1.76M | } |
206 | | |
207 | 88.0k | q = (int)pow(Qvalue, 1.25); |
208 | | |
209 | 88.0k | if (q < 8) q = 8; |
210 | | |
211 | 88.0k | if (cpi->RDMULT > 1000) { |
212 | 49.1k | cpi->RDDIV = 1; |
213 | 49.1k | cpi->RDMULT /= 100; |
214 | | |
215 | 1.03M | for (i = 0; i < MAX_MODES; ++i) { |
216 | 983k | if (cpi->sf.thresh_mult[i] < INT_MAX) { |
217 | 944k | x->rd_threshes[i] = cpi->sf.thresh_mult[i] * q / 100; |
218 | 944k | } else { |
219 | 38.6k | x->rd_threshes[i] = INT_MAX; |
220 | 38.6k | } |
221 | | |
222 | 983k | cpi->rd_baseline_thresh[i] = x->rd_threshes[i]; |
223 | 983k | } |
224 | 49.1k | } else { |
225 | 38.9k | cpi->RDDIV = 100; |
226 | | |
227 | 817k | for (i = 0; i < MAX_MODES; ++i) { |
228 | 778k | if (cpi->sf.thresh_mult[i] < (INT_MAX / q)) { |
229 | 719k | x->rd_threshes[i] = cpi->sf.thresh_mult[i] * q; |
230 | 719k | } else { |
231 | 59.1k | x->rd_threshes[i] = INT_MAX; |
232 | 59.1k | } |
233 | | |
234 | 778k | cpi->rd_baseline_thresh[i] = x->rd_threshes[i]; |
235 | 778k | } |
236 | 38.9k | } |
237 | | |
238 | 88.0k | { |
239 | | /* build token cost array for the type of frame we have now */ |
240 | 88.0k | FRAME_CONTEXT *l = &cpi->lfc_n; |
241 | | |
242 | 88.0k | if (cpi->common.refresh_alt_ref_frame) { |
243 | 18.6k | l = &cpi->lfc_a; |
244 | 69.4k | } else if (cpi->common.refresh_golden_frame) { |
245 | 7.14k | l = &cpi->lfc_g; |
246 | 7.14k | } |
247 | | |
248 | 88.0k | fill_token_costs(cpi->mb.token_costs, |
249 | 88.0k | (const vp8_prob(*)[8][3][11])l->coef_probs); |
250 | | /* |
251 | | fill_token_costs( |
252 | | cpi->mb.token_costs, |
253 | | (const vp8_prob( *)[8][3][11]) cpi->common.fc.coef_probs); |
254 | | */ |
255 | | |
256 | | /* TODO make these mode costs depend on last,alt or gold too. (jbb) */ |
257 | 88.0k | vp8_init_mode_costs(cpi); |
258 | 88.0k | } |
259 | 88.0k | } |
260 | | |
261 | 32.6k | void vp8_auto_select_speed(VP8_COMP *cpi) { |
262 | 32.6k | int milliseconds_for_compress = (int)(1000000 / cpi->framerate); |
263 | | |
264 | 32.6k | milliseconds_for_compress = |
265 | 32.6k | milliseconds_for_compress * (16 - cpi->oxcf.cpu_used) / 16; |
266 | | |
267 | | #if 0 |
268 | | |
269 | | if (0) |
270 | | { |
271 | | FILE *f; |
272 | | |
273 | | f = fopen("speed.stt", "a"); |
274 | | fprintf(f, " %8ld %10ld %10ld %10ld\n", |
275 | | cpi->common.current_video_frame, cpi->Speed, milliseconds_for_compress, cpi->avg_pick_mode_time); |
276 | | fclose(f); |
277 | | } |
278 | | |
279 | | #endif |
280 | | |
281 | 32.6k | if (cpi->avg_pick_mode_time < milliseconds_for_compress && |
282 | 32.6k | (cpi->avg_encode_time - cpi->avg_pick_mode_time) < |
283 | 32.6k | milliseconds_for_compress) { |
284 | 32.6k | if (cpi->avg_pick_mode_time == 0) { |
285 | 2.36k | cpi->Speed = 4; |
286 | 30.2k | } else { |
287 | 30.2k | if (milliseconds_for_compress * 100 < cpi->avg_encode_time * 95) { |
288 | 16 | cpi->Speed += 2; |
289 | 16 | cpi->avg_pick_mode_time = 0; |
290 | 16 | cpi->avg_encode_time = 0; |
291 | | |
292 | 16 | if (cpi->Speed > 16) { |
293 | 0 | cpi->Speed = 16; |
294 | 0 | } |
295 | 16 | } |
296 | | |
297 | 30.2k | if (milliseconds_for_compress * 100 > |
298 | 30.2k | cpi->avg_encode_time * auto_speed_thresh[cpi->Speed]) { |
299 | 29.9k | cpi->Speed -= 1; |
300 | 29.9k | cpi->avg_pick_mode_time = 0; |
301 | 29.9k | cpi->avg_encode_time = 0; |
302 | | |
303 | | /* In real-time mode, cpi->speed is in [4, 16]. */ |
304 | 29.9k | if (cpi->Speed < 4) { |
305 | 29.9k | cpi->Speed = 4; |
306 | 29.9k | } |
307 | 29.9k | } |
308 | 30.2k | } |
309 | 32.6k | } else { |
310 | 0 | cpi->Speed += 4; |
311 | |
|
312 | 0 | if (cpi->Speed > 16) cpi->Speed = 16; |
313 | |
|
314 | 0 | cpi->avg_pick_mode_time = 0; |
315 | 0 | cpi->avg_encode_time = 0; |
316 | 0 | } |
317 | 32.6k | } |
318 | | |
/* Sum of squared differences between the 16 entries of coeff and the 16
 * entries of dqcoeff. Both pointers must reference at least 16 shorts.
 */
int vp8_block_error_c(short *coeff, short *dqcoeff) {
  const short *const coeff_end = coeff + 16;
  int sum_sq = 0;

  for (; coeff < coeff_end; ++coeff, ++dqcoeff) {
    const int delta = *coeff - *dqcoeff;
    sum_sq += delta * delta;
  }

  return sum_sq;
}
330 | | |
331 | 0 | int vp8_mbblock_error_c(MACROBLOCK *mb, int dc) { |
332 | 0 | BLOCK *be; |
333 | 0 | BLOCKD *bd; |
334 | 0 | int i, j; |
335 | 0 | int berror, error = 0; |
336 | |
|
337 | 0 | for (i = 0; i < 16; ++i) { |
338 | 0 | be = &mb->block[i]; |
339 | 0 | bd = &mb->e_mbd.block[i]; |
340 | |
|
341 | 0 | berror = 0; |
342 | |
|
343 | 0 | for (j = dc; j < 16; ++j) { |
344 | 0 | int this_diff = be->coeff[j] - bd->dqcoeff[j]; |
345 | 0 | berror += this_diff * this_diff; |
346 | 0 | } |
347 | |
|
348 | 0 | error += berror; |
349 | 0 | } |
350 | |
|
351 | 0 | return error; |
352 | 0 | } |
353 | | |
354 | 0 | int vp8_mbuverror_c(MACROBLOCK *mb) { |
355 | 0 | BLOCK *be; |
356 | 0 | BLOCKD *bd; |
357 | |
|
358 | 0 | int i; |
359 | 0 | int error = 0; |
360 | |
|
361 | 0 | for (i = 16; i < 24; ++i) { |
362 | 0 | be = &mb->block[i]; |
363 | 0 | bd = &mb->e_mbd.block[i]; |
364 | |
|
365 | 0 | error += vp8_block_error_c(be->coeff, bd->dqcoeff); |
366 | 0 | } |
367 | |
|
368 | 0 | return error; |
369 | 0 | } |
370 | | |
371 | 14.5k | int VP8_UVSSE(MACROBLOCK *x) { |
372 | 14.5k | unsigned char *uptr, *vptr; |
373 | 14.5k | unsigned char *upred_ptr = (*(x->block[16].base_src) + x->block[16].src); |
374 | 14.5k | unsigned char *vpred_ptr = (*(x->block[20].base_src) + x->block[20].src); |
375 | 14.5k | int uv_stride = x->block[16].src_stride; |
376 | | |
377 | 14.5k | unsigned int sse1 = 0; |
378 | 14.5k | unsigned int sse2 = 0; |
379 | 14.5k | int mv_row = x->e_mbd.mode_info_context->mbmi.mv.as_mv.row; |
380 | 14.5k | int mv_col = x->e_mbd.mode_info_context->mbmi.mv.as_mv.col; |
381 | 14.5k | int offset; |
382 | 14.5k | int pre_stride = x->e_mbd.pre.uv_stride; |
383 | | |
384 | 14.5k | if (mv_row < 0) { |
385 | 566 | mv_row -= 1; |
386 | 13.9k | } else { |
387 | 13.9k | mv_row += 1; |
388 | 13.9k | } |
389 | | |
390 | 14.5k | if (mv_col < 0) { |
391 | 855 | mv_col -= 1; |
392 | 13.7k | } else { |
393 | 13.7k | mv_col += 1; |
394 | 13.7k | } |
395 | | |
396 | 14.5k | mv_row /= 2; |
397 | 14.5k | mv_col /= 2; |
398 | | |
399 | 14.5k | offset = (mv_row >> 3) * pre_stride + (mv_col >> 3); |
400 | 14.5k | uptr = x->e_mbd.pre.u_buffer + offset; |
401 | 14.5k | vptr = x->e_mbd.pre.v_buffer + offset; |
402 | | |
403 | 14.5k | if ((mv_row | mv_col) & 7) { |
404 | 1.73k | vpx_sub_pixel_variance8x8(uptr, pre_stride, mv_col & 7, mv_row & 7, |
405 | 1.73k | upred_ptr, uv_stride, &sse2); |
406 | 1.73k | vpx_sub_pixel_variance8x8(vptr, pre_stride, mv_col & 7, mv_row & 7, |
407 | 1.73k | vpred_ptr, uv_stride, &sse1); |
408 | 1.73k | sse2 += sse1; |
409 | 12.8k | } else { |
410 | 12.8k | vpx_variance8x8(uptr, pre_stride, upred_ptr, uv_stride, &sse2); |
411 | 12.8k | vpx_variance8x8(vptr, pre_stride, vpred_ptr, uv_stride, &sse1); |
412 | 12.8k | sse2 += sse1; |
413 | 12.8k | } |
414 | 14.5k | return sse2; |
415 | 14.5k | } |
416 | | |
417 | | static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, int type, ENTROPY_CONTEXT *a, |
418 | 292M | ENTROPY_CONTEXT *l) { |
419 | 292M | int c = !type; /* start at coef 0, unless Y with Y2 */ |
420 | 292M | int eob = (int)(*b->eob); |
421 | 292M | int pt; /* surrounding block/prev coef predictor */ |
422 | 292M | int cost = 0; |
423 | 292M | short *qcoeff_ptr = b->qcoeff; |
424 | | |
425 | 292M | VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); |
426 | | |
427 | 292M | assert(eob <= 16); |
428 | 2.80G | for (; c < eob; ++c) { |
429 | 2.51G | const int v = qcoeff_ptr[vp8_default_zig_zag1d[c]]; |
430 | 2.51G | const int t = vp8_dct_value_tokens_ptr[v].Token; |
431 | 2.51G | cost += mb->token_costs[type][vp8_coef_bands[c]][pt][t]; |
432 | 2.51G | cost += vp8_dct_value_cost_ptr[v]; |
433 | 2.51G | pt = vp8_prev_token_class[t]; |
434 | 2.51G | } |
435 | | |
436 | 292M | if (c < 16) { |
437 | 196M | cost += mb->token_costs[type][vp8_coef_bands[c]][pt][DCT_EOB_TOKEN]; |
438 | 196M | } |
439 | | |
440 | 292M | pt = (c != !type); /* is eob first coefficient; */ |
441 | 292M | *a = *l = pt; |
442 | | |
443 | 292M | return cost; |
444 | 292M | } |
445 | | |
446 | 5.60M | static int vp8_rdcost_mby(MACROBLOCK *mb) { |
447 | 5.60M | int cost = 0; |
448 | 5.60M | int b; |
449 | 5.60M | MACROBLOCKD *x = &mb->e_mbd; |
450 | 5.60M | ENTROPY_CONTEXT_PLANES t_above, t_left; |
451 | 5.60M | ENTROPY_CONTEXT *ta; |
452 | 5.60M | ENTROPY_CONTEXT *tl; |
453 | | |
454 | 5.60M | memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); |
455 | 5.60M | memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); |
456 | | |
457 | 5.60M | ta = (ENTROPY_CONTEXT *)&t_above; |
458 | 5.60M | tl = (ENTROPY_CONTEXT *)&t_left; |
459 | | |
460 | 95.2M | for (b = 0; b < 16; ++b) { |
461 | 89.6M | cost += cost_coeffs(mb, x->block + b, PLANE_TYPE_Y_NO_DC, |
462 | 89.6M | ta + vp8_block2above[b], tl + vp8_block2left[b]); |
463 | 89.6M | } |
464 | | |
465 | 5.60M | cost += cost_coeffs(mb, x->block + 24, PLANE_TYPE_Y2, |
466 | 5.60M | ta + vp8_block2above[24], tl + vp8_block2left[24]); |
467 | | |
468 | 5.60M | return cost; |
469 | 5.60M | } |
470 | | |
471 | 5.60M | static void macro_block_yrd(MACROBLOCK *mb, int *Rate, int *Distortion) { |
472 | 5.60M | int b; |
473 | 5.60M | MACROBLOCKD *const x = &mb->e_mbd; |
474 | 5.60M | BLOCK *const mb_y2 = mb->block + 24; |
475 | 5.60M | BLOCKD *const x_y2 = x->block + 24; |
476 | 5.60M | short *Y2DCPtr = mb_y2->src_diff; |
477 | 5.60M | BLOCK *beptr; |
478 | 5.60M | int d; |
479 | | |
480 | 5.60M | vp8_subtract_mby(mb->src_diff, *(mb->block[0].base_src), |
481 | 5.60M | mb->block[0].src_stride, mb->e_mbd.predictor, 16); |
482 | | |
483 | | /* Fdct and building the 2nd order block */ |
484 | 50.4M | for (beptr = mb->block; beptr < mb->block + 16; beptr += 2) { |
485 | 44.8M | mb->short_fdct8x4(beptr->src_diff, beptr->coeff, 32); |
486 | 44.8M | *Y2DCPtr++ = beptr->coeff[0]; |
487 | 44.8M | *Y2DCPtr++ = beptr->coeff[16]; |
488 | 44.8M | } |
489 | | |
490 | | /* 2nd order fdct */ |
491 | 5.60M | mb->short_walsh4x4(mb_y2->src_diff, mb_y2->coeff, 8); |
492 | | |
493 | | /* Quantization */ |
494 | 95.2M | for (b = 0; b < 16; ++b) { |
495 | 89.6M | mb->quantize_b(&mb->block[b], &mb->e_mbd.block[b]); |
496 | 89.6M | } |
497 | | |
498 | | /* DC predication and Quantization of 2nd Order block */ |
499 | 5.60M | mb->quantize_b(mb_y2, x_y2); |
500 | | |
501 | | /* Distortion */ |
502 | 5.60M | d = vp8_mbblock_error(mb, 1) << 2; |
503 | 5.60M | d += vp8_block_error(mb_y2->coeff, x_y2->dqcoeff); |
504 | | |
505 | 5.60M | *Distortion = (d >> 4); |
506 | | |
507 | | /* rate */ |
508 | 5.60M | *Rate = vp8_rdcost_mby(mb); |
509 | 5.60M | } |
510 | | |
/* Copy the left 4x4 block of a predictor buffer.
 *
 * Both buffers are laid out with a stride of 16 bytes; the first 4 bytes of
 * each of the first 4 rows are copied (byte offsets 0, 16, 32 and 48).
 * No other byte of dst is written.
 *
 * The rows are copied with memcpy rather than through unsigned int pointers,
 * so the function relies neither on the buffers being int-aligned nor on
 * accessing unsigned char storage through an incompatible lvalue type.
 *
 * dst       - destination buffer, at least 52 bytes.
 * predictor - source buffer, at least 52 bytes; must not overlap dst.
 */
static void copy_predictor(unsigned char *dst, const unsigned char *predictor) {
  int row;

  for (row = 0; row < 4; ++row) {
    memcpy(dst + row * 16, predictor + row * 16, 4);
  }
}
519 | | static int rd_pick_intra4x4block(MACROBLOCK *x, BLOCK *be, BLOCKD *b, |
520 | | B_PREDICTION_MODE *best_mode, |
521 | | const int *bmode_costs, ENTROPY_CONTEXT *a, |
522 | | ENTROPY_CONTEXT *l, |
523 | | |
524 | | int *bestrate, int *bestratey, |
525 | 8.31M | int *bestdistortion) { |
526 | 8.31M | B_PREDICTION_MODE mode; |
527 | 8.31M | int best_rd = INT_MAX; |
528 | 8.31M | int rate = 0; |
529 | 8.31M | int distortion; |
530 | | |
531 | 8.31M | ENTROPY_CONTEXT ta = *a, tempa = *a; |
532 | 8.31M | ENTROPY_CONTEXT tl = *l, templ = *l; |
533 | | /* |
534 | | * The predictor buffer is a 2d buffer with a stride of 16. Create |
535 | | * a temp buffer that meets the stride requirements, but we are only |
536 | | * interested in the left 4x4 block |
537 | | * */ |
538 | 8.31M | DECLARE_ALIGNED(16, unsigned char, best_predictor[16 * 4]); |
539 | 8.31M | DECLARE_ALIGNED(16, short, best_dqcoeff[16]); |
540 | 8.31M | int dst_stride = x->e_mbd.dst.y_stride; |
541 | 8.31M | unsigned char *dst = x->e_mbd.dst.y_buffer + b->offset; |
542 | | |
543 | 8.31M | unsigned char *Above = dst - dst_stride; |
544 | 8.31M | unsigned char *yleft = dst - 1; |
545 | 8.31M | unsigned char top_left = Above[-1]; |
546 | | |
547 | 91.4M | for (mode = B_DC_PRED; mode <= B_HU_PRED; ++mode) { |
548 | 83.1M | int this_rd; |
549 | 83.1M | int ratey; |
550 | | |
551 | 83.1M | rate = bmode_costs[mode]; |
552 | | |
553 | 83.1M | vp8_intra4x4_predict(Above, yleft, dst_stride, mode, b->predictor, 16, |
554 | 83.1M | top_left); |
555 | 83.1M | vp8_subtract_b(be, b, 16); |
556 | 83.1M | x->short_fdct4x4(be->src_diff, be->coeff, 32); |
557 | 83.1M | x->quantize_b(be, b); |
558 | | |
559 | 83.1M | tempa = ta; |
560 | 83.1M | templ = tl; |
561 | | |
562 | 83.1M | ratey = cost_coeffs(x, b, PLANE_TYPE_Y_WITH_DC, &tempa, &templ); |
563 | 83.1M | rate += ratey; |
564 | 83.1M | distortion = vp8_block_error(be->coeff, b->dqcoeff) >> 2; |
565 | | |
566 | 83.1M | this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); |
567 | | |
568 | 83.1M | if (this_rd < best_rd) { |
569 | 15.6M | *bestrate = rate; |
570 | 15.6M | *bestratey = ratey; |
571 | 15.6M | *bestdistortion = distortion; |
572 | 15.6M | best_rd = this_rd; |
573 | 15.6M | *best_mode = mode; |
574 | 15.6M | *a = tempa; |
575 | 15.6M | *l = templ; |
576 | 15.6M | copy_predictor(best_predictor, b->predictor); |
577 | 15.6M | memcpy(best_dqcoeff, b->dqcoeff, 32); |
578 | 15.6M | } |
579 | 83.1M | } |
580 | 8.31M | b->bmi.as_mode = *best_mode; |
581 | | |
582 | 8.31M | vp8_short_idct4x4llm(best_dqcoeff, best_predictor, 16, dst, dst_stride); |
583 | | |
584 | 8.31M | return best_rd; |
585 | 8.31M | } |
586 | | |
587 | | static int rd_pick_intra4x4mby_modes(MACROBLOCK *mb, int *Rate, int *rate_y, |
588 | 737k | int *Distortion, int best_rd) { |
589 | 737k | MACROBLOCKD *const xd = &mb->e_mbd; |
590 | 737k | int i; |
591 | 737k | int cost = mb->mbmode_cost[xd->frame_type][B_PRED]; |
592 | 737k | int distortion = 0; |
593 | 737k | int tot_rate_y = 0; |
594 | 737k | int64_t total_rd = 0; |
595 | 737k | ENTROPY_CONTEXT_PLANES t_above, t_left; |
596 | 737k | ENTROPY_CONTEXT *ta; |
597 | 737k | ENTROPY_CONTEXT *tl; |
598 | 737k | const int *bmode_costs; |
599 | | |
600 | 737k | memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); |
601 | 737k | memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); |
602 | | |
603 | 737k | ta = (ENTROPY_CONTEXT *)&t_above; |
604 | 737k | tl = (ENTROPY_CONTEXT *)&t_left; |
605 | | |
606 | 737k | intra_prediction_down_copy(xd, xd->dst.y_buffer - xd->dst.y_stride + 16); |
607 | | |
608 | 737k | bmode_costs = mb->inter_bmode_costs; |
609 | | |
610 | 8.65M | for (i = 0; i < 16; ++i) { |
611 | 8.31M | MODE_INFO *const mic = xd->mode_info_context; |
612 | 8.31M | const int mis = xd->mode_info_stride; |
613 | 8.31M | B_PREDICTION_MODE best_mode = B_MODE_COUNT; |
614 | 8.31M | int r = 0, ry = 0, d = 0; |
615 | | |
616 | 8.31M | if (mb->e_mbd.frame_type == KEY_FRAME) { |
617 | 3.92M | const B_PREDICTION_MODE A = above_block_mode(mic, i, mis); |
618 | 3.92M | const B_PREDICTION_MODE L = left_block_mode(mic, i); |
619 | | |
620 | 3.92M | bmode_costs = mb->bmode_costs[A][L]; |
621 | 3.92M | } |
622 | | |
623 | 8.31M | total_rd += rd_pick_intra4x4block( |
624 | 8.31M | mb, mb->block + i, xd->block + i, &best_mode, bmode_costs, |
625 | 8.31M | ta + vp8_block2above[i], tl + vp8_block2left[i], &r, &ry, &d); |
626 | | |
627 | 8.31M | cost += r; |
628 | 8.31M | distortion += d; |
629 | 8.31M | tot_rate_y += ry; |
630 | | |
631 | 8.31M | assert(best_mode != B_MODE_COUNT); |
632 | 8.31M | mic->bmi[i].as_mode = best_mode; |
633 | | |
634 | 8.31M | if (total_rd >= (int64_t)best_rd) break; |
635 | 8.31M | } |
636 | | |
637 | 737k | if (total_rd >= (int64_t)best_rd) return INT_MAX; |
638 | | |
639 | 338k | *Rate = cost; |
640 | 338k | *rate_y = tot_rate_y; |
641 | 338k | *Distortion = distortion; |
642 | | |
643 | 338k | return RDCOST(mb->rdmult, mb->rddiv, cost, distortion); |
644 | 737k | } |
645 | | |
646 | | static int rd_pick_intra16x16mby_mode(MACROBLOCK *x, int *Rate, int *rate_y, |
647 | 346k | int *Distortion) { |
648 | 346k | MB_PREDICTION_MODE mode; |
649 | 346k | MB_PREDICTION_MODE mode_selected = MB_MODE_COUNT; |
650 | 346k | int rate, ratey; |
651 | 346k | int distortion; |
652 | 346k | int best_rd = INT_MAX; |
653 | 346k | int this_rd; |
654 | 346k | MACROBLOCKD *xd = &x->e_mbd; |
655 | | |
656 | | /* Y Search for 16x16 intra prediction mode */ |
657 | 1.73M | for (mode = DC_PRED; mode <= TM_PRED; ++mode) { |
658 | 1.38M | xd->mode_info_context->mbmi.mode = mode; |
659 | | |
660 | 1.38M | vp8_build_intra_predictors_mby_s(xd, xd->dst.y_buffer - xd->dst.y_stride, |
661 | 1.38M | xd->dst.y_buffer - 1, xd->dst.y_stride, |
662 | 1.38M | xd->predictor, 16); |
663 | | |
664 | 1.38M | macro_block_yrd(x, &ratey, &distortion); |
665 | 1.38M | rate = ratey + |
666 | 1.38M | x->mbmode_cost[xd->frame_type][xd->mode_info_context->mbmi.mode]; |
667 | | |
668 | 1.38M | this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); |
669 | | |
670 | 1.38M | if (this_rd < best_rd) { |
671 | 439k | mode_selected = mode; |
672 | 439k | best_rd = this_rd; |
673 | 439k | *Rate = rate; |
674 | 439k | *rate_y = ratey; |
675 | 439k | *Distortion = distortion; |
676 | 439k | } |
677 | 1.38M | } |
678 | | |
679 | 346k | assert(mode_selected != MB_MODE_COUNT); |
680 | 346k | xd->mode_info_context->mbmi.mode = mode_selected; |
681 | 346k | return best_rd; |
682 | 346k | } |
683 | | |
684 | 6.11M | static int rd_cost_mbuv(MACROBLOCK *mb) { |
685 | 6.11M | int b; |
686 | 6.11M | int cost = 0; |
687 | 6.11M | MACROBLOCKD *x = &mb->e_mbd; |
688 | 6.11M | ENTROPY_CONTEXT_PLANES t_above, t_left; |
689 | 6.11M | ENTROPY_CONTEXT *ta; |
690 | 6.11M | ENTROPY_CONTEXT *tl; |
691 | | |
692 | 6.11M | memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); |
693 | 6.11M | memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); |
694 | | |
695 | 6.11M | ta = (ENTROPY_CONTEXT *)&t_above; |
696 | 6.11M | tl = (ENTROPY_CONTEXT *)&t_left; |
697 | | |
698 | 54.9M | for (b = 16; b < 24; ++b) { |
699 | 48.8M | cost += cost_coeffs(mb, x->block + b, PLANE_TYPE_UV, |
700 | 48.8M | ta + vp8_block2above[b], tl + vp8_block2left[b]); |
701 | 48.8M | } |
702 | | |
703 | 6.11M | return cost; |
704 | 6.11M | } |
705 | | |
706 | | static int rd_inter16x16_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate, |
707 | 2.26M | int *distortion, int fullpixel) { |
708 | 2.26M | (void)cpi; |
709 | 2.26M | (void)fullpixel; |
710 | | |
711 | 2.26M | vp8_build_inter16x16_predictors_mbuv(&x->e_mbd); |
712 | 2.26M | vp8_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer, |
713 | 2.26M | x->src.uv_stride, &x->e_mbd.predictor[256], |
714 | 2.26M | &x->e_mbd.predictor[320], 8); |
715 | | |
716 | 2.26M | vp8_transform_mbuv(x); |
717 | 2.26M | vp8_quantize_mbuv(x); |
718 | | |
719 | 2.26M | *rate = rd_cost_mbuv(x); |
720 | 2.26M | *distortion = vp8_mbuverror(x) / 4; |
721 | | |
722 | 2.26M | return RDCOST(x->rdmult, x->rddiv, *rate, *distortion); |
723 | 2.26M | } |
724 | | |
725 | | static int rd_inter4x4_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate, |
726 | 252k | int *distortion, int fullpixel) { |
727 | 252k | (void)cpi; |
728 | 252k | (void)fullpixel; |
729 | | |
730 | 252k | vp8_build_inter4x4_predictors_mbuv(&x->e_mbd); |
731 | 252k | vp8_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer, |
732 | 252k | x->src.uv_stride, &x->e_mbd.predictor[256], |
733 | 252k | &x->e_mbd.predictor[320], 8); |
734 | | |
735 | 252k | vp8_transform_mbuv(x); |
736 | 252k | vp8_quantize_mbuv(x); |
737 | | |
738 | 252k | *rate = rd_cost_mbuv(x); |
739 | 252k | *distortion = vp8_mbuverror(x) / 4; |
740 | | |
741 | 252k | return RDCOST(x->rdmult, x->rddiv, *rate, *distortion); |
742 | 252k | } |
743 | | |
744 | | static void rd_pick_intra_mbuv_mode(MACROBLOCK *x, int *rate, |
745 | 899k | int *rate_tokenonly, int *distortion) { |
746 | 899k | MB_PREDICTION_MODE mode; |
747 | 899k | MB_PREDICTION_MODE mode_selected = MB_MODE_COUNT; |
748 | 899k | int best_rd = INT_MAX; |
749 | 899k | int d = 0, r = 0; |
750 | 899k | int rate_to; |
751 | 899k | MACROBLOCKD *xd = &x->e_mbd; |
752 | | |
753 | 4.49M | for (mode = DC_PRED; mode <= TM_PRED; ++mode) { |
754 | 3.59M | int this_rate; |
755 | 3.59M | int this_distortion; |
756 | 3.59M | int this_rd; |
757 | | |
758 | 3.59M | xd->mode_info_context->mbmi.uv_mode = mode; |
759 | | |
760 | 3.59M | vp8_build_intra_predictors_mbuv_s( |
761 | 3.59M | xd, xd->dst.u_buffer - xd->dst.uv_stride, |
762 | 3.59M | xd->dst.v_buffer - xd->dst.uv_stride, xd->dst.u_buffer - 1, |
763 | 3.59M | xd->dst.v_buffer - 1, xd->dst.uv_stride, &xd->predictor[256], |
764 | 3.59M | &xd->predictor[320], 8); |
765 | | |
766 | 3.59M | vp8_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer, |
767 | 3.59M | x->src.uv_stride, &xd->predictor[256], |
768 | 3.59M | &xd->predictor[320], 8); |
769 | 3.59M | vp8_transform_mbuv(x); |
770 | 3.59M | vp8_quantize_mbuv(x); |
771 | | |
772 | 3.59M | rate_to = rd_cost_mbuv(x); |
773 | 3.59M | this_rate = |
774 | 3.59M | rate_to + x->intra_uv_mode_cost[xd->frame_type] |
775 | 3.59M | [xd->mode_info_context->mbmi.uv_mode]; |
776 | | |
777 | 3.59M | this_distortion = vp8_mbuverror(x) / 4; |
778 | | |
779 | 3.59M | this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion); |
780 | | |
781 | 3.59M | if (this_rd < best_rd) { |
782 | 1.12M | best_rd = this_rd; |
783 | 1.12M | d = this_distortion; |
784 | 1.12M | r = this_rate; |
785 | 1.12M | *rate_tokenonly = rate_to; |
786 | 1.12M | mode_selected = mode; |
787 | 1.12M | } |
788 | 3.59M | } |
789 | | |
790 | 899k | *rate = r; |
791 | 899k | *distortion = d; |
792 | | |
793 | 899k | assert(mode_selected != MB_MODE_COUNT); |
794 | 899k | xd->mode_info_context->mbmi.uv_mode = mode_selected; |
795 | 899k | } |
796 | | |
797 | 5.48M | int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4]) { |
798 | 5.48M | vp8_prob p[VP8_MVREFS - 1]; |
799 | 5.48M | assert(NEARESTMV <= m && m <= SPLITMV); |
800 | 5.48M | vp8_mv_ref_probs(p, near_mv_ref_ct); |
801 | 5.48M | return vp8_cost_token(vp8_mv_ref_tree, p, |
802 | 5.48M | vp8_mv_ref_encoding_array + (m - NEARESTMV)); |
803 | 5.48M | } |
804 | | |
805 | 2.26M | void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv) { |
806 | 2.26M | x->e_mbd.mode_info_context->mbmi.mode = mb; |
807 | 2.26M | x->e_mbd.mode_info_context->mbmi.mv.as_int = mv->as_int; |
808 | 2.26M | } |
809 | | |
810 | | static int labels2mode(MACROBLOCK *x, int const *labelings, int which_label, |
811 | | B_PREDICTION_MODE this_mode, int_mv *this_mv, |
812 | 20.9M | int_mv *best_ref_mv, int *mvcost[2]) { |
813 | 20.9M | MACROBLOCKD *const xd = &x->e_mbd; |
814 | 20.9M | MODE_INFO *const mic = xd->mode_info_context; |
815 | 20.9M | const int mis = xd->mode_info_stride; |
816 | | |
817 | 20.9M | int cost = 0; |
818 | 20.9M | int thismvcost = 0; |
819 | | |
820 | | /* We have to be careful retrieving previously-encoded motion vectors. |
821 | | Ones from this macroblock have to be pulled from the BLOCKD array |
822 | | as they have not yet made it to the bmi array in our MB_MODE_INFO. */ |
823 | | |
824 | 20.9M | int i = 0; |
825 | | |
826 | 334M | do { |
827 | 334M | BLOCKD *const d = xd->block + i; |
828 | 334M | const int row = i >> 2, col = i & 3; |
829 | | |
830 | 334M | B_PREDICTION_MODE m; |
831 | | |
832 | 334M | if (labelings[i] != which_label) continue; |
833 | | |
834 | 82.5M | if (col && labelings[i] == labelings[i - 1]) { |
835 | 42.7M | m = LEFT4X4; |
836 | 42.7M | } else if (row && labelings[i] == labelings[i - 4]) { |
837 | 18.9M | m = ABOVE4X4; |
838 | 20.9M | } else { |
839 | | /* the only time we should do costing for new motion vector |
840 | | * or mode is when we are on a new label (jbb May 08, 2007) |
841 | | */ |
842 | 20.9M | switch (m = this_mode) { |
843 | 5.68M | case NEW4X4: |
844 | 5.68M | thismvcost = vp8_mv_bit_cost(this_mv, best_ref_mv, mvcost, 102); |
845 | 5.68M | break; |
846 | 6.21M | case LEFT4X4: |
847 | 6.21M | this_mv->as_int = col ? d[-1].bmi.mv.as_int : left_block_mv(mic, i); |
848 | 6.21M | break; |
849 | 4.72M | case ABOVE4X4: |
850 | 4.72M | this_mv->as_int = |
851 | 4.72M | row ? d[-4].bmi.mv.as_int : above_block_mv(mic, i, mis); |
852 | 4.72M | break; |
853 | 4.29M | case ZERO4X4: this_mv->as_int = 0; break; |
854 | 0 | default: break; |
855 | 20.9M | } |
856 | | |
857 | 20.9M | if (m == ABOVE4X4) { /* replace above with left if same */ |
858 | 4.72M | int_mv left_mv; |
859 | | |
860 | 4.72M | left_mv.as_int = col ? d[-1].bmi.mv.as_int : left_block_mv(mic, i); |
861 | | |
862 | 4.72M | if (left_mv.as_int == this_mv->as_int) m = LEFT4X4; |
863 | 4.72M | } |
864 | | |
865 | 20.9M | cost = x->inter_bmode_costs[m]; |
866 | 20.9M | } |
867 | | |
868 | 82.5M | d->bmi.mv.as_int = this_mv->as_int; |
869 | | |
870 | 82.5M | x->partition_info->bmi[i].mode = m; |
871 | 82.5M | x->partition_info->bmi[i].mv.as_int = this_mv->as_int; |
872 | | |
873 | 334M | } while (++i < 16); |
874 | | |
875 | 20.9M | cost += thismvcost; |
876 | 20.9M | return cost; |
877 | 20.9M | } |
878 | | |
879 | | static int rdcost_mbsegment_y(MACROBLOCK *mb, const int *labels, |
880 | | int which_label, ENTROPY_CONTEXT *ta, |
881 | 16.4M | ENTROPY_CONTEXT *tl) { |
882 | 16.4M | int cost = 0; |
883 | 16.4M | int b; |
884 | 16.4M | MACROBLOCKD *x = &mb->e_mbd; |
885 | | |
886 | 280M | for (b = 0; b < 16; ++b) { |
887 | 263M | if (labels[b] == which_label) { |
888 | 64.9M | cost += cost_coeffs(mb, x->block + b, PLANE_TYPE_Y_WITH_DC, |
889 | 64.9M | ta + vp8_block2above[b], tl + vp8_block2left[b]); |
890 | 64.9M | } |
891 | 263M | } |
892 | | |
893 | 16.4M | return cost; |
894 | 16.4M | } |
895 | | static unsigned int vp8_encode_inter_mb_segment(MACROBLOCK *x, |
896 | | int const *labels, |
897 | 16.4M | int which_label) { |
898 | 16.4M | int i; |
899 | 16.4M | unsigned int distortion = 0; |
900 | 16.4M | int pre_stride = x->e_mbd.pre.y_stride; |
901 | 16.4M | unsigned char *base_pre = x->e_mbd.pre.y_buffer; |
902 | | |
903 | 280M | for (i = 0; i < 16; ++i) { |
904 | 263M | if (labels[i] == which_label) { |
905 | 64.9M | BLOCKD *bd = &x->e_mbd.block[i]; |
906 | 64.9M | BLOCK *be = &x->block[i]; |
907 | | |
908 | 64.9M | vp8_build_inter_predictors_b(bd, 16, base_pre, pre_stride, |
909 | 64.9M | x->e_mbd.subpixel_predict); |
910 | 64.9M | vp8_subtract_b(be, bd, 16); |
911 | 64.9M | x->short_fdct4x4(be->src_diff, be->coeff, 32); |
912 | 64.9M | x->quantize_b(be, bd); |
913 | | |
914 | 64.9M | distortion += vp8_block_error(be->coeff, bd->dqcoeff); |
915 | 64.9M | } |
916 | 263M | } |
917 | | |
918 | 16.4M | return distortion; |
919 | 16.4M | } |
920 | | |
/* Right shift applied in rd_check_segment() to the best diamond-search
 * error before it is compared against the full-search trigger value (4000).
 * Indexed by the `segmentation` argument of rd_check_segment().
 */
921 | | static const unsigned int segmentation_to_sseshift[4] = { 3, 3, 2, 0 };
922 | | |
/* Running state of the SPLITMV search: the inputs shared by every
 * rd_check_segment() call plus the best partitioning found so far.
 */
923 | | typedef struct {
924 | | int_mv *ref_mv; /* reference MV passed to MV costing and MV searches */
925 | | int_mv mvp; /* starting point of the NEW4X4 motion search */
926 | |
927 | | int segment_rd; /* best RD cost so far (starts at the caller's best_rd) */
928 | | int segment_num; /* segmentation that achieved segment_rd */
929 | | int r; /* total rate of the best segmentation */
930 | | int d; /* total distortion of the best segmentation */
931 | | int segment_yrate; /* luma coefficient rate of the best segmentation */
932 | | B_PREDICTION_MODE modes[16]; /* per 4x4 block mode of the best result */
933 | | int_mv mvs[16]; /* per 4x4 block MV of the best result */
934 | | unsigned char eobs[16]; /* per 4x4 block eob of the best result */
935 | |
936 | | int mvthresh; /* divided by the label count to gate the NEW4X4 search */
937 | | int *mdcounts; /* passed to vp8_cost_mv_ref() when costing SPLITMV */
938 | |
939 | | int_mv sv_mvp[4]; /* save 4 mvp from 8x8 */
940 | | int sv_istep[2]; /* save 2 initial step_param for 16x8/8x16 */
941 | |
942 | | } BEST_SEG_INFO;
943 | | |
/* Evaluate one SPLITMV partitioning (`segmentation`) of the current
 * macroblock.
 *
 * For each label of the partitioning, the sub-block modes LEFT4X4 through
 * NEW4X4 are tried in turn; each trial is encoded and costed, and the mode
 * with the lowest RD cost is kept for that label. NEW4X4 runs a diamond
 * motion search (plus a full search when compressor_speed == 0 and the error
 * is large) followed by a fractional-pel refinement.
 *
 * If the accumulated RD cost of all labels is lower than bsi->segment_rd,
 * the rate, distortion, luma rate, segmentation id and the per-block MVs,
 * modes and eobs are recorded in *bsi. Evaluation stops early as soon as the
 * running cost is no longer below bsi->segment_rd.
 */
944 | | static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, BEST_SEG_INFO *bsi,
945 | 1.15M | unsigned int segmentation) {
946 | 1.15M | int i;
947 | 1.15M | int const *labels;
948 | 1.15M | int br = 0;
949 | 1.15M | int bd = 0;
950 | 1.15M | B_PREDICTION_MODE this_mode;
951 | |
952 | 1.15M | int label_count;
953 | 1.15M | int this_segment_rd = 0;
954 | 1.15M | int label_mv_thresh;
955 | 1.15M | int rate = 0;
956 | 1.15M | int sbr = 0;
957 | 1.15M | int sbd = 0;
958 | 1.15M | int segmentyrate = 0;
959 | |
960 | 1.15M | vp8_variance_fn_ptr_t *v_fn_ptr;
961 | |
962 | 1.15M | ENTROPY_CONTEXT_PLANES t_above, t_left;
963 | 1.15M | ENTROPY_CONTEXT_PLANES t_above_b, t_left_b;
964 | |
/* Work on local copies of the above/left entropy contexts; only these
 * copies are handed to the trial costing below.
 */
965 | 1.15M | memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
966 | 1.15M | memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
967 | |
968 | 1.15M | vp8_zero(t_above_b);
969 | 1.15M | vp8_zero(t_left_b);
970 | |
971 | 1.15M | br = 0;
972 | 1.15M | bd = 0;
973 | |
974 | 1.15M | v_fn_ptr = &cpi->fn_ptr[segmentation];
975 | 1.15M | labels = vp8_mbsplits[segmentation];
976 | 1.15M | label_count = vp8_mbsplit_count[segmentation];
977 | |
978 | | /* 64 makes this threshold really big effectively making it so that we
979 | | * very rarely check mvs on segments. setting this to 1 would make mv
980 | | * thresh roughly equal to what it is for macroblocks
981 | | */
/* NOTE(review): the comment above talks about 64, but the multiplier
 * actually used on the next line is 1.
 */
982 | 1.15M | label_mv_thresh = 1 * bsi->mvthresh / label_count;
983 | |
984 | | /* Segmentation method overheads */
985 | 1.15M | rate = vp8_cost_token(vp8_mbsplit_tree, vp8_mbsplit_probs,
986 | 1.15M | vp8_mbsplit_encodings + segmentation);
987 | 1.15M | rate += vp8_cost_mv_ref(SPLITMV, bsi->mdcounts);
988 | 1.15M | this_segment_rd += RDCOST(x->rdmult, x->rddiv, rate, 0);
989 | 1.15M | br += rate;
990 | |
991 | 4.76M | for (i = 0; i < label_count; ++i) {
992 | 4.25M | int_mv mode_mv[B_MODE_COUNT] = { { 0 }, { 0 } };
993 | 4.25M | int best_label_rd = INT_MAX;
994 | 4.25M | B_PREDICTION_MODE mode_selected = ZERO4X4;
995 | 4.25M | int bestlabelyrate = 0;
996 | |
997 | | /* search for the best motion vector on this segment */
998 | 20.9M | for (this_mode = LEFT4X4; this_mode <= NEW4X4; ++this_mode) {
999 | 17.0M | int this_rd;
1000 | 17.0M | int distortion;
1001 | 17.0M | int labelyrate;
1002 | 17.0M | ENTROPY_CONTEXT_PLANES t_above_s, t_left_s;
1003 | 17.0M | ENTROPY_CONTEXT *ta_s;
1004 | 17.0M | ENTROPY_CONTEXT *tl_s;
1005 | |
/* Every trial mode starts from the contexts left by the previous labels. */
1006 | 17.0M | memcpy(&t_above_s, &t_above, sizeof(ENTROPY_CONTEXT_PLANES));
1007 | 17.0M | memcpy(&t_left_s, &t_left, sizeof(ENTROPY_CONTEXT_PLANES));
1008 | |
1009 | 17.0M | ta_s = (ENTROPY_CONTEXT *)&t_above_s;
1010 | 17.0M | tl_s = (ENTROPY_CONTEXT *)&t_left_s;
1011 | |
1012 | 17.0M | if (this_mode == NEW4X4) {
1013 | 4.25M | int sseshift;
1014 | 4.25M | int num00;
1015 | 4.25M | int step_param = 0;
1016 | 4.25M | int further_steps;
1017 | 4.25M | int n;
1018 | 4.25M | int thissme;
1019 | 4.25M | int bestsme = INT_MAX;
1020 | 4.25M | int_mv temp_mv;
1021 | 4.25M | BLOCK *c;
1022 | 4.25M | BLOCKD *e;
1023 | |
1024 | | /* Is the best so far sufficiently good that we can't justify
1025 | | * doing a new motion search.
1026 | | */
1027 | 4.25M | if (best_label_rd < label_mv_thresh) break;
1028 | |
1029 | 3.92M | if (cpi->compressor_speed) {
/* For the two-label splits, start from the saved 8x8 result and the
 * step size precomputed by the caller.
 */
1030 | 3.92M | if (segmentation == BLOCK_8X16 || segmentation == BLOCK_16X8) {
1031 | 938k | bsi->mvp.as_int = bsi->sv_mvp[i].as_int;
1032 | 938k | if (i == 1 && segmentation == BLOCK_16X8) {
1033 | 222k | bsi->mvp.as_int = bsi->sv_mvp[2].as_int;
1034 | 222k | }
1035 | |
1036 | 938k | step_param = bsi->sv_istep[i];
1037 | 938k | }
1038 | |
1039 | | /* use previous block's result as next block's MV
1040 | | * predictor.
1041 | | */
1042 | 3.92M | if (segmentation == BLOCK_4X4 && i > 0) {
1043 | 1.20M | bsi->mvp.as_int = x->e_mbd.block[i - 1].bmi.mv.as_int;
/* First block of a row: use the block directly above instead. */
1044 | 1.20M | if (i == 4 || i == 8 || i == 12) {
1045 | 247k | bsi->mvp.as_int = x->e_mbd.block[i - 4].bmi.mv.as_int;
1046 | 247k | }
1047 | 1.20M | step_param = 2;
1048 | 1.20M | }
1049 | 3.92M | }
1050 | |
1051 | 3.92M | further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
1052 | |
1053 | 3.92M | {
1054 | 3.92M | int sadpb = x->sadperbit4;
1055 | 3.92M | int_mv mvp_full;
1056 | |
1057 | 3.92M | mvp_full.as_mv.row = bsi->mvp.as_mv.row >> 3;
1058 | 3.92M | mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3;
1059 | |
1060 | | /* find first label */
1061 | 3.92M | n = vp8_mbsplit_offset[segmentation][i];
1062 | |
1063 | 3.92M | c = &x->block[n];
1064 | 3.92M | e = &x->e_mbd.block[n];
1065 | |
1066 | 3.92M | {
1067 | 3.92M | bestsme = cpi->diamond_search_sad(
1068 | 3.92M | x, c, e, &mvp_full, &mode_mv[NEW4X4], step_param, sadpb, &num00,
1069 | 3.92M | v_fn_ptr, x->mvcost, bsi->ref_mv);
1070 | |
1071 | 3.92M | n = num00;
1072 | 3.92M | num00 = 0;
1073 | |
/* Repeat the diamond search with larger step_param values, keeping the
 * best result; num00 (set by the search) counts steps to skip.
 */
1074 | 15.1M | while (n < further_steps) {
1075 | 11.1M | n++;
1076 | |
1077 | 11.1M | if (num00) {
1078 | 1.52M | num00--;
1079 | 9.65M | } else {
1080 | 9.65M | thissme = cpi->diamond_search_sad(
1081 | 9.65M | x, c, e, &mvp_full, &temp_mv, step_param + n, sadpb, &num00,
1082 | 9.65M | v_fn_ptr, x->mvcost, bsi->ref_mv);
1083 | |
1084 | 9.65M | if (thissme < bestsme) {
1085 | 1.81M | bestsme = thissme;
1086 | 1.81M | mode_mv[NEW4X4].as_int = temp_mv.as_int;
1087 | 1.81M | }
1088 | 9.65M | }
1089 | 11.1M | }
1090 | 3.92M | }
1091 | |
1092 | 3.92M | sseshift = segmentation_to_sseshift[segmentation];
1093 | |
1094 | | /* Should we do a full search (best quality only) */
1095 | 3.92M | if ((cpi->compressor_speed == 0) && (bestsme >> sseshift) > 4000) {
1096 | | /* Check if mvp_full is within the range. */
1097 | 0 | vp8_clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max, x->mv_row_min,
1098 | 0 | x->mv_row_max);
1099 |

1100 | 0 | thissme = vp8_full_search_sad(x, c, e, &mvp_full, sadpb, 16,
1101 | 0 | v_fn_ptr, x->mvcost, bsi->ref_mv);
1102 |

1103 | 0 | if (thissme < bestsme) {
1104 | 0 | bestsme = thissme;
1105 | 0 | mode_mv[NEW4X4].as_int = e->bmi.mv.as_int;
1106 | 0 | } else {
1107 | | /* The full search result is actually worse so
1108 | | * re-instate the previous best vector
1109 | | */
1110 | 0 | e->bmi.mv.as_int = mode_mv[NEW4X4].as_int;
1111 | 0 | }
1112 | 0 | }
1113 | 3.92M | }
1114 | |
/* Refine the full-pel result; disto and sse are outputs that are not used
 * afterwards.
 */
1115 | 3.92M | if (bestsme < INT_MAX) {
1116 | 3.92M | int disto;
1117 | 3.92M | unsigned int sse;
1118 | 3.92M | cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4], bsi->ref_mv,
1119 | 3.92M | x->errorperbit, v_fn_ptr, x->mvcost,
1120 | 3.92M | &disto, &sse);
1121 | 3.92M | }
1122 | 3.92M | } /* NEW4X4 */
1123 | |
/* Install the trial mode / MV on this label's blocks and get its cost. */
1124 | 16.6M | rate = labels2mode(x, labels, i, this_mode, &mode_mv[this_mode],
1125 | 16.6M | bsi->ref_mv, x->mvcost);
1126 | |
1127 | | /* Trap vectors that reach beyond the UMV borders */
1128 | 16.6M | if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) ||
1129 | 16.6M | ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) ||
1130 | 16.6M | ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) ||
1131 | 16.6M | ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max)) {
1132 | 179k | continue;
1133 | 179k | }
1134 | |
1135 | 16.4M | distortion = vp8_encode_inter_mb_segment(x, labels, i) / 4;
1136 | |
1137 | 16.4M | labelyrate = rdcost_mbsegment_y(x, labels, i, ta_s, tl_s);
1138 | 16.4M | rate += labelyrate;
1139 | |
1140 | 16.4M | this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
1141 | |
1142 | 16.4M | if (this_rd < best_label_rd) {
1143 | 7.08M | sbr = rate;
1144 | 7.08M | sbd = distortion;
1145 | 7.08M | bestlabelyrate = labelyrate;
1146 | 7.08M | mode_selected = this_mode;
1147 | 7.08M | best_label_rd = this_rd;
1148 | |
/* Remember the entropy contexts produced by the best mode so far. */
1149 | 7.08M | memcpy(&t_above_b, &t_above_s, sizeof(ENTROPY_CONTEXT_PLANES));
1150 | 7.08M | memcpy(&t_left_b, &t_left_s, sizeof(ENTROPY_CONTEXT_PLANES));
1151 | 7.08M | }
1152 | 16.4M | } /*for each 4x4 mode*/
1153 | |
/* Carry the winning mode's contexts forward to the next label. */
1154 | 4.25M | memcpy(&t_above, &t_above_b, sizeof(ENTROPY_CONTEXT_PLANES));
1155 | 4.25M | memcpy(&t_left, &t_left_b, sizeof(ENTROPY_CONTEXT_PLANES));
1156 | |
/* Re-apply the selected mode: the last trial may have left a different
 * mode / MV in the block and partition_info entries.
 */
1157 | 4.25M | labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected],
1158 | 4.25M | bsi->ref_mv, x->mvcost);
1159 | |
1160 | 4.25M | br += sbr;
1161 | 4.25M | bd += sbd;
1162 | 4.25M | segmentyrate += bestlabelyrate;
1163 | 4.25M | this_segment_rd += best_label_rd;
1164 | |
/* Already no better than the best segmentation found so far. */
1165 | 4.25M | if (this_segment_rd >= bsi->segment_rd) break;
1166 | |
1167 | 4.25M | } /* for each label */
1168 | |
1169 | 1.15M | if (this_segment_rd < bsi->segment_rd) {
1170 | 513k | bsi->r = br;
1171 | 513k | bsi->d = bd;
1172 | 513k | bsi->segment_yrate = segmentyrate;
1173 | 513k | bsi->segment_rd = this_segment_rd;
1174 | 513k | bsi->segment_num = segmentation;
1175 | |
1176 | | /* store everything needed to come back to this!! */
1177 | 8.72M | for (i = 0; i < 16; ++i) {
1178 | 8.21M | bsi->mvs[i].as_mv = x->partition_info->bmi[i].mv.as_mv;
1179 | 8.21M | bsi->modes[i] = x->partition_info->bmi[i].mode;
1180 | 8.21M | bsi->eobs[i] = x->e_mbd.eobs[i];
1181 | 8.21M | }
1182 | 513k | }
1183 | 1.15M | }
1184 | | |
1185 | 1.01M | static void vp8_cal_step_param(int sr, int *sp) { |
1186 | 1.01M | int step = 0; |
1187 | | |
1188 | 1.01M | if (sr > MAX_FIRST_STEP) { |
1189 | 29.3k | sr = MAX_FIRST_STEP; |
1190 | 982k | } else if (sr < 1) { |
1191 | 489k | sr = 1; |
1192 | 489k | } |
1193 | | |
1194 | 2.92M | while (sr >>= 1) step++; |
1195 | | |
1196 | 1.01M | *sp = MAX_MVSEARCH_STEPS - 1 - step; |
1197 | 1.01M | } |
1198 | | |
/* Search the SPLITMV partitionings of the current macroblock and install
 * the best one found.
 *
 * bsi.segment_rd starts at `best_rd`, so rd_check_segment() only records a
 * partitioning that beats the caller's current best. With
 * cpi->compressor_speed == 0 all four partitionings are tried in a fixed
 * order. Otherwise 8x8 is tried first, and 8x16 / 16x8 (and possibly 4x4)
 * are tried only if 8x8 improved on best_rd, using the 8x8 vectors as
 * predictors and a search window narrowed around best_ref_mv.
 *
 * Outputs: *returntotrate, *returnyrate and *returndistortion receive the
 * rate, luma rate and distortion of the best result. The per-block MVs and
 * eobs in x->e_mbd.block[], the partitioning in the mode info and
 * x->partition_info are overwritten from bsi. Returns bsi.segment_rd.
 *
 * NOTE(review): the write-back at the end is unconditional. If no
 * partitioning beat best_rd, the zero-initialised bsi contents are installed
 * and best_rd is returned; callers presumably compare the return value with
 * best_rd before using the result - confirm.
 */
1199 | | static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
1200 | | int_mv *best_ref_mv, int best_rd,
1201 | | int *mdcounts, int *returntotrate,
1202 | | int *returnyrate,
1203 | | int *returndistortion,
1204 | 556k | int mvthresh) {
1205 | 556k | int i;
1206 | 556k | BEST_SEG_INFO bsi;
1207 | |
1208 | 556k | memset(&bsi, 0, sizeof(bsi));
1209 | |
1210 | 556k | bsi.segment_rd = best_rd;
1211 | 556k | bsi.ref_mv = best_ref_mv;
1212 | 556k | bsi.mvp.as_int = best_ref_mv->as_int;
1213 | 556k | bsi.mvthresh = mvthresh;
1214 | 556k | bsi.mdcounts = mdcounts;
1215 | |
1216 | 9.45M | for (i = 0; i < 16; ++i) {
1217 | 8.90M | bsi.modes[i] = ZERO4X4;
1218 | 8.90M | }
1219 | |
1220 | 556k | if (cpi->compressor_speed == 0) {
1221 | | /* for now, we will keep the original segmentation order
1222 | | when in best quality mode */
1223 | 0 | rd_check_segment(cpi, x, &bsi, BLOCK_16X8);
1224 | 0 | rd_check_segment(cpi, x, &bsi, BLOCK_8X16);
1225 | 0 | rd_check_segment(cpi, x, &bsi, BLOCK_8X8);
1226 | 0 | rd_check_segment(cpi, x, &bsi, BLOCK_4X4);
1227 | 556k | } else {
1228 | 556k | int sr;
1229 | |
1230 | 556k | rd_check_segment(cpi, x, &bsi, BLOCK_8X8);
1231 | |
/* Only continue with the other partitionings if 8x8 beat best_rd. */
1232 | 556k | if (bsi.segment_rd < best_rd) {
1233 | 252k | int col_min = ((best_ref_mv->as_mv.col + 7) >> 3) - MAX_FULL_PEL_VAL;
1234 | 252k | int row_min = ((best_ref_mv->as_mv.row + 7) >> 3) - MAX_FULL_PEL_VAL;
1235 | 252k | int col_max = (best_ref_mv->as_mv.col >> 3) + MAX_FULL_PEL_VAL;
1236 | 252k | int row_max = (best_ref_mv->as_mv.row >> 3) + MAX_FULL_PEL_VAL;
1237 | |
/* Saved so the window can be restored after the extra searches. */
1238 | 252k | int tmp_col_min = x->mv_col_min;
1239 | 252k | int tmp_col_max = x->mv_col_max;
1240 | 252k | int tmp_row_min = x->mv_row_min;
1241 | 252k | int tmp_row_max = x->mv_row_max;
1242 | |
1243 | | /* Get intersection of UMV window and valid MV window to reduce # of
1244 | | * checks in diamond search. */
1245 | 252k | if (x->mv_col_min < col_min) x->mv_col_min = col_min;
1246 | 252k | if (x->mv_col_max > col_max) x->mv_col_max = col_max;
1247 | 252k | if (x->mv_row_min < row_min) x->mv_row_min = row_min;
1248 | 252k | if (x->mv_row_max > row_max) x->mv_row_max = row_max;
1249 | |
1250 | | /* Get 8x8 result */
/* Blocks 0, 2, 8 and 10 are the top-left 4x4 block of each 8x8 quadrant. */
1251 | 252k | bsi.sv_mvp[0].as_int = bsi.mvs[0].as_int;
1252 | 252k | bsi.sv_mvp[1].as_int = bsi.mvs[2].as_int;
1253 | 252k | bsi.sv_mvp[2].as_int = bsi.mvs[8].as_int;
1254 | 252k | bsi.sv_mvp[3].as_int = bsi.mvs[10].as_int;
1255 | |
1256 | | /* Use 8x8 result as 16x8/8x16's predictor MV. Adjust search range
1257 | | * according to the closeness of 2 MV. */
1258 | | /* block 8X16 */
1259 | 252k | {
/* sr: larger of the row / column differences (>> 3) between the two
 * vertically adjacent 8x8 vectors of each half.
 */
1260 | 252k | sr =
1261 | 252k | MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[2].as_mv.row)) >> 3,
1262 | 252k | (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[2].as_mv.col)) >> 3);
1263 | 252k | vp8_cal_step_param(sr, &bsi.sv_istep[0]);
1264 | |
1265 | 252k | sr =
1266 | 252k | MAXF((abs(bsi.sv_mvp[1].as_mv.row - bsi.sv_mvp[3].as_mv.row)) >> 3,
1267 | 252k | (abs(bsi.sv_mvp[1].as_mv.col - bsi.sv_mvp[3].as_mv.col)) >> 3);
1268 | 252k | vp8_cal_step_param(sr, &bsi.sv_istep[1]);
1269 | |
1270 | 252k | rd_check_segment(cpi, x, &bsi, BLOCK_8X16);
1271 | 252k | }
1272 | |
1273 | | /* block 16X8 */
1274 | 252k | {
/* Same as above, for the two horizontally adjacent 8x8 vectors. */
1275 | 252k | sr =
1276 | 252k | MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[1].as_mv.row)) >> 3,
1277 | 252k | (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[1].as_mv.col)) >> 3);
1278 | 252k | vp8_cal_step_param(sr, &bsi.sv_istep[0]);
1279 | |
1280 | 252k | sr =
1281 | 252k | MAXF((abs(bsi.sv_mvp[2].as_mv.row - bsi.sv_mvp[3].as_mv.row)) >> 3,
1282 | 252k | (abs(bsi.sv_mvp[2].as_mv.col - bsi.sv_mvp[3].as_mv.col)) >> 3);
1283 | 252k | vp8_cal_step_param(sr, &bsi.sv_istep[1]);
1284 | |
1285 | 252k | rd_check_segment(cpi, x, &bsi, BLOCK_16X8);
1286 | 252k | }
1287 | |
1288 | | /* If 8x8 is better than 16x8/8x16, then do 4x4 search */
1289 | | /* Not skip 4x4 if speed=0 (good quality) */
1290 | 252k | if (cpi->sf.no_skip_block4x4_search || bsi.segment_num == BLOCK_8X8)
1291 | | /* || (sv_segment_rd8x8-bsi.segment_rd) < sv_segment_rd8x8>>5) */
1292 | 91.2k | {
1293 | 91.2k | bsi.mvp.as_int = bsi.sv_mvp[0].as_int;
1294 | 91.2k | rd_check_segment(cpi, x, &bsi, BLOCK_4X4);
1295 | 91.2k | }
1296 | |
1297 | | /* restore UMV window */
1298 | 252k | x->mv_col_min = tmp_col_min;
1299 | 252k | x->mv_col_max = tmp_col_max;
1300 | 252k | x->mv_row_min = tmp_row_min;
1301 | 252k | x->mv_row_max = tmp_row_max;
1302 | 252k | }
1303 | 556k | }
1304 | |
1305 | | /* set it to the best */
1306 | 9.45M | for (i = 0; i < 16; ++i) {
1307 | 8.90M | BLOCKD *bd = &x->e_mbd.block[i];
1308 | |
1309 | 8.90M | bd->bmi.mv.as_int = bsi.mvs[i].as_int;
1310 | 8.90M | *bd->eob = bsi.eobs[i];
1311 | 8.90M | }
1312 | |
1313 | 556k | *returntotrate = bsi.r;
1314 | 556k | *returndistortion = bsi.d;
1315 | 556k | *returnyrate = bsi.segment_yrate;
1316 | |
1317 | | /* save partitions */
1318 | 556k | x->e_mbd.mode_info_context->mbmi.partitioning = bsi.segment_num;
1319 | 556k | x->partition_info->count = vp8_mbsplit_count[bsi.segment_num];
1320 | |
/* partition_info->bmi[] is re-packed here: one entry per partition, taken
 * from the first 4x4 block of that partition.
 */
1321 | 2.57M | for (i = 0; i < x->partition_info->count; ++i) {
1322 | 2.01M | int j;
1323 | |
1324 | 2.01M | j = vp8_mbsplit_offset[bsi.segment_num][i];
1325 | |
1326 | 2.01M | x->partition_info->bmi[i].mode = bsi.modes[j];
1327 | 2.01M | x->partition_info->bmi[i].mv.as_mv = bsi.mvs[j].as_mv;
1328 | 2.01M | }
1329 | | /*
1330 | | * used to set x->e_mbd.mode_info_context->mbmi.mv.as_int
1331 | | */
1332 | 556k | x->partition_info->bmi[15].mv.as_int = bsi.mvs[15].as_int;
1333 | |
1334 | 556k | return bsi.segment_rd;
1335 | 556k | }
1336 | | |
1337 | | /* The improved MV prediction */
/* Predict a starting motion vector for the macroblock `here`.
 *
 * Up to eight candidate slots are filled: above, left and above-left from
 * the current frame and, when the last frame was not a key frame, the
 * co-located / above / left / right / below entries of the last frame's
 * stored MVs (cpi->lfmv). A slot whose source is intra keeps a zero MV and
 * a reference of 0.
 *
 * The slots are scanned in the order given by near_sadidx[]. The first one
 * whose reference frame equals here->mbmi.ref_frame supplies the predictor,
 * and *sr is set to 3 if it was found within the first three scan positions,
 * otherwise 2. If none matches, the predictor's row and column are each the
 * middle element of the candidate rows / columns after insertsortmv(), and
 * *sr is set to 0.
 *
 * The result is written to *mvp after vp8_clamp_mv2(). When `here` is intra
 * coded, *mvp becomes the (clamped) zero vector and *sr is left untouched.
 */
1338 | | void vp8_mv_pred(VP8_COMP *cpi, MACROBLOCKD *xd, const MODE_INFO *here,
1339 | | int_mv *mvp, int refframe, int *ref_frame_sign_bias, int *sr,
1340 | 1.34M | int near_sadidx[]) {
1341 | 1.34M | const MODE_INFO *above = here - xd->mode_info_stride;
1342 | 1.34M | const MODE_INFO *left = here - 1;
1343 | 1.34M | const MODE_INFO *aboveleft = above - 1;
1344 | 1.34M | int_mv near_mvs[8];
1345 | 1.34M | int near_ref[8];
1346 | 1.34M | int_mv mv;
1347 | 1.34M | int vcnt = 0;
1348 | 1.34M | int find = 0;
1349 | 1.34M | int mb_offset;
1350 | |
1351 | 1.34M | int mvx[8];
1352 | 1.34M | int mvy[8];
1353 | 1.34M | int i;
1354 | |
1355 | 1.34M | mv.as_int = 0;
1356 | |
1357 | 1.34M | if (here->mbmi.ref_frame != INTRA_FRAME) {
1358 | 1.34M | near_mvs[0].as_int = near_mvs[1].as_int = near_mvs[2].as_int =
1359 | 1.34M | near_mvs[3].as_int = near_mvs[4].as_int = near_mvs[5].as_int =
1360 | 1.34M | near_mvs[6].as_int = near_mvs[7].as_int = 0;
1361 | 1.34M | near_ref[0] = near_ref[1] = near_ref[2] = near_ref[3] = near_ref[4] =
1362 | 1.34M | near_ref[5] = near_ref[6] = near_ref[7] = 0;
1363 | |
1364 | | /* read in 3 nearby block's MVs from current frame as prediction
1365 | | * candidates.
1366 | | */
/* vcnt advances for every slot, filled or not, so slot numbers stay fixed. */
1367 | 1.34M | if (above->mbmi.ref_frame != INTRA_FRAME) {
1368 | 386k | near_mvs[vcnt].as_int = above->mbmi.mv.as_int;
1369 | 386k | mv_bias(ref_frame_sign_bias[above->mbmi.ref_frame], refframe,
1370 | 386k | &near_mvs[vcnt], ref_frame_sign_bias);
1371 | 386k | near_ref[vcnt] = above->mbmi.ref_frame;
1372 | 386k | }
1373 | 1.34M | vcnt++;
1374 | 1.34M | if (left->mbmi.ref_frame != INTRA_FRAME) {
1375 | 516k | near_mvs[vcnt].as_int = left->mbmi.mv.as_int;
1376 | 516k | mv_bias(ref_frame_sign_bias[left->mbmi.ref_frame], refframe,
1377 | 516k | &near_mvs[vcnt], ref_frame_sign_bias);
1378 | 516k | near_ref[vcnt] = left->mbmi.ref_frame;
1379 | 516k | }
1380 | 1.34M | vcnt++;
1381 | 1.34M | if (aboveleft->mbmi.ref_frame != INTRA_FRAME) {
1382 | 308k | near_mvs[vcnt].as_int = aboveleft->mbmi.mv.as_int;
1383 | 308k | mv_bias(ref_frame_sign_bias[aboveleft->mbmi.ref_frame], refframe,
1384 | 308k | &near_mvs[vcnt], ref_frame_sign_bias);
1385 | 308k | near_ref[vcnt] = aboveleft->mbmi.ref_frame;
1386 | 308k | }
1387 | 1.34M | vcnt++;
1388 | |
1389 | | /* read in 5 nearby block's MVs from last frame. */
1390 | 1.34M | if (cpi->common.last_frame_type != KEY_FRAME) {
/* Index of the current MB in the last-frame arrays.
 * NOTE(review): the "+ 1" terms suggest those arrays carry a one-entry
 * border around the frame - confirm against where they are filled.
 */
1391 | 876k | mb_offset = (-xd->mb_to_top_edge / 128 + 1) * (xd->mode_info_stride + 1) +
1392 | 876k | (-xd->mb_to_left_edge / 128 + 1);
1393 | |
1394 | | /* current in last frame */
1395 | 876k | if (cpi->lf_ref_frame[mb_offset] != INTRA_FRAME) {
1396 | 465k | near_mvs[vcnt].as_int = cpi->lfmv[mb_offset].as_int;
1397 | 465k | mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset], refframe,
1398 | 465k | &near_mvs[vcnt], ref_frame_sign_bias);
1399 | 465k | near_ref[vcnt] = cpi->lf_ref_frame[mb_offset];
1400 | 465k | }
1401 | 876k | vcnt++;
1402 | |
1403 | | /* above in last frame */
1404 | 876k | if (cpi->lf_ref_frame[mb_offset - xd->mode_info_stride - 1] !=
1405 | 876k | INTRA_FRAME) {
1406 | 284k | near_mvs[vcnt].as_int =
1407 | 284k | cpi->lfmv[mb_offset - xd->mode_info_stride - 1].as_int;
1408 | 284k | mv_bias(
1409 | 284k | cpi->lf_ref_frame_sign_bias[mb_offset - xd->mode_info_stride - 1],
1410 | 284k | refframe, &near_mvs[vcnt], ref_frame_sign_bias);
1411 | 284k | near_ref[vcnt] =
1412 | 284k | cpi->lf_ref_frame[mb_offset - xd->mode_info_stride - 1];
1413 | 284k | }
1414 | 876k | vcnt++;
1415 | |
1416 | | /* left in last frame */
1417 | 876k | if (cpi->lf_ref_frame[mb_offset - 1] != INTRA_FRAME) {
1418 | 339k | near_mvs[vcnt].as_int = cpi->lfmv[mb_offset - 1].as_int;
1419 | 339k | mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset - 1], refframe,
1420 | 339k | &near_mvs[vcnt], ref_frame_sign_bias);
1421 | 339k | near_ref[vcnt] = cpi->lf_ref_frame[mb_offset - 1];
1422 | 339k | }
1423 | 876k | vcnt++;
1424 | |
1425 | | /* right in last frame */
1426 | 876k | if (cpi->lf_ref_frame[mb_offset + 1] != INTRA_FRAME) {
1427 | 334k | near_mvs[vcnt].as_int = cpi->lfmv[mb_offset + 1].as_int;
1428 | 334k | mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset + 1], refframe,
1429 | 334k | &near_mvs[vcnt], ref_frame_sign_bias);
1430 | 334k | near_ref[vcnt] = cpi->lf_ref_frame[mb_offset + 1];
1431 | 334k | }
1432 | 876k | vcnt++;
1433 | |
1434 | | /* below in last frame */
1435 | 876k | if (cpi->lf_ref_frame[mb_offset + xd->mode_info_stride + 1] !=
1436 | 876k | INTRA_FRAME) {
1437 | 275k | near_mvs[vcnt].as_int =
1438 | 275k | cpi->lfmv[mb_offset + xd->mode_info_stride + 1].as_int;
1439 | 275k | mv_bias(
1440 | 275k | cpi->lf_ref_frame_sign_bias[mb_offset + xd->mode_info_stride + 1],
1441 | 275k | refframe, &near_mvs[vcnt], ref_frame_sign_bias);
1442 | 275k | near_ref[vcnt] =
1443 | 275k | cpi->lf_ref_frame[mb_offset + xd->mode_info_stride + 1];
1444 | 275k | }
1445 | 876k | vcnt++;
1446 | 876k | }
1447 | |
/* Scan the slots in near_sadidx[] order and take the first one that uses
 * the same reference frame as `here`.
 */
1448 | 5.88M | for (i = 0; i < vcnt; ++i) {
1449 | 5.25M | if (near_ref[near_sadidx[i]] != INTRA_FRAME) {
1450 | 1.70M | if (here->mbmi.ref_frame == near_ref[near_sadidx[i]]) {
1451 | 717k | mv.as_int = near_mvs[near_sadidx[i]].as_int;
1452 | 717k | find = 1;
1453 | 717k | if (i < 3) {
1454 | 622k | *sr = 3;
1455 | 622k | } else {
1456 | 94.3k | *sr = 2;
1457 | 94.3k | }
1458 | 717k | break;
1459 | 717k | }
1460 | 1.70M | }
1461 | 5.25M | }
1462 | |
1463 | 1.34M | if (!find) {
/* No slot matched: fall back to the middle row / column value over all
 * slots. Note that mvx[] holds rows and mvy[] holds columns.
 */
1464 | 4.49M | for (i = 0; i < vcnt; ++i) {
1465 | 3.86M | mvx[i] = near_mvs[i].as_mv.row;
1466 | 3.86M | mvy[i] = near_mvs[i].as_mv.col;
1467 | 3.86M | }
1468 | |
1469 | 626k | insertsortmv(mvx, vcnt);
1470 | 626k | insertsortmv(mvy, vcnt);
1471 | 626k | mv.as_mv.row = mvx[vcnt / 2];
1472 | 626k | mv.as_mv.col = mvy[vcnt / 2];
1473 | |
1474 | | /* sr is set to 0 to allow calling function to decide the search
1475 | | * range.
1476 | | */
1477 | 626k | *sr = 0;
1478 | 626k | }
1479 | 1.34M | }
1480 | |
1481 | | /* Set up return values */
1482 | 1.34M | mvp->as_int = mv.as_int;
1483 | 1.34M | vp8_clamp_mv2(mvp, xd);
1484 | 1.34M | }
1485 | | |
1486 | | void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, |
1487 | 951k | int recon_yoffset, int near_sadidx[]) { |
1488 | | /* near_sad indexes: |
1489 | | * 0-cf above, 1-cf left, 2-cf aboveleft, |
1490 | | * 3-lf current, 4-lf above, 5-lf left, 6-lf right, 7-lf below |
1491 | | */ |
1492 | 951k | int near_sad[8] = { 0 }; |
1493 | 951k | BLOCK *b = &x->block[0]; |
1494 | 951k | unsigned char *src_y_ptr = *(b->base_src); |
1495 | | |
1496 | | /* calculate sad for current frame 3 nearby MBs. */ |
1497 | 951k | if (xd->mb_to_top_edge == 0 && xd->mb_to_left_edge == 0) { |
1498 | 57.3k | near_sad[0] = near_sad[1] = near_sad[2] = INT_MAX; |
1499 | 893k | } else if (xd->mb_to_top_edge == |
1500 | 893k | 0) { /* only has left MB for sad calculation. */ |
1501 | 336k | near_sad[0] = near_sad[2] = INT_MAX; |
1502 | 336k | near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf( |
1503 | 336k | src_y_ptr, b->src_stride, xd->dst.y_buffer - 16, xd->dst.y_stride); |
1504 | 556k | } else if (xd->mb_to_left_edge == |
1505 | 556k | 0) { /* only has left MB for sad calculation. */ |
1506 | 86.0k | near_sad[1] = near_sad[2] = INT_MAX; |
1507 | 86.0k | near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf( |
1508 | 86.0k | src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride * 16, |
1509 | 86.0k | xd->dst.y_stride); |
1510 | 470k | } else { |
1511 | 470k | near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf( |
1512 | 470k | src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride * 16, |
1513 | 470k | xd->dst.y_stride); |
1514 | 470k | near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf( |
1515 | 470k | src_y_ptr, b->src_stride, xd->dst.y_buffer - 16, xd->dst.y_stride); |
1516 | 470k | near_sad[2] = cpi->fn_ptr[BLOCK_16X16].sdf( |
1517 | 470k | src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride * 16 - 16, |
1518 | 470k | xd->dst.y_stride); |
1519 | 470k | } |
1520 | | |
1521 | 951k | if (cpi->common.last_frame_type != KEY_FRAME) { |
1522 | | /* calculate sad for last frame 5 nearby MBs. */ |
1523 | 483k | unsigned char *pre_y_buffer = |
1524 | 483k | cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_buffer + recon_yoffset; |
1525 | 483k | int pre_y_stride = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_stride; |
1526 | | |
1527 | 483k | if (xd->mb_to_top_edge == 0) near_sad[4] = INT_MAX; |
1528 | 483k | if (xd->mb_to_left_edge == 0) near_sad[5] = INT_MAX; |
1529 | 483k | if (xd->mb_to_right_edge == 0) near_sad[6] = INT_MAX; |
1530 | 483k | if (xd->mb_to_bottom_edge == 0) near_sad[7] = INT_MAX; |
1531 | | |
1532 | 483k | if (near_sad[4] != INT_MAX) { |
1533 | 306k | near_sad[4] = cpi->fn_ptr[BLOCK_16X16].sdf( |
1534 | 306k | src_y_ptr, b->src_stride, pre_y_buffer - pre_y_stride * 16, |
1535 | 306k | pre_y_stride); |
1536 | 306k | } |
1537 | 483k | if (near_sad[5] != INT_MAX) { |
1538 | 375k | near_sad[5] = cpi->fn_ptr[BLOCK_16X16].sdf( |
1539 | 375k | src_y_ptr, b->src_stride, pre_y_buffer - 16, pre_y_stride); |
1540 | 375k | } |
1541 | 483k | near_sad[3] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, |
1542 | 483k | pre_y_buffer, pre_y_stride); |
1543 | 483k | if (near_sad[6] != INT_MAX) { |
1544 | 377k | near_sad[6] = cpi->fn_ptr[BLOCK_16X16].sdf( |
1545 | 377k | src_y_ptr, b->src_stride, pre_y_buffer + 16, pre_y_stride); |
1546 | 377k | } |
1547 | 483k | if (near_sad[7] != INT_MAX) { |
1548 | 323k | near_sad[7] = cpi->fn_ptr[BLOCK_16X16].sdf( |
1549 | 323k | src_y_ptr, b->src_stride, pre_y_buffer + pre_y_stride * 16, |
1550 | 323k | pre_y_stride); |
1551 | 323k | } |
1552 | 483k | } |
1553 | | |
1554 | 951k | if (cpi->common.last_frame_type != KEY_FRAME) { |
1555 | 483k | insertsortsad(near_sad, near_sadidx, 8); |
1556 | 483k | } else { |
1557 | 467k | insertsortsad(near_sad, near_sadidx, 3); |
1558 | 467k | } |
1559 | 951k | } |
1560 | | |
1561 | 552k | static void rd_update_mvcount(MACROBLOCK *x, int_mv *best_ref_mv) { |
1562 | 552k | if (x->e_mbd.mode_info_context->mbmi.mode == SPLITMV) { |
1563 | 113k | int i; |
1564 | | |
1565 | 908k | for (i = 0; i < x->partition_info->count; ++i) { |
1566 | 794k | if (x->partition_info->bmi[i].mode == NEW4X4) { |
1567 | 309k | const int row_val = ((x->partition_info->bmi[i].mv.as_mv.row - |
1568 | 309k | best_ref_mv->as_mv.row) >> |
1569 | 309k | 1); |
1570 | 309k | const int row_idx = mv_max + row_val; |
1571 | 309k | const int col_val = ((x->partition_info->bmi[i].mv.as_mv.col - |
1572 | 309k | best_ref_mv->as_mv.col) >> |
1573 | 309k | 1); |
1574 | 309k | const int col_idx = mv_max + col_val; |
1575 | 309k | if (row_idx >= 0 && row_idx < MVvals && col_idx >= 0 && |
1576 | 309k | col_idx < MVvals) { |
1577 | 309k | x->MVcount[0][row_idx]++; |
1578 | 309k | x->MVcount[1][col_idx]++; |
1579 | 309k | } |
1580 | 309k | } |
1581 | 794k | } |
1582 | 439k | } else if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV) { |
1583 | 69.0k | const int row_val = ((x->e_mbd.mode_info_context->mbmi.mv.as_mv.row - |
1584 | 69.0k | best_ref_mv->as_mv.row) >> |
1585 | 69.0k | 1); |
1586 | 69.0k | const int row_idx = mv_max + row_val; |
1587 | 69.0k | const int col_val = ((x->e_mbd.mode_info_context->mbmi.mv.as_mv.col - |
1588 | 69.0k | best_ref_mv->as_mv.col) >> |
1589 | 69.0k | 1); |
1590 | 69.0k | const int col_idx = mv_max + col_val; |
1591 | 69.0k | if (row_idx >= 0 && row_idx < MVvals && col_idx >= 0 && col_idx < MVvals) { |
1592 | 69.0k | x->MVcount[0][row_idx]++; |
1593 | 69.0k | x->MVcount[1][col_idx]++; |
1594 | 69.0k | } |
1595 | 69.0k | } |
1596 | 552k | } |
1597 | | |
1598 | | static int evaluate_inter_mode_rd(int mdcounts[4], RATE_DISTORTION *rd, |
1599 | | int *disable_skip, VP8_COMP *cpi, |
1600 | 2.26M | MACROBLOCK *x) { |
1601 | 2.26M | MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode; |
1602 | 2.26M | BLOCK *b = &x->block[0]; |
1603 | 2.26M | MACROBLOCKD *xd = &x->e_mbd; |
1604 | 2.26M | int distortion; |
1605 | 2.26M | vp8_build_inter16x16_predictors_mby(&x->e_mbd, x->e_mbd.predictor, 16); |
1606 | | |
1607 | 2.26M | if (cpi->active_map_enabled && x->active_ptr[0] == 0) { |
1608 | 0 | x->skip = 1; |
1609 | 2.26M | } else if (x->encode_breakout) { |
1610 | 0 | unsigned int sse; |
1611 | 0 | unsigned int var; |
1612 | 0 | unsigned int threshold = |
1613 | 0 | (xd->block[0].dequant[1] * xd->block[0].dequant[1] >> 4); |
1614 | |
|
1615 | 0 | if (threshold < x->encode_breakout) threshold = x->encode_breakout; |
1616 | |
|
1617 | 0 | var = vpx_variance16x16(*(b->base_src), b->src_stride, x->e_mbd.predictor, |
1618 | 0 | 16, &sse); |
1619 | |
|
1620 | 0 | if (sse < threshold) { |
1621 | 0 | unsigned int q2dc = xd->block[24].dequant[0]; |
1622 | | /* If theres is no codeable 2nd order dc |
1623 | | or a very small uniform pixel change change */ |
1624 | 0 | if ((sse - var < q2dc * q2dc >> 4) || (sse / 2 > var && sse - var < 64)) { |
1625 | | /* Check u and v to make sure skip is ok */ |
1626 | 0 | unsigned int sse2 = VP8_UVSSE(x); |
1627 | 0 | if (sse2 * 2 < threshold) { |
1628 | 0 | x->skip = 1; |
1629 | 0 | rd->distortion2 = sse + sse2; |
1630 | 0 | rd->rate2 = 500; |
1631 | | |
1632 | | /* for best_yrd calculation */ |
1633 | 0 | rd->rate_uv = 0; |
1634 | 0 | rd->distortion_uv = sse2; |
1635 | |
|
1636 | 0 | *disable_skip = 1; |
1637 | 0 | return RDCOST(x->rdmult, x->rddiv, rd->rate2, rd->distortion2); |
1638 | 0 | } |
1639 | 0 | } |
1640 | 0 | } |
1641 | 0 | } |
1642 | | |
1643 | | /* Add in the Mv/mode cost */ |
1644 | 2.26M | rd->rate2 += vp8_cost_mv_ref(this_mode, mdcounts); |
1645 | | |
1646 | | /* Y cost and distortion */ |
1647 | 2.26M | macro_block_yrd(x, &rd->rate_y, &distortion); |
1648 | 2.26M | rd->rate2 += rd->rate_y; |
1649 | 2.26M | rd->distortion2 += distortion; |
1650 | | |
1651 | | /* UV cost and distortion */ |
1652 | 2.26M | rd_inter16x16_uv(cpi, x, &rd->rate_uv, &rd->distortion_uv, |
1653 | 2.26M | cpi->common.full_pixel); |
1654 | 2.26M | rd->rate2 += rd->rate_uv; |
1655 | 2.26M | rd->distortion2 += rd->distortion_uv; |
1656 | 2.26M | return INT_MAX; |
1657 | 2.26M | } |
1658 | | |
1659 | | static int calculate_final_rd_costs(int this_rd, RATE_DISTORTION *rd, |
1660 | | int *other_cost, int disable_skip, |
1661 | | int uv_intra_tteob, int intra_rd_penalty, |
1662 | 5.16M | VP8_COMP *cpi, MACROBLOCK *x) { |
1663 | 5.16M | MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode; |
1664 | | |
1665 | | /* Where skip is allowable add in the default per mb cost for the no |
1666 | | * skip case. where we then decide to skip we have to delete this and |
1667 | | * replace it with the cost of signalling a skip |
1668 | | */ |
1669 | 5.16M | if (cpi->common.mb_no_coeff_skip) { |
1670 | 5.16M | *other_cost += vp8_cost_bit(cpi->prob_skip_false, 0); |
1671 | 5.16M | rd->rate2 += *other_cost; |
1672 | 5.16M | } |
1673 | | |
1674 | | /* Estimate the reference frame signaling cost and add it |
1675 | | * to the rolling cost variable. |
1676 | | */ |
1677 | 5.16M | rd->rate2 += x->ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame]; |
1678 | | |
1679 | 5.16M | if (!disable_skip) { |
1680 | | /* Test for the condition where skip block will be activated |
1681 | | * because there are no non zero coefficients and make any |
1682 | | * necessary adjustment for rate |
1683 | | */ |
1684 | 4.61M | if (cpi->common.mb_no_coeff_skip) { |
1685 | 4.61M | int i; |
1686 | 4.61M | int tteob; |
1687 | 4.61M | int has_y2_block = (this_mode != SPLITMV && this_mode != B_PRED); |
1688 | | |
1689 | 4.61M | tteob = 0; |
1690 | 4.61M | if (has_y2_block) tteob += x->e_mbd.eobs[24]; |
1691 | | |
1692 | 78.5M | for (i = 0; i < 16; ++i) tteob += (x->e_mbd.eobs[i] > has_y2_block); |
1693 | | |
1694 | 4.61M | if (x->e_mbd.mode_info_context->mbmi.ref_frame) { |
1695 | 22.6M | for (i = 16; i < 24; ++i) tteob += x->e_mbd.eobs[i]; |
1696 | 2.51M | } else { |
1697 | 2.10M | tteob += uv_intra_tteob; |
1698 | 2.10M | } |
1699 | | |
1700 | 4.61M | if (tteob == 0) { |
1701 | 315k | rd->rate2 -= (rd->rate_y + rd->rate_uv); |
1702 | | /* for best_yrd calculation */ |
1703 | 315k | rd->rate_uv = 0; |
1704 | | |
1705 | | /* Back out no skip flag costing and add in skip flag costing */ |
1706 | 315k | if (cpi->prob_skip_false) { |
1707 | 315k | int prob_skip_cost; |
1708 | | |
1709 | 315k | prob_skip_cost = vp8_cost_bit(cpi->prob_skip_false, 1); |
1710 | 315k | prob_skip_cost -= (int)vp8_cost_bit(cpi->prob_skip_false, 0); |
1711 | 315k | rd->rate2 += prob_skip_cost; |
1712 | 315k | *other_cost += prob_skip_cost; |
1713 | 315k | } |
1714 | 315k | } |
1715 | 4.61M | } |
1716 | | /* Calculate the final RD estimate for this mode */ |
1717 | 4.61M | this_rd = RDCOST(x->rdmult, x->rddiv, rd->rate2, rd->distortion2); |
1718 | 4.61M | if (this_rd < INT_MAX && |
1719 | 4.61M | x->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME) { |
1720 | 2.10M | this_rd += intra_rd_penalty; |
1721 | 2.10M | } |
1722 | 4.61M | } |
1723 | 5.16M | return this_rd; |
1724 | 5.16M | } |
1725 | | |
1726 | | static void update_best_mode(BEST_MODE *best_mode, int this_rd, |
1727 | | RATE_DISTORTION *rd, int other_cost, |
1728 | 1.76M | MACROBLOCK *x) { |
1729 | 1.76M | MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode; |
1730 | | |
1731 | 1.76M | other_cost += x->ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame]; |
1732 | | |
1733 | | /* Calculate the final y RD estimate for this mode */ |
1734 | 1.76M | best_mode->yrd = |
1735 | 1.76M | RDCOST(x->rdmult, x->rddiv, (rd->rate2 - rd->rate_uv - other_cost), |
1736 | 1.76M | (rd->distortion2 - rd->distortion_uv)); |
1737 | | |
1738 | 1.76M | best_mode->rd = this_rd; |
1739 | 1.76M | memcpy(&best_mode->mbmode, &x->e_mbd.mode_info_context->mbmi, |
1740 | 1.76M | sizeof(MB_MODE_INFO)); |
1741 | 1.76M | memcpy(&best_mode->partition, x->partition_info, sizeof(PARTITION_INFO)); |
1742 | | |
1743 | 1.76M | if ((this_mode == B_PRED) || (this_mode == SPLITMV)) { |
1744 | 349k | int i; |
1745 | 5.93M | for (i = 0; i < 16; ++i) { |
1746 | 5.58M | best_mode->bmodes[i] = x->e_mbd.block[i].bmi; |
1747 | 5.58M | } |
1748 | 349k | } |
1749 | 1.76M | } |
1750 | | |
/*
 * Rate-distortion mode decision for one macroblock of an inter frame.
 *
 * Walks the MAX_MODES candidates in vp8_mode_order / vp8_ref_frame_order
 * order, computes an RD cost for every candidate that survives the threshold
 * and frequency checks, and remembers the cheapest one in best_mode. The
 * per-mode thresholds in x->rd_threshes / x->rd_thresh_mult are adapted as a
 * side effect. On the normal exit path the winning mode info is copied into
 * x->e_mbd.mode_info_context->mbmi (plus the per-block modes or MVs for
 * B_PRED / SPLITMV) and rd_update_mvcount() is called.
 *
 * cpi, x             encoder and macroblock state; both are modified.
 * recon_yoffset,
 * recon_uvoffset     forwarded to get_predictor_pointers(), vp8_cal_sad()
 *                    and, when compiled in, the denoiser.
 * returnrate,
 * returndistortion   receive rd.rate2 / rd.distortion2 of the candidate that
 *                    was accepted as best; written only when a candidate is
 *                    accepted.
 * returnintra        starts at INT_MAX and receives rd.distortion2 of each
 *                    intra candidate that lowers best_mode.intra_rd.
 * mb_row, mb_col     used only inside the CONFIG_TEMPORAL_DENOISING code.
 *
 * When cpi->is_src_frame_alt_ref is set and the best mode is not
 * ZEROMV/ALTREF_FRAME, the function forces ZEROMV/ALTREF_FRAME and returns
 * early, without copying best_mode back or updating the MV counts.
 */
void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
                            int recon_uvoffset, int *returnrate,
                            int *returndistortion, int *returnintra, int mb_row,
                            int mb_col) {
  BLOCK *b = &x->block[0];
  BLOCKD *d = &x->e_mbd.block[0];
  MACROBLOCKD *xd = &x->e_mbd;
  /* The *_sb arrays hold one set of candidates per sign-bias value (0/1). */
  int_mv best_ref_mv_sb[2];
  int_mv mode_mv_sb[2][MB_MODE_COUNT];
  int_mv best_ref_mv;
  int_mv *mode_mv;
  MB_PREDICTION_MODE this_mode;
  int num00;
  int best_mode_index = 0;
  BEST_MODE best_mode;

  int i;
  int mode_index;
  int mdcounts[4];
  int rate;
  RATE_DISTORTION rd;
  /* Chroma intra results; computed once, on the first intra candidate. */
  int uv_intra_rate, uv_intra_distortion, uv_intra_rate_tokenonly;
  int uv_intra_tteob = 0;
  int uv_intra_done = 0;

  MB_PREDICTION_MODE uv_intra_mode = 0;
  int_mv mvp;
  int near_sadidx[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
  int saddone = 0;
  /* search range got from mv_pred(). It uses step_param levels. (0-7) */
  int sr = 0;

  unsigned char *plane[4][3] = { { 0, 0 } };
  int ref_frame_map[4];
  int sign_bias = 0;

  /* Added to the RD cost of intra candidates by calculate_final_rd_costs(). */
  int intra_rd_penalty =
      10 * vp8_dc_quant(cpi->common.base_qindex, cpi->common.y1dc_delta_q);

#if CONFIG_TEMPORAL_DENOISING
  unsigned int zero_mv_sse = UINT_MAX, best_sse = UINT_MAX,
               best_rd_sse = UINT_MAX;
#endif

  // _uv variables are not set consistently before calling update_best_mode.
  rd.rate_uv = 0;
  rd.distortion_uv = 0;

  mode_mv = mode_mv_sb[sign_bias];
  best_ref_mv.as_int = 0;
  best_mode.rd = INT_MAX;
  best_mode.yrd = INT_MAX;
  best_mode.intra_rd = INT_MAX;
  memset(mode_mv_sb, 0, sizeof(mode_mv_sb));
  memset(&best_mode.mbmode, 0, sizeof(best_mode.mbmode));
  memset(&best_mode.bmodes, 0, sizeof(best_mode.bmodes));

  /* Setup search priorities */
  get_reference_search_order(cpi, ref_frame_map);

  /* Check to see if there is at least 1 valid reference frame that we need
   * to calculate near_mvs.
   */
  if (ref_frame_map[1] > 0) {
    sign_bias = vp8_find_near_mvs_bias(
        &x->e_mbd, x->e_mbd.mode_info_context, mode_mv_sb, best_ref_mv_sb,
        mdcounts, ref_frame_map[1], cpi->common.ref_frame_sign_bias);

    mode_mv = mode_mv_sb[sign_bias];
    best_ref_mv.as_int = best_ref_mv_sb[sign_bias].as_int;
  }

  get_predictor_pointers(cpi, plane, recon_yoffset, recon_uvoffset);

  *returnintra = INT_MAX;
  /* Count of the number of MBs tested so far this frame */
  x->mbs_tested_so_far++;

  x->skip = 0;

  for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
    int this_rd = INT_MAX;
    int disable_skip = 0;
    int other_cost = 0;
    int this_ref_frame = ref_frame_map[vp8_ref_frame_order[mode_index]];

    /* Test best rd so far against threshold for trying this mode. */
    if (best_mode.rd <= x->rd_threshes[mode_index]) continue;

    /* A negative map entry means this candidate is not searched. */
    if (this_ref_frame < 0) continue;

    /* These variables hold the rolling total cost and distortion for
     * this mode
     */
    rd.rate2 = 0;
    rd.distortion2 = 0;

    this_mode = vp8_mode_order[mode_index];

    x->e_mbd.mode_info_context->mbmi.mode = this_mode;
    x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame;

    /* Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
     * unless ARNR filtering is enabled in which case we want
     * an unfiltered alternative
     */
    if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
      if (this_mode != ZEROMV ||
          x->e_mbd.mode_info_context->mbmi.ref_frame != ALTREF_FRAME) {
        continue;
      }
    }

    /* everything but intra */
    if (x->e_mbd.mode_info_context->mbmi.ref_frame) {
      assert(plane[this_ref_frame][0] != NULL &&
             plane[this_ref_frame][1] != NULL &&
             plane[this_ref_frame][2] != NULL);
      x->e_mbd.pre.y_buffer = plane[this_ref_frame][0];
      x->e_mbd.pre.u_buffer = plane[this_ref_frame][1];
      x->e_mbd.pre.v_buffer = plane[this_ref_frame][2];

      /* Switch to the MV candidates that match this reference frame's
       * sign bias.
       */
      if (sign_bias != cpi->common.ref_frame_sign_bias[this_ref_frame]) {
        sign_bias = cpi->common.ref_frame_sign_bias[this_ref_frame];
        mode_mv = mode_mv_sb[sign_bias];
        best_ref_mv.as_int = best_ref_mv_sb[sign_bias].as_int;
      }
    }

    /* Check to see if the testing frequency for this mode is at its
     * max. If so then prevent it from being tested and increase the
     * threshold for its testing
     */
    if (x->mode_test_hit_counts[mode_index] &&
        (cpi->mode_check_freq[mode_index] > 1)) {
      if (x->mbs_tested_so_far <= cpi->mode_check_freq[mode_index] *
                                      x->mode_test_hit_counts[mode_index]) {
        /* Increase the threshold for coding this mode to make it
         * less likely to be chosen
         */
        x->rd_thresh_mult[mode_index] += 4;

        if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT) {
          x->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
        }

        x->rd_threshes[mode_index] =
            (cpi->rd_baseline_thresh[mode_index] >> 7) *
            x->rd_thresh_mult[mode_index];

        continue;
      }
    }

    /* We have now reached the point where we are going to test the
     * current mode so increment the counter for the number of times
     * it has been tested
     */
    x->mode_test_hit_counts[mode_index]++;

    /* Experimental code. Special case for gf and arf zeromv modes.
     * Increase zbin size to suppress noise
     */
    if (x->zbin_mode_boost_enabled) {
      if (this_ref_frame == INTRA_FRAME) {
        x->zbin_mode_boost = 0;
      } else {
        if (vp8_mode_order[mode_index] == ZEROMV) {
          if (this_ref_frame != LAST_FRAME) {
            x->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
          } else {
            x->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
          }
        } else if (vp8_mode_order[mode_index] == SPLITMV) {
          x->zbin_mode_boost = 0;
        } else {
          x->zbin_mode_boost = MV_ZBIN_BOOST;
        }
      }

      vp8_update_zbin_extra(cpi, x);
    }

    /* The chroma intra mode does not depend on the luma mode, so it is
     * picked once and reused for every intra candidate.
     */
    if (!uv_intra_done && this_ref_frame == INTRA_FRAME) {
      rd_pick_intra_mbuv_mode(x, &uv_intra_rate, &uv_intra_rate_tokenonly,
                              &uv_intra_distortion);
      uv_intra_mode = x->e_mbd.mode_info_context->mbmi.uv_mode;

      /*
       * Total of the eobs is used later to further adjust rate2. Since uv
       * block's intra eobs will be overwritten when we check inter modes,
       * we need to save uv_intra_tteob here.
       */
      for (i = 16; i < 24; ++i) uv_intra_tteob += x->e_mbd.eobs[i];

      uv_intra_done = 1;
    }

    /* NOTE: a "continue" inside this switch abandons the candidate and
     * skips all of the bookkeeping that follows the switch.
     */
    switch (this_mode) {
      case B_PRED: {
        int tmp_rd;

        /* Note the rate value returned here includes the cost of
         * coding the BPRED mode: x->mbmode_cost[x->e_mbd.frame_type][BPRED]
         */
        int distortion;
        tmp_rd = rd_pick_intra4x4mby_modes(x, &rate, &rd.rate_y, &distortion,
                                           best_mode.yrd);
        rd.rate2 += rate;
        rd.distortion2 += distortion;

        if (tmp_rd < best_mode.yrd) {
          assert(uv_intra_done);
          rd.rate2 += uv_intra_rate;
          rd.rate_uv = uv_intra_rate_tokenonly;
          rd.distortion2 += uv_intra_distortion;
          rd.distortion_uv = uv_intra_distortion;
        } else {
          /* Luma alone is already no better than the best mode: keep
           * this_rd at INT_MAX and stop calculate_final_rd_costs() from
           * recomputing it.
           */
          this_rd = INT_MAX;
          disable_skip = 1;
        }
        break;
      }

      case SPLITMV: {
        int tmp_rd;
        int this_rd_thresh;
        int distortion;

        /* Use the NEWMV threshold that belongs to the same position in
         * the reference search order (1, 2, otherwise 3).
         */
        this_rd_thresh = (vp8_ref_frame_order[mode_index] == 1)
                             ? x->rd_threshes[THR_NEW1]
                             : x->rd_threshes[THR_NEW3];
        this_rd_thresh = (vp8_ref_frame_order[mode_index] == 2)
                             ? x->rd_threshes[THR_NEW2]
                             : this_rd_thresh;

        tmp_rd = vp8_rd_pick_best_mbsegmentation(
            cpi, x, &best_ref_mv, best_mode.yrd, mdcounts, &rate, &rd.rate_y,
            &distortion, this_rd_thresh);

        rd.rate2 += rate;
        rd.distortion2 += distortion;

        /* If even the 'Y' rd value of split is higher than best so far
         * then don't bother looking at UV
         */
        if (tmp_rd < best_mode.yrd) {
          /* Now work out UV cost and add it in */
          rd_inter4x4_uv(cpi, x, &rd.rate_uv, &rd.distortion_uv,
                         cpi->common.full_pixel);
          rd.rate2 += rd.rate_uv;
          rd.distortion2 += rd.distortion_uv;
        } else {
          this_rd = INT_MAX;
          disable_skip = 1;
        }
        break;
      }
      case DC_PRED:
      case V_PRED:
      case H_PRED:
      case TM_PRED: {
        int distortion;
        x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;

        vp8_build_intra_predictors_mby_s(
            xd, xd->dst.y_buffer - xd->dst.y_stride, xd->dst.y_buffer - 1,
            xd->dst.y_stride, xd->predictor, 16);
        macro_block_yrd(x, &rd.rate_y, &distortion);
        rd.rate2 += rd.rate_y;
        rd.distortion2 += distortion;
        rd.rate2 += x->mbmode_cost[x->e_mbd.frame_type]
                                  [x->e_mbd.mode_info_context->mbmi.mode];
        assert(uv_intra_done);
        rd.rate2 += uv_intra_rate;
        rd.rate_uv = uv_intra_rate_tokenonly;
        rd.distortion2 += uv_intra_distortion;
        rd.distortion_uv = uv_intra_distortion;
        break;
      }

      case NEWMV: {
        int thissme;
        int bestsme = INT_MAX;
        int step_param = cpi->sf.first_step;
        int further_steps;
        int n;
        /* If last step (1-away) of n-step search doesn't pick the center point
           as the best match, we will do a final 1-away diamond refining search
        */
        int do_refine = 1;

        int sadpb = x->sadperbit16;
        int_mv mvp_full;

        /* Full-pel window of MAX_FULL_PEL_VAL around best_ref_mv. */
        int col_min = ((best_ref_mv.as_mv.col + 7) >> 3) - MAX_FULL_PEL_VAL;
        int row_min = ((best_ref_mv.as_mv.row + 7) >> 3) - MAX_FULL_PEL_VAL;
        int col_max = (best_ref_mv.as_mv.col >> 3) + MAX_FULL_PEL_VAL;
        int row_max = (best_ref_mv.as_mv.row >> 3) + MAX_FULL_PEL_VAL;

        /* Saved so the MV limits can be restored after the search. */
        int tmp_col_min = x->mv_col_min;
        int tmp_col_max = x->mv_col_max;
        int tmp_row_min = x->mv_row_min;
        int tmp_row_max = x->mv_row_max;

        if (!saddone) {
          vp8_cal_sad(cpi, xd, x, recon_yoffset, &near_sadidx[0]);
          saddone = 1;
        }

        vp8_mv_pred(cpi, &x->e_mbd, x->e_mbd.mode_info_context, &mvp,
                    x->e_mbd.mode_info_context->mbmi.ref_frame,
                    cpi->common.ref_frame_sign_bias, &sr, &near_sadidx[0]);

        mvp_full.as_mv.col = mvp.as_mv.col >> 3;
        mvp_full.as_mv.row = mvp.as_mv.row >> 3;

        /* Get intersection of UMV window and valid MV window to
         * reduce # of checks in diamond search.
         */
        if (x->mv_col_min < col_min) x->mv_col_min = col_min;
        if (x->mv_col_max > col_max) x->mv_col_max = col_max;
        if (x->mv_row_min < row_min) x->mv_row_min = row_min;
        if (x->mv_row_max > row_max) x->mv_row_max = row_max;

        /* adjust search range according to sr from mv prediction */
        if (sr > step_param) step_param = sr;

        /* Initial step/diamond search */
        {
          bestsme = cpi->diamond_search_sad(
              x, b, d, &mvp_full, &d->bmi.mv, step_param, sadpb, &num00,
              &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv);
          mode_mv[NEWMV].as_int = d->bmi.mv.as_int;

          /* Further step/diamond searches as necessary */
          further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;

          n = num00;
          num00 = 0;

          /* If there won't be more n-step search, check to see if refining
           * search is needed. */
          if (n > further_steps) do_refine = 0;

          while (n < further_steps) {
            n++;

            if (num00) {
              num00--;
            } else {
              thissme = cpi->diamond_search_sad(
                  x, b, d, &mvp_full, &d->bmi.mv, step_param + n, sadpb, &num00,
                  &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv);

              /* check to see if refining search is needed. */
              if (num00 > (further_steps - n)) do_refine = 0;

              /* Keep the better of the two results in both
               * mode_mv[NEWMV] and d->bmi.mv.
               */
              if (thissme < bestsme) {
                bestsme = thissme;
                mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
              } else {
                d->bmi.mv.as_int = mode_mv[NEWMV].as_int;
              }
            }
          }
        }

        /* final 1-away diamond refining search */
        if (do_refine == 1) {
          int search_range;

          search_range = 8;

          thissme = cpi->refining_search_sad(
              x, b, d, &d->bmi.mv, sadpb, search_range,
              &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv);

          if (thissme < bestsme) {
            bestsme = thissme;
            mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
          } else {
            d->bmi.mv.as_int = mode_mv[NEWMV].as_int;
          }
        }

        /* Restore the MV limits that were narrowed for the search. */
        x->mv_col_min = tmp_col_min;
        x->mv_col_max = tmp_col_max;
        x->mv_row_min = tmp_row_min;
        x->mv_row_max = tmp_row_max;

        if (bestsme < INT_MAX) {
          int dis; /* TODO: use dis in distortion calculation later. */
          unsigned int sse;
          cpi->find_fractional_mv_step(
              x, b, d, &d->bmi.mv, &best_ref_mv, x->errorperbit,
              &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &dis, &sse);
        }

        mode_mv[NEWMV].as_int = d->bmi.mv.as_int;

        /* Add the new motion vector cost to our rolling cost variable */
        rd.rate2 +=
            vp8_mv_bit_cost(&mode_mv[NEWMV], &best_ref_mv, x->mvcost, 96);
      }
        // fall through

      case NEARESTMV:
      case NEARMV:
        /* Clip "next_nearest" so that it does not extend too far out
         * of image
         */
        vp8_clamp_mv2(&mode_mv[this_mode], xd);

        /* Do not bother proceeding if the vector (from newmv, nearest
         * or near) is 0,0 as this should then be coded using the zeromv
         * mode.
         */
        if (((this_mode == NEARMV) || (this_mode == NEARESTMV)) &&
            (mode_mv[this_mode].as_int == 0)) {
          continue;
        }
        // fall through

      case ZEROMV:

        /* Trap vectors that reach beyond the UMV borders
         * Note that ALL New MV, Nearest MV Near MV and Zero MV code
         * drops through to this point because of the lack of break
         * statements in the previous two cases.
         */
        if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) ||
            ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) ||
            ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) ||
            ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max)) {
          continue;
        }

        vp8_set_mbmode_and_mvs(x, this_mode, &mode_mv[this_mode]);
        this_rd = evaluate_inter_mode_rd(mdcounts, &rd, &disable_skip, cpi, x);
        break;

      default: break;
    }

    this_rd =
        calculate_final_rd_costs(this_rd, &rd, &other_cost, disable_skip,
                                 uv_intra_tteob, intra_rd_penalty, cpi, x);

    /* Keep record of best intra distortion */
    if ((x->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME) &&
        (this_rd < best_mode.intra_rd)) {
      best_mode.intra_rd = this_rd;
      *returnintra = rd.distortion2;
    }
#if CONFIG_TEMPORAL_DENOISING
    if (cpi->oxcf.noise_sensitivity) {
      unsigned int sse;
      vp8_get_inter_mbpred_error(x, &cpi->fn_ptr[BLOCK_16X16], &sse,
                                 mode_mv[this_mode]);

      if (sse < best_rd_sse) best_rd_sse = sse;

      /* Store for later use by denoiser. */
      if (this_mode == ZEROMV && sse < zero_mv_sse) {
        zero_mv_sse = sse;
        x->best_zeromv_reference_frame =
            x->e_mbd.mode_info_context->mbmi.ref_frame;
      }

      /* Store the best NEWMV in x for later use in the denoiser. */
      if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV && sse < best_sse) {
        best_sse = sse;
        vp8_get_inter_mbpred_error(x, &cpi->fn_ptr[BLOCK_16X16], &best_sse,
                                   mode_mv[this_mode]);
        x->best_sse_inter_mode = NEWMV;
        x->best_sse_mv = x->e_mbd.mode_info_context->mbmi.mv;
        x->need_to_clamp_best_mvs =
            x->e_mbd.mode_info_context->mbmi.need_to_clamp_mvs;
        x->best_reference_frame = x->e_mbd.mode_info_context->mbmi.ref_frame;
      }
    }
#endif

    /* Did this mode help, i.e. is it the new best mode? x->skip (set by
     * evaluate_inter_mode_rd()) forces acceptance regardless of cost.
     */
    if (this_rd < best_mode.rd || x->skip) {
      /* Note index of best mode so far */
      best_mode_index = mode_index;
      *returnrate = rd.rate2;
      *returndistortion = rd.distortion2;
      if (this_mode <= B_PRED) {
        x->e_mbd.mode_info_context->mbmi.uv_mode = uv_intra_mode;
        /* required for left and above block mv */
        x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
      }
      update_best_mode(&best_mode, this_rd, &rd, other_cost, x);

      /* Testing this mode gave rise to an improvement in best error
       * score. Lower threshold a bit for next time
       */
      x->rd_thresh_mult[mode_index] =
          (x->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2))
              ? x->rd_thresh_mult[mode_index] - 2
              : MIN_THRESHMULT;
    }

    /* If the mode did not help improve the best error case then raise
     * the threshold for testing that mode next time around.
     */
    else {
      x->rd_thresh_mult[mode_index] += 4;

      if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT) {
        x->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
      }
    }
    x->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) *
                                 x->rd_thresh_mult[mode_index];

    /* Once a skip has been decided no further candidates are examined. */
    if (x->skip) break;
  }

  /* Reduce the activation RD thresholds for the best choice mode */
  if ((cpi->rd_baseline_thresh[best_mode_index] > 0) &&
      (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2))) {
    int best_adjustment = (x->rd_thresh_mult[best_mode_index] >> 2);

    x->rd_thresh_mult[best_mode_index] =
        (x->rd_thresh_mult[best_mode_index] >=
         (MIN_THRESHMULT + best_adjustment))
            ? x->rd_thresh_mult[best_mode_index] - best_adjustment
            : MIN_THRESHMULT;
    x->rd_threshes[best_mode_index] =
        (cpi->rd_baseline_thresh[best_mode_index] >> 7) *
        x->rd_thresh_mult[best_mode_index];
  }

#if CONFIG_TEMPORAL_DENOISING
  if (cpi->oxcf.noise_sensitivity) {
    int block_index = mb_row * cpi->common.mb_cols + mb_col;
    if (x->best_sse_inter_mode == DC_PRED) {
      /* No best MV found. */
      x->best_sse_inter_mode = best_mode.mbmode.mode;
      x->best_sse_mv = best_mode.mbmode.mv;
      x->need_to_clamp_best_mvs = best_mode.mbmode.need_to_clamp_mvs;
      x->best_reference_frame = best_mode.mbmode.ref_frame;
      best_sse = best_rd_sse;
    }
    vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse,
                            recon_yoffset, recon_uvoffset, &cpi->common.lf_info,
                            mb_row, mb_col, block_index, 0);

    /* Reevaluate ZEROMV after denoising. */
    if (best_mode.mbmode.ref_frame == INTRA_FRAME &&
        x->best_zeromv_reference_frame != INTRA_FRAME) {
      int this_rd = INT_MAX;
      int disable_skip = 0;
      int other_cost = 0;
      int this_ref_frame = x->best_zeromv_reference_frame;
      rd.rate2 =
          x->ref_frame_cost[this_ref_frame] + vp8_cost_mv_ref(ZEROMV, mdcounts);
      rd.distortion2 = 0;

      /* set up the proper prediction buffers for the frame */
      x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame;
      x->e_mbd.pre.y_buffer = plane[this_ref_frame][0];
      x->e_mbd.pre.u_buffer = plane[this_ref_frame][1];
      x->e_mbd.pre.v_buffer = plane[this_ref_frame][2];

      x->e_mbd.mode_info_context->mbmi.mode = ZEROMV;
      x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
      x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;

      this_rd = evaluate_inter_mode_rd(mdcounts, &rd, &disable_skip, cpi, x);
      this_rd =
          calculate_final_rd_costs(this_rd, &rd, &other_cost, disable_skip,
                                   uv_intra_tteob, intra_rd_penalty, cpi, x);
      if (this_rd < best_mode.rd || x->skip) {
        *returnrate = rd.rate2;
        *returndistortion = rd.distortion2;
        update_best_mode(&best_mode, this_rd, &rd, other_cost, x);
      }
    }
  }
#endif

  /* When the source frame is the alt ref, anything other than
   * ZEROMV/ALTREF_FRAME is overridden and the function returns early.
   */
  if (cpi->is_src_frame_alt_ref &&
      (best_mode.mbmode.mode != ZEROMV ||
       best_mode.mbmode.ref_frame != ALTREF_FRAME)) {
    x->e_mbd.mode_info_context->mbmi.mode = ZEROMV;
    x->e_mbd.mode_info_context->mbmi.ref_frame = ALTREF_FRAME;
    x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
    x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
    x->e_mbd.mode_info_context->mbmi.mb_skip_coeff =
        (cpi->common.mb_no_coeff_skip);
    x->e_mbd.mode_info_context->mbmi.partitioning = 0;
    return;
  }

  /* macroblock modes */
  memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mode.mbmode,
         sizeof(MB_MODE_INFO));

  /* Restore the per-block intra modes saved with the best mode. */
  if (best_mode.mbmode.mode == B_PRED) {
    for (i = 0; i < 16; ++i) {
      xd->mode_info_context->bmi[i].as_mode = best_mode.bmodes[i].as_mode;
    }
  }

  /* Restore the per-block MVs and partition info saved with the best mode. */
  if (best_mode.mbmode.mode == SPLITMV) {
    for (i = 0; i < 16; ++i) {
      xd->mode_info_context->bmi[i].mv.as_int = best_mode.bmodes[i].mv.as_int;
    }

    memcpy(x->partition_info, &best_mode.partition, sizeof(PARTITION_INFO));

    /* The macroblock-level MV is taken from the last (index 15) block. */
    x->e_mbd.mode_info_context->mbmi.mv.as_int =
        x->partition_info->bmi[15].mv.as_int;
  }

  /* sign_bias reflects the last candidate tried; if the chosen reference
   * frame has the other sign bias, use the other set's best_ref_mv.
   */
  if (sign_bias !=
      cpi->common.ref_frame_sign_bias[xd->mode_info_context->mbmi.ref_frame]) {
    best_ref_mv.as_int = best_ref_mv_sb[!sign_bias].as_int;
  }

  rd_update_mvcount(x, &best_ref_mv);
}
2378 | | |
2379 | 346k | void vp8_rd_pick_intra_mode(MACROBLOCK *x, int *rate) { |
2380 | 346k | int error4x4, error16x16; |
2381 | 346k | int rate4x4, rate16x16 = 0, rateuv; |
2382 | 346k | int dist4x4, dist16x16, distuv; |
2383 | 346k | int rate_; |
2384 | 346k | int rate4x4_tokenonly = 0; |
2385 | 346k | int rate16x16_tokenonly = 0; |
2386 | 346k | int rateuv_tokenonly = 0; |
2387 | | |
2388 | 346k | x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME; |
2389 | | |
2390 | 346k | rd_pick_intra_mbuv_mode(x, &rateuv, &rateuv_tokenonly, &distuv); |
2391 | 346k | rate_ = rateuv; |
2392 | | |
2393 | 346k | error16x16 = rd_pick_intra16x16mby_mode(x, &rate16x16, &rate16x16_tokenonly, |
2394 | 346k | &dist16x16); |
2395 | | |
2396 | 346k | error4x4 = rd_pick_intra4x4mby_modes(x, &rate4x4, &rate4x4_tokenonly, |
2397 | 346k | &dist4x4, error16x16); |
2398 | | |
2399 | 346k | if (error4x4 < error16x16) { |
2400 | 187k | x->e_mbd.mode_info_context->mbmi.mode = B_PRED; |
2401 | 187k | rate_ += rate4x4; |
2402 | 187k | } else { |
2403 | 158k | rate_ += rate16x16; |
2404 | 158k | } |
2405 | | |
2406 | 346k | *rate = rate_; |
2407 | 346k | } |