Coverage Report

Created: 2026-05-23 07:06

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libvpx/vp8/encoder/rdopt.c
Line
Count
Source
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5
 *  that can be found in the LICENSE file in the root of the source
6
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
9
 */
10
11
#include <assert.h>
12
#include <stdio.h>
13
#include <math.h>
14
#include <limits.h>
15
#include <assert.h>
16
#include "vpx_config.h"
17
#include "vp8_rtcd.h"
18
#include "./vpx_dsp_rtcd.h"
19
#include "encodeframe.h"
20
#include "tokenize.h"
21
#include "treewriter.h"
22
#include "onyx_int.h"
23
#include "modecosts.h"
24
#include "encodeintra.h"
25
#include "pickinter.h"
26
#include "vp8/common/common.h"
27
#include "vp8/common/entropymode.h"
28
#include "vp8/common/reconinter.h"
29
#include "vp8/common/reconintra.h"
30
#include "vp8/common/reconintra4x4.h"
31
#include "vp8/common/findnearmv.h"
32
#include "vp8/common/quant_common.h"
33
#include "encodemb.h"
34
#include "vp8/encoder/quantize.h"
35
#include "vpx_dsp/variance.h"
36
#include "vpx_ports/system_state.h"
37
#include "mcomp.h"
38
#include "rdopt.h"
39
#include "vpx_mem/vpx_mem.h"
40
#include "vp8/common/systemdependent.h"
41
#if CONFIG_TEMPORAL_DENOISING
42
#include "denoising.h"
43
#endif
44
extern void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x);
45
46
1.54M
#define MAXF(a, b) (((a) > (b)) ? (a) : (b))
47
48
typedef struct rate_distortion_struct {
49
  int rate2;
50
  int rate_y;
51
  int rate_uv;
52
  int distortion2;
53
  int distortion_uv;
54
} RATE_DISTORTION;
55
56
typedef struct best_mode_struct {
57
  int yrd;
58
  int rd;
59
  int intra_rd;
60
  MB_MODE_INFO mbmode;
61
  union b_mode_info bmodes[16];
62
  PARTITION_INFO partition;
63
} BEST_MODE;
64
65
static const int auto_speed_thresh[17] = { 1000, 200, 150, 130, 150, 125,
66
                                           120,  115, 115, 115, 115, 115,
67
                                           115,  115, 115, 115, 105 };
68
69
const MB_PREDICTION_MODE vp8_mode_order[MAX_MODES] = {
70
  ZEROMV,    DC_PRED,
71
72
  NEARESTMV, NEARMV,
73
74
  ZEROMV,    NEARESTMV,
75
76
  ZEROMV,    NEARESTMV,
77
78
  NEARMV,    NEARMV,
79
80
  V_PRED,    H_PRED,    TM_PRED,
81
82
  NEWMV,     NEWMV,     NEWMV,
83
84
  SPLITMV,   SPLITMV,   SPLITMV,
85
86
  B_PRED,
87
};
88
89
/* This table determines the search order in reference frame priority order,
90
 * which may not necessarily match INTRA,LAST,GOLDEN,ARF
91
 */
92
const int vp8_ref_frame_order[MAX_MODES] = {
93
  1, 0,
94
95
  1, 1,
96
97
  2, 2,
98
99
  3, 3,
100
101
  2, 3,
102
103
  0, 0, 0,
104
105
  1, 2, 3,
106
107
  1, 2, 3,
108
109
  0,
110
};
111
112
static void fill_token_costs(
113
    int c[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS],
114
    const vp8_prob p[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS]
115
148k
                    [ENTROPY_NODES]) {
116
148k
  int i, j, k;
117
118
744k
  for (i = 0; i < BLOCK_TYPES; ++i) {
119
5.35M
    for (j = 0; j < COEF_BANDS; ++j) {
120
19.0M
      for (k = 0; k < PREV_COEF_CONTEXTS; ++k) {
121
        /* check for pt=0 and band > 1 if block type 0
122
         * and 0 if blocktype 1
123
         */
124
14.2M
        if (k == 0 && j > (i == 0)) {
125
4.01M
          vp8_cost_tokens2(c[i][j][k], p[i][j][k], vp8_coef_tree, 2);
126
10.2M
        } else {
127
10.2M
          vp8_cost_tokens(c[i][j][k], p[i][j][k], vp8_coef_tree);
128
10.2M
        }
129
14.2M
      }
130
4.76M
    }
131
595k
  }
132
148k
}
133
134
static const int rd_iifactor[32] = { 4, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0,
135
                                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
136
                                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
137
138
/* values are now correlated to quantizer */
139
static const int sad_per_bit16lut[QINDEX_RANGE] = {
140
  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  3,  3,  3,
141
  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  4,  4,  4,  4,  4,  4,  4,  4,
142
  4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  6,  6,  6,
143
  6,  6,  6,  6,  6,  6,  6,  6,  6,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
144
  7,  7,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  9,  9,  9,  9,  9,
145
  9,  9,  9,  9,  9,  9,  9,  10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11,
146
  11, 11, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 14, 14
147
};
148
static const int sad_per_bit4lut[QINDEX_RANGE] = {
149
  2,  2,  2,  2,  2,  2,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
150
  3,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  6,  6,
151
  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  7,  7,  7,  7,  7,  7,  7,  7,  7,
152
  7,  7,  7,  7,  8,  8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  10, 10, 10, 10,
153
  10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12,
154
  12, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16,
155
  16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 19, 20, 20, 20,
156
};
157
158
148k
void vp8cx_initialize_me_consts(VP8_COMP *cpi, int QIndex) {
159
148k
  cpi->mb.sadperbit16 = sad_per_bit16lut[QIndex];
160
148k
  cpi->mb.sadperbit4 = sad_per_bit4lut[QIndex];
161
148k
}
162
163
148k
void vp8_initialize_rd_consts(VP8_COMP *cpi, MACROBLOCK *x, int Qvalue) {
164
148k
  int q;
165
148k
  int i;
166
148k
  double capped_q = (Qvalue < 160) ? (double)Qvalue : 160.0;
167
148k
  double rdconst = 2.80;
168
169
148k
  vpx_clear_system_state();
170
171
  /* Further tests required to see if optimum is different
172
   * for key frames, golden frames and arf frames.
173
   */
174
148k
  cpi->RDMULT = (int)(rdconst * (capped_q * capped_q));
175
176
  /* Extend rate multiplier along side quantizer zbin increases */
177
148k
  if (cpi->mb.zbin_over_quant > 0) {
178
30.6k
    double oq_factor;
179
30.6k
    double modq;
180
181
    /* Experimental code using the same basic equation as used for Q above
182
     * The units of cpi->mb.zbin_over_quant are 1/128 of Q bin size
183
     */
184
30.6k
    oq_factor = 1.0 + ((double)0.0015625 * cpi->mb.zbin_over_quant);
185
30.6k
    modq = (int)((double)capped_q * oq_factor);
186
30.6k
    cpi->RDMULT = (int)(rdconst * (modq * modq));
187
30.6k
  }
188
189
148k
  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
190
0
    if (cpi->twopass.next_iiratio > 31) {
191
0
      cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
192
0
    } else {
193
0
      cpi->RDMULT +=
194
0
          (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
195
0
    }
196
0
  }
197
198
148k
  cpi->mb.errorperbit = (cpi->RDMULT / 110);
199
148k
  cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);
200
201
148k
  vp8_set_speed_features(cpi);
202
203
3.12M
  for (i = 0; i < MAX_MODES; ++i) {
204
2.97M
    x->mode_test_hit_counts[i] = 0;
205
2.97M
  }
206
207
148k
  q = (int)pow(Qvalue, 1.25);
208
209
148k
  if (q < 8) q = 8;
210
211
148k
  if (cpi->RDMULT > 1000) {
212
83.9k
    cpi->RDDIV = 1;
213
83.9k
    cpi->RDMULT /= 100;
214
215
1.76M
    for (i = 0; i < MAX_MODES; ++i) {
216
1.67M
      if (cpi->sf.thresh_mult[i] < INT_MAX) {
217
1.59M
        x->rd_threshes[i] = cpi->sf.thresh_mult[i] * q / 100;
218
1.59M
      } else {
219
80.2k
        x->rd_threshes[i] = INT_MAX;
220
80.2k
      }
221
222
1.67M
      cpi->rd_baseline_thresh[i] = x->rd_threshes[i];
223
1.67M
    }
224
83.9k
  } else {
225
64.9k
    cpi->RDDIV = 100;
226
227
1.36M
    for (i = 0; i < MAX_MODES; ++i) {
228
1.29M
      if (cpi->sf.thresh_mult[i] < (INT_MAX / q)) {
229
1.21M
        x->rd_threshes[i] = cpi->sf.thresh_mult[i] * q;
230
1.21M
      } else {
231
87.3k
        x->rd_threshes[i] = INT_MAX;
232
87.3k
      }
233
234
1.29M
      cpi->rd_baseline_thresh[i] = x->rd_threshes[i];
235
1.29M
    }
236
64.9k
  }
237
238
148k
  {
239
    /* build token cost array for the type of frame we have now */
240
148k
    FRAME_CONTEXT *l = &cpi->lfc_n;
241
242
148k
    if (cpi->common.refresh_alt_ref_frame) {
243
35.3k
      l = &cpi->lfc_a;
244
113k
    } else if (cpi->common.refresh_golden_frame) {
245
10.8k
      l = &cpi->lfc_g;
246
10.8k
    }
247
248
148k
    fill_token_costs(cpi->mb.token_costs,
249
148k
                     (const vp8_prob(*)[8][3][11])l->coef_probs);
250
    /*
251
    fill_token_costs(
252
        cpi->mb.token_costs,
253
        (const vp8_prob( *)[8][3][11]) cpi->common.fc.coef_probs);
254
    */
255
256
    /* TODO make these mode costs depend on last,alt or gold too.  (jbb) */
257
148k
    vp8_init_mode_costs(cpi);
258
148k
  }
259
148k
}
260
261
55.8k
void vp8_auto_select_speed(VP8_COMP *cpi) {
262
55.8k
  int milliseconds_for_compress = (int)(1000000 / cpi->framerate);
263
264
55.8k
  milliseconds_for_compress =
265
55.8k
      milliseconds_for_compress * (16 - cpi->oxcf.cpu_used) / 16;
266
267
#if 0
268
269
    if (0)
270
    {
271
        FILE *f;
272
273
        f = fopen("speed.stt", "a");
274
        fprintf(f, " %8ld %10ld %10ld %10ld\n",
275
                cpi->common.current_video_frame, cpi->Speed, milliseconds_for_compress, cpi->avg_pick_mode_time);
276
        fclose(f);
277
    }
278
279
#endif
280
281
55.8k
  if (cpi->avg_pick_mode_time < milliseconds_for_compress &&
282
55.8k
      (cpi->avg_encode_time - cpi->avg_pick_mode_time) <
283
55.8k
          milliseconds_for_compress) {
284
55.8k
    if (cpi->avg_pick_mode_time == 0) {
285
3.04k
      cpi->Speed = 4;
286
52.8k
    } else {
287
52.8k
      if (milliseconds_for_compress * 100 < cpi->avg_encode_time * 95) {
288
1
        cpi->Speed += 2;
289
1
        cpi->avg_pick_mode_time = 0;
290
1
        cpi->avg_encode_time = 0;
291
292
1
        if (cpi->Speed > 16) {
293
0
          cpi->Speed = 16;
294
0
        }
295
1
      }
296
297
52.8k
      if (milliseconds_for_compress * 100 >
298
52.8k
          cpi->avg_encode_time * auto_speed_thresh[cpi->Speed]) {
299
52.7k
        cpi->Speed -= 1;
300
52.7k
        cpi->avg_pick_mode_time = 0;
301
52.7k
        cpi->avg_encode_time = 0;
302
303
        /* In real-time mode, cpi->speed is in [4, 16]. */
304
52.7k
        if (cpi->Speed < 4) {
305
52.7k
          cpi->Speed = 4;
306
52.7k
        }
307
52.7k
      }
308
52.8k
    }
309
55.8k
  } else {
310
0
    cpi->Speed += 4;
311
312
0
    if (cpi->Speed > 16) cpi->Speed = 16;
313
314
0
    cpi->avg_pick_mode_time = 0;
315
0
    cpi->avg_encode_time = 0;
316
0
  }
317
55.8k
}
318
319
0
int vp8_block_error_c(short *coeff, short *dqcoeff) {
320
0
  int i;
321
0
  int error = 0;
322
323
0
  for (i = 0; i < 16; ++i) {
324
0
    int this_diff = coeff[i] - dqcoeff[i];
325
0
    error += this_diff * this_diff;
326
0
  }
327
328
0
  return error;
329
0
}
330
331
0
int vp8_mbblock_error_c(MACROBLOCK *mb, int dc) {
332
0
  BLOCK *be;
333
0
  BLOCKD *bd;
334
0
  int i, j;
335
0
  int berror, error = 0;
336
337
0
  for (i = 0; i < 16; ++i) {
338
0
    be = &mb->block[i];
339
0
    bd = &mb->e_mbd.block[i];
340
341
0
    berror = 0;
342
343
0
    for (j = dc; j < 16; ++j) {
344
0
      int this_diff = be->coeff[j] - bd->dqcoeff[j];
345
0
      berror += this_diff * this_diff;
346
0
    }
347
348
0
    error += berror;
349
0
  }
350
351
0
  return error;
352
0
}
353
354
0
int vp8_mbuverror_c(MACROBLOCK *mb) {
355
0
  BLOCK *be;
356
0
  BLOCKD *bd;
357
358
0
  int i;
359
0
  int error = 0;
360
361
0
  for (i = 16; i < 24; ++i) {
362
0
    be = &mb->block[i];
363
0
    bd = &mb->e_mbd.block[i];
364
365
0
    error += vp8_block_error_c(be->coeff, bd->dqcoeff);
366
0
  }
367
368
0
  return error;
369
0
}
370
371
13.5k
int VP8_UVSSE(MACROBLOCK *x) {
372
13.5k
  unsigned char *uptr, *vptr;
373
13.5k
  unsigned char *upred_ptr = (*(x->block[16].base_src) + x->block[16].src);
374
13.5k
  unsigned char *vpred_ptr = (*(x->block[20].base_src) + x->block[20].src);
375
13.5k
  int uv_stride = x->block[16].src_stride;
376
377
13.5k
  unsigned int sse1 = 0;
378
13.5k
  unsigned int sse2 = 0;
379
13.5k
  int mv_row = x->e_mbd.mode_info_context->mbmi.mv.as_mv.row;
380
13.5k
  int mv_col = x->e_mbd.mode_info_context->mbmi.mv.as_mv.col;
381
13.5k
  int offset;
382
13.5k
  int pre_stride = x->e_mbd.pre.uv_stride;
383
384
13.5k
  if (mv_row < 0) {
385
608
    mv_row -= 1;
386
12.9k
  } else {
387
12.9k
    mv_row += 1;
388
12.9k
  }
389
390
13.5k
  if (mv_col < 0) {
391
1.18k
    mv_col -= 1;
392
12.3k
  } else {
393
12.3k
    mv_col += 1;
394
12.3k
  }
395
396
13.5k
  mv_row /= 2;
397
13.5k
  mv_col /= 2;
398
399
13.5k
  offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
400
13.5k
  uptr = x->e_mbd.pre.u_buffer + offset;
401
13.5k
  vptr = x->e_mbd.pre.v_buffer + offset;
402
403
13.5k
  if ((mv_row | mv_col) & 7) {
404
2.67k
    vpx_sub_pixel_variance8x8(uptr, pre_stride, mv_col & 7, mv_row & 7,
405
2.67k
                              upred_ptr, uv_stride, &sse2);
406
2.67k
    vpx_sub_pixel_variance8x8(vptr, pre_stride, mv_col & 7, mv_row & 7,
407
2.67k
                              vpred_ptr, uv_stride, &sse1);
408
2.67k
    sse2 += sse1;
409
10.8k
  } else {
410
10.8k
    vpx_variance8x8(uptr, pre_stride, upred_ptr, uv_stride, &sse2);
411
10.8k
    vpx_variance8x8(vptr, pre_stride, vpred_ptr, uv_stride, &sse1);
412
10.8k
    sse2 += sse1;
413
10.8k
  }
414
13.5k
  return sse2;
415
13.5k
}
416
417
static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, int type, ENTROPY_CONTEXT *a,
418
442M
                       ENTROPY_CONTEXT *l) {
419
442M
  int c = !type; /* start at coef 0, unless Y with Y2 */
420
442M
  int eob = (int)(*b->eob);
421
442M
  int pt; /* surrounding block/prev coef predictor */
422
442M
  int cost = 0;
423
442M
  short *qcoeff_ptr = b->qcoeff;
424
425
442M
  VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
426
427
442M
  assert(eob <= 16);
428
4.23G
  for (; c < eob; ++c) {
429
3.79G
    const int v = qcoeff_ptr[vp8_default_zig_zag1d[c]];
430
3.79G
    const int t = vp8_dct_value_tokens_ptr[v].Token;
431
3.79G
    cost += mb->token_costs[type][vp8_coef_bands[c]][pt][t];
432
3.79G
    cost += vp8_dct_value_cost_ptr[v];
433
3.79G
    pt = vp8_prev_token_class[t];
434
3.79G
  }
435
436
442M
  if (c < 16) {
437
290M
    cost += mb->token_costs[type][vp8_coef_bands[c]][pt][DCT_EOB_TOKEN];
438
290M
  }
439
440
442M
  pt = (c != !type); /* is eob first coefficient; */
441
442M
  *a = *l = pt;
442
443
442M
  return cost;
444
442M
}
445
446
8.23M
static int vp8_rdcost_mby(MACROBLOCK *mb) {
447
8.23M
  int cost = 0;
448
8.23M
  int b;
449
8.23M
  MACROBLOCKD *x = &mb->e_mbd;
450
8.23M
  ENTROPY_CONTEXT_PLANES t_above, t_left;
451
8.23M
  ENTROPY_CONTEXT *ta;
452
8.23M
  ENTROPY_CONTEXT *tl;
453
454
8.23M
  t_above = *mb->e_mbd.above_context;
455
8.23M
  t_left = *mb->e_mbd.left_context;
456
457
8.23M
  ta = (ENTROPY_CONTEXT *)&t_above;
458
8.23M
  tl = (ENTROPY_CONTEXT *)&t_left;
459
460
139M
  for (b = 0; b < 16; ++b) {
461
131M
    cost += cost_coeffs(mb, x->block + b, PLANE_TYPE_Y_NO_DC,
462
131M
                        ta + vp8_block2above[b], tl + vp8_block2left[b]);
463
131M
  }
464
465
8.23M
  cost += cost_coeffs(mb, x->block + 24, PLANE_TYPE_Y2,
466
8.23M
                      ta + vp8_block2above[24], tl + vp8_block2left[24]);
467
468
8.23M
  return cost;
469
8.23M
}
470
471
8.23M
static void macro_block_yrd(MACROBLOCK *mb, int *Rate, int *Distortion) {
472
8.23M
  int b;
473
8.23M
  MACROBLOCKD *const x = &mb->e_mbd;
474
8.23M
  BLOCK *const mb_y2 = mb->block + 24;
475
8.23M
  BLOCKD *const x_y2 = x->block + 24;
476
8.23M
  short *Y2DCPtr = mb_y2->src_diff;
477
8.23M
  BLOCK *beptr;
478
8.23M
  int d;
479
480
8.23M
  vp8_subtract_mby(mb->src_diff, *(mb->block[0].base_src),
481
8.23M
                   mb->block[0].src_stride, mb->e_mbd.predictor, 16);
482
483
  /* Fdct and building the 2nd order block */
484
74.0M
  for (beptr = mb->block; beptr < mb->block + 16; beptr += 2) {
485
65.8M
    mb->short_fdct8x4(beptr->src_diff, beptr->coeff, 32);
486
65.8M
    *Y2DCPtr++ = beptr->coeff[0];
487
65.8M
    *Y2DCPtr++ = beptr->coeff[16];
488
65.8M
  }
489
490
  /* 2nd order fdct */
491
8.23M
  mb->short_walsh4x4(mb_y2->src_diff, mb_y2->coeff, 8);
492
493
  /* Quantization */
494
139M
  for (b = 0; b < 16; ++b) {
495
131M
    mb->quantize_b(&mb->block[b], &mb->e_mbd.block[b]);
496
131M
  }
497
498
  /* DC predication and Quantization of 2nd Order block */
499
8.23M
  mb->quantize_b(mb_y2, x_y2);
500
501
  /* Distortion */
502
8.23M
  d = vp8_mbblock_error(mb, 1) << 2;
503
8.23M
  d += vp8_block_error(mb_y2->coeff, x_y2->dqcoeff);
504
505
8.23M
  *Distortion = (d >> 4);
506
507
  /* rate */
508
8.23M
  *Rate = vp8_rdcost_mby(mb);
509
8.23M
}
510
511
25.9M
static void copy_predictor(unsigned char *dst, const unsigned char *predictor) {
512
25.9M
  const unsigned int *p = (const unsigned int *)predictor;
513
25.9M
  unsigned int *d = (unsigned int *)dst;
514
25.9M
  d[0] = p[0];
515
25.9M
  d[4] = p[4];
516
25.9M
  d[8] = p[8];
517
25.9M
  d[12] = p[12];
518
25.9M
}
519
static int rd_pick_intra4x4block(MACROBLOCK *x, BLOCK *be, BLOCKD *b,
520
                                 B_PREDICTION_MODE *best_mode,
521
                                 const int *bmode_costs, ENTROPY_CONTEXT *a,
522
                                 ENTROPY_CONTEXT *l,
523
524
                                 int *bestrate, int *bestratey,
525
13.9M
                                 int *bestdistortion) {
526
13.9M
  B_PREDICTION_MODE mode;
527
13.9M
  int best_rd = INT_MAX;
528
13.9M
  int rate = 0;
529
13.9M
  int distortion;
530
531
13.9M
  ENTROPY_CONTEXT ta = *a, tempa = *a;
532
13.9M
  ENTROPY_CONTEXT tl = *l, templ = *l;
533
  /*
534
   * The predictor buffer is a 2d buffer with a stride of 16.  Create
535
   * a temp buffer that meets the stride requirements, but we are only
536
   * interested in the left 4x4 block
537
   * */
538
13.9M
  DECLARE_ALIGNED(16, unsigned char, best_predictor[16 * 4]);
539
13.9M
  DECLARE_ALIGNED(16, short, best_dqcoeff[16]);
540
13.9M
  int dst_stride = x->e_mbd.dst.y_stride;
541
13.9M
  unsigned char *dst = x->e_mbd.dst.y_buffer + b->offset;
542
543
13.9M
  unsigned char *Above = dst - dst_stride;
544
13.9M
  unsigned char *yleft = dst - 1;
545
13.9M
  unsigned char top_left = Above[-1];
546
547
153M
  for (mode = B_DC_PRED; mode <= B_HU_PRED; ++mode) {
548
139M
    int this_rd;
549
139M
    int ratey;
550
551
139M
    rate = bmode_costs[mode];
552
553
139M
    vp8_intra4x4_predict(Above, yleft, dst_stride, mode, b->predictor, 16,
554
139M
                         top_left);
555
139M
    vp8_subtract_b(be, b, 16);
556
139M
    x->short_fdct4x4(be->src_diff, be->coeff, 32);
557
139M
    x->quantize_b(be, b);
558
559
139M
    tempa = ta;
560
139M
    templ = tl;
561
562
139M
    ratey = cost_coeffs(x, b, PLANE_TYPE_Y_WITH_DC, &tempa, &templ);
563
139M
    rate += ratey;
564
139M
    distortion = vp8_block_error(be->coeff, b->dqcoeff) >> 2;
565
566
139M
    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
567
568
139M
    if (this_rd < best_rd) {
569
25.9M
      *bestrate = rate;
570
25.9M
      *bestratey = ratey;
571
25.9M
      *bestdistortion = distortion;
572
25.9M
      best_rd = this_rd;
573
25.9M
      *best_mode = mode;
574
25.9M
      *a = tempa;
575
25.9M
      *l = templ;
576
25.9M
      copy_predictor(best_predictor, b->predictor);
577
25.9M
      memcpy(best_dqcoeff, b->dqcoeff, 32);
578
25.9M
    }
579
139M
  }
580
13.9M
  b->bmi.as_mode = *best_mode;
581
582
13.9M
  vp8_short_idct4x4llm(best_dqcoeff, best_predictor, 16, dst, dst_stride);
583
584
13.9M
  return best_rd;
585
13.9M
}
586
587
static int rd_pick_intra4x4mby_modes(MACROBLOCK *mb, int *Rate, int *rate_y,
588
1.25M
                                     int *Distortion, int best_rd) {
589
1.25M
  MACROBLOCKD *const xd = &mb->e_mbd;
590
1.25M
  int i;
591
1.25M
  int cost = mb->mbmode_cost[xd->frame_type][B_PRED];
592
1.25M
  int distortion = 0;
593
1.25M
  int tot_rate_y = 0;
594
1.25M
  int64_t total_rd = 0;
595
1.25M
  ENTROPY_CONTEXT_PLANES t_above, t_left;
596
1.25M
  ENTROPY_CONTEXT *ta;
597
1.25M
  ENTROPY_CONTEXT *tl;
598
1.25M
  const int *bmode_costs;
599
600
1.25M
  t_above = *mb->e_mbd.above_context;
601
1.25M
  t_left = *mb->e_mbd.left_context;
602
603
1.25M
  ta = (ENTROPY_CONTEXT *)&t_above;
604
1.25M
  tl = (ENTROPY_CONTEXT *)&t_left;
605
606
1.25M
  intra_prediction_down_copy(xd, xd->dst.y_buffer - xd->dst.y_stride + 16);
607
608
1.25M
  bmode_costs = mb->inter_bmode_costs;
609
610
14.5M
  for (i = 0; i < 16; ++i) {
611
13.9M
    MODE_INFO *const mic = xd->mode_info_context;
612
13.9M
    const int mis = xd->mode_info_stride;
613
13.9M
    B_PREDICTION_MODE best_mode = B_MODE_COUNT;
614
13.9M
    int r = 0, ry = 0, d = 0;
615
616
13.9M
    if (mb->e_mbd.frame_type == KEY_FRAME) {
617
8.06M
      const B_PREDICTION_MODE A = above_block_mode(mic, i, mis);
618
8.06M
      const B_PREDICTION_MODE L = left_block_mode(mic, i);
619
620
8.06M
      bmode_costs = mb->bmode_costs[A][L];
621
8.06M
    }
622
623
13.9M
    total_rd += rd_pick_intra4x4block(
624
13.9M
        mb, mb->block + i, xd->block + i, &best_mode, bmode_costs,
625
13.9M
        ta + vp8_block2above[i], tl + vp8_block2left[i], &r, &ry, &d);
626
627
13.9M
    cost += r;
628
13.9M
    distortion += d;
629
13.9M
    tot_rate_y += ry;
630
631
13.9M
    assert(best_mode != B_MODE_COUNT);
632
13.9M
    mic->bmi[i].as_mode = best_mode;
633
634
13.9M
    if (total_rd >= (int64_t)best_rd) break;
635
13.9M
  }
636
637
1.25M
  if (total_rd >= (int64_t)best_rd) return INT_MAX;
638
639
576k
  *Rate = cost;
640
576k
  *rate_y = tot_rate_y;
641
576k
  *Distortion = distortion;
642
643
576k
  return RDCOST(mb->rdmult, mb->rddiv, cost, distortion);
644
1.25M
}
645
646
static int rd_pick_intra16x16mby_mode(MACROBLOCK *x, int *Rate, int *rate_y,
647
739k
                                      int *Distortion) {
648
739k
  MB_PREDICTION_MODE mode;
649
739k
  MB_PREDICTION_MODE mode_selected = MB_MODE_COUNT;
650
739k
  int rate, ratey;
651
739k
  int distortion;
652
739k
  int best_rd = INT_MAX;
653
739k
  int this_rd;
654
739k
  MACROBLOCKD *xd = &x->e_mbd;
655
656
  /* Y Search for 16x16 intra prediction mode */
657
3.69M
  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
658
2.95M
    xd->mode_info_context->mbmi.mode = mode;
659
660
2.95M
    vp8_build_intra_predictors_mby_s(xd, xd->dst.y_buffer - xd->dst.y_stride,
661
2.95M
                                     xd->dst.y_buffer - 1, xd->dst.y_stride,
662
2.95M
                                     xd->predictor, 16);
663
664
2.95M
    macro_block_yrd(x, &ratey, &distortion);
665
2.95M
    rate = ratey +
666
2.95M
           x->mbmode_cost[xd->frame_type][xd->mode_info_context->mbmi.mode];
667
668
2.95M
    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
669
670
2.95M
    if (this_rd < best_rd) {
671
921k
      mode_selected = mode;
672
921k
      best_rd = this_rd;
673
921k
      *Rate = rate;
674
921k
      *rate_y = ratey;
675
921k
      *Distortion = distortion;
676
921k
    }
677
2.95M
  }
678
679
739k
  assert(mode_selected != MB_MODE_COUNT);
680
739k
  xd->mode_info_context->mbmi.mode = mode_selected;
681
739k
  return best_rd;
682
739k
}
683
684
9.07M
static int rd_cost_mbuv(MACROBLOCK *mb) {
685
9.07M
  int b;
686
9.07M
  int cost = 0;
687
9.07M
  MACROBLOCKD *x = &mb->e_mbd;
688
9.07M
  ENTROPY_CONTEXT_PLANES t_above, t_left;
689
9.07M
  ENTROPY_CONTEXT *ta;
690
9.07M
  ENTROPY_CONTEXT *tl;
691
692
9.07M
  t_above = *mb->e_mbd.above_context;
693
9.07M
  t_left = *mb->e_mbd.left_context;
694
695
9.07M
  ta = (ENTROPY_CONTEXT *)&t_above;
696
9.07M
  tl = (ENTROPY_CONTEXT *)&t_left;
697
698
81.7M
  for (b = 16; b < 24; ++b) {
699
72.6M
    cost += cost_coeffs(mb, x->block + b, PLANE_TYPE_UV,
700
72.6M
                        ta + vp8_block2above[b], tl + vp8_block2left[b]);
701
72.6M
  }
702
703
9.07M
  return cost;
704
9.07M
}
705
706
static int rd_inter16x16_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate,
707
2.74M
                            int *distortion, int fullpixel) {
708
2.74M
  (void)cpi;
709
2.74M
  (void)fullpixel;
710
711
2.74M
  vp8_build_inter16x16_predictors_mbuv(&x->e_mbd);
712
2.74M
  vp8_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
713
2.74M
                    x->src.uv_stride, &x->e_mbd.predictor[256],
714
2.74M
                    &x->e_mbd.predictor[320], 8);
715
716
2.74M
  vp8_transform_mbuv(x);
717
2.74M
  vp8_quantize_mbuv(x);
718
719
2.74M
  *rate = rd_cost_mbuv(x);
720
2.74M
  *distortion = vp8_mbuverror(x) / 4;
721
722
2.74M
  return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
723
2.74M
}
724
725
static int rd_inter4x4_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate,
726
387k
                          int *distortion, int fullpixel) {
727
387k
  (void)cpi;
728
387k
  (void)fullpixel;
729
730
387k
  vp8_build_inter4x4_predictors_mbuv(&x->e_mbd);
731
387k
  vp8_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
732
387k
                    x->src.uv_stride, &x->e_mbd.predictor[256],
733
387k
                    &x->e_mbd.predictor[320], 8);
734
735
387k
  vp8_transform_mbuv(x);
736
387k
  vp8_quantize_mbuv(x);
737
738
387k
  *rate = rd_cost_mbuv(x);
739
387k
  *distortion = vp8_mbuverror(x) / 4;
740
741
387k
  return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
742
387k
}
743
744
static void rd_pick_intra_mbuv_mode(MACROBLOCK *x, int *rate,
745
1.48M
                                    int *rate_tokenonly, int *distortion) {
746
1.48M
  MB_PREDICTION_MODE mode;
747
1.48M
  MB_PREDICTION_MODE mode_selected = MB_MODE_COUNT;
748
1.48M
  int best_rd = INT_MAX;
749
1.48M
  int d = 0, r = 0;
750
1.48M
  int rate_to;
751
1.48M
  MACROBLOCKD *xd = &x->e_mbd;
752
753
7.43M
  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
754
5.94M
    int this_rate;
755
5.94M
    int this_distortion;
756
5.94M
    int this_rd;
757
758
5.94M
    xd->mode_info_context->mbmi.uv_mode = mode;
759
760
5.94M
    vp8_build_intra_predictors_mbuv_s(
761
5.94M
        xd, xd->dst.u_buffer - xd->dst.uv_stride,
762
5.94M
        xd->dst.v_buffer - xd->dst.uv_stride, xd->dst.u_buffer - 1,
763
5.94M
        xd->dst.v_buffer - 1, xd->dst.uv_stride, &xd->predictor[256],
764
5.94M
        &xd->predictor[320], 8);
765
766
5.94M
    vp8_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
767
5.94M
                      x->src.uv_stride, &xd->predictor[256],
768
5.94M
                      &xd->predictor[320], 8);
769
5.94M
    vp8_transform_mbuv(x);
770
5.94M
    vp8_quantize_mbuv(x);
771
772
5.94M
    rate_to = rd_cost_mbuv(x);
773
5.94M
    this_rate =
774
5.94M
        rate_to + x->intra_uv_mode_cost[xd->frame_type]
775
5.94M
                                       [xd->mode_info_context->mbmi.uv_mode];
776
777
5.94M
    this_distortion = vp8_mbuverror(x) / 4;
778
779
5.94M
    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
780
781
5.94M
    if (this_rd < best_rd) {
782
1.83M
      best_rd = this_rd;
783
1.83M
      d = this_distortion;
784
1.83M
      r = this_rate;
785
1.83M
      *rate_tokenonly = rate_to;
786
1.83M
      mode_selected = mode;
787
1.83M
    }
788
5.94M
  }
789
790
1.48M
  *rate = r;
791
1.48M
  *distortion = d;
792
793
1.48M
  assert(mode_selected != MB_MODE_COUNT);
794
1.48M
  xd->mode_info_context->mbmi.uv_mode = mode_selected;
795
1.48M
}
796
797
6.98M
int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4]) {
798
6.98M
  vp8_prob p[VP8_MVREFS - 1];
799
6.98M
  assert(NEARESTMV <= m && m <= SPLITMV);
800
6.98M
  vp8_mv_ref_probs(p, near_mv_ref_ct);
801
6.98M
  return vp8_cost_token(vp8_mv_ref_tree, p,
802
6.98M
                        vp8_mv_ref_encoding_array + (m - NEARESTMV));
803
6.98M
}
804
805
2.74M
void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv) {
806
2.74M
  x->e_mbd.mode_info_context->mbmi.mode = mb;
807
2.74M
  x->e_mbd.mode_info_context->mbmi.mv.as_int = mv->as_int;
808
2.74M
}
809
810
static int labels2mode(MACROBLOCK *x, int const *labelings, int which_label,
811
                       B_PREDICTION_MODE this_mode, int_mv *this_mv,
812
29.9M
                       int_mv *best_ref_mv, int *mvcost[2]) {
813
29.9M
  MACROBLOCKD *const xd = &x->e_mbd;
814
29.9M
  MODE_INFO *const mic = xd->mode_info_context;
815
29.9M
  const int mis = xd->mode_info_stride;
816
817
29.9M
  int cost = 0;
818
29.9M
  int thismvcost = 0;
819
820
  /* We have to be careful retrieving previously-encoded motion vectors.
821
     Ones from this macroblock have to be pulled from the BLOCKD array
822
     as they have not yet made it to the bmi array in our MB_MODE_INFO. */
823
824
29.9M
  int i = 0;
825
826
478M
  do {
827
478M
    BLOCKD *const d = xd->block + i;
828
478M
    const int row = i >> 2, col = i & 3;
829
830
478M
    B_PREDICTION_MODE m;
831
832
478M
    if (labelings[i] != which_label) continue;
833
834
117M
    if (col && labelings[i] == labelings[i - 1]) {
835
60.6M
      m = LEFT4X4;
836
60.6M
    } else if (row && labelings[i] == labelings[i - 4]) {
837
26.6M
      m = ABOVE4X4;
838
29.9M
    } else {
839
      /* the only time we should do costing for new motion vector
840
       * or mode is when we are on a new label  (jbb May 08, 2007)
841
       */
842
29.9M
      switch (m = this_mode) {
843
8.21M
        case NEW4X4:
844
8.21M
          thismvcost = vp8_mv_bit_cost(this_mv, best_ref_mv, mvcost, 102);
845
8.21M
          break;
846
8.76M
        case LEFT4X4:
847
8.76M
          this_mv->as_int = col ? d[-1].bmi.mv.as_int : left_block_mv(mic, i);
848
8.76M
          break;
849
6.79M
        case ABOVE4X4:
850
6.79M
          this_mv->as_int =
851
6.79M
              row ? d[-4].bmi.mv.as_int : above_block_mv(mic, i, mis);
852
6.79M
          break;
853
6.14M
        case ZERO4X4: this_mv->as_int = 0; break;
854
0
        default: break;
855
29.9M
      }
856
857
29.9M
      if (m == ABOVE4X4) { /* replace above with left if same */
858
6.79M
        int_mv left_mv;
859
860
6.79M
        left_mv.as_int = col ? d[-1].bmi.mv.as_int : left_block_mv(mic, i);
861
862
6.79M
        if (left_mv.as_int == this_mv->as_int) m = LEFT4X4;
863
6.79M
      }
864
865
29.9M
      cost = x->inter_bmode_costs[m];
866
29.9M
    }
867
868
117M
    d->bmi.mv.as_int = this_mv->as_int;
869
870
117M
    x->partition_info->bmi[i].mode = m;
871
117M
    x->partition_info->bmi[i].mv.as_int = this_mv->as_int;
872
873
478M
  } while (++i < 16);
874
875
29.9M
  cost += thismvcost;
876
29.9M
  return cost;
877
29.9M
}
878
879
static int rdcost_mbsegment_y(MACROBLOCK *mb, const int *labels,
880
                              int which_label, ENTROPY_CONTEXT *ta,
881
23.3M
                              ENTROPY_CONTEXT *tl) {
882
23.3M
  int cost = 0;
883
23.3M
  int b;
884
23.3M
  MACROBLOCKD *x = &mb->e_mbd;
885
886
397M
  for (b = 0; b < 16; ++b) {
887
373M
    if (labels[b] == which_label) {
888
91.1M
      cost += cost_coeffs(mb, x->block + b, PLANE_TYPE_Y_WITH_DC,
889
91.1M
                          ta + vp8_block2above[b], tl + vp8_block2left[b]);
890
91.1M
    }
891
373M
  }
892
893
23.3M
  return cost;
894
23.3M
}
895
static unsigned int vp8_encode_inter_mb_segment(MACROBLOCK *x,
896
                                                int const *labels,
897
23.3M
                                                int which_label) {
898
23.3M
  int i;
899
23.3M
  unsigned int distortion = 0;
900
23.3M
  int pre_stride = x->e_mbd.pre.y_stride;
901
23.3M
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;
902
903
397M
  for (i = 0; i < 16; ++i) {
904
373M
    if (labels[i] == which_label) {
905
91.1M
      BLOCKD *bd = &x->e_mbd.block[i];
906
91.1M
      BLOCK *be = &x->block[i];
907
908
91.1M
      vp8_build_inter_predictors_b(bd, 16, base_pre, pre_stride,
909
91.1M
                                   x->e_mbd.subpixel_predict);
910
91.1M
      vp8_subtract_b(be, bd, 16);
911
91.1M
      x->short_fdct4x4(be->src_diff, be->coeff, 32);
912
91.1M
      x->quantize_b(be, bd);
913
914
91.1M
      distortion += vp8_block_error(be->coeff, bd->dqcoeff);
915
91.1M
    }
916
373M
  }
917
918
23.3M
  return distortion;
919
23.3M
}
920
921
static const unsigned int segmentation_to_sseshift[4] = { 3, 3, 2, 0 };
922
923
typedef struct {
924
  int_mv *ref_mv;
925
  int_mv mvp;
926
927
  int segment_rd;
928
  int segment_num;
929
  int r;
930
  int d;
931
  int segment_yrate;
932
  B_PREDICTION_MODE modes[16];
933
  int_mv mvs[16];
934
  unsigned char eobs[16];
935
936
  int mvthresh;
937
  int *mdcounts;
938
939
  int_mv sv_mvp[4]; /* save 4 mvp from 8x8 */
940
  int sv_istep[2];  /* save 2 initial step_param for 16x8/8x16 */
941
942
} BEST_SEG_INFO;
943
944
static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, BEST_SEG_INFO *bsi,
945
1.61M
                             unsigned int segmentation) {
946
1.61M
  int i;
947
1.61M
  int const *labels;
948
1.61M
  int br = 0;
949
1.61M
  int bd = 0;
950
1.61M
  B_PREDICTION_MODE this_mode;
951
952
1.61M
  int label_count;
953
1.61M
  int this_segment_rd = 0;
954
1.61M
  int label_mv_thresh;
955
1.61M
  int rate = 0;
956
1.61M
  int sbr = 0;
957
1.61M
  int sbd = 0;
958
1.61M
  int segmentyrate = 0;
959
960
1.61M
  vp8_variance_fn_ptr_t *v_fn_ptr;
961
962
1.61M
  ENTROPY_CONTEXT_PLANES t_above, t_left;
963
1.61M
  ENTROPY_CONTEXT_PLANES t_above_b, t_left_b;
964
965
1.61M
  t_above = *x->e_mbd.above_context;
966
1.61M
  t_left = *x->e_mbd.left_context;
967
968
1.61M
  vp8_zero(t_above_b);
969
1.61M
  vp8_zero(t_left_b);
970
971
1.61M
  br = 0;
972
1.61M
  bd = 0;
973
974
1.61M
  v_fn_ptr = &cpi->fn_ptr[segmentation];
975
1.61M
  labels = vp8_mbsplits[segmentation];
976
1.61M
  label_count = vp8_mbsplit_count[segmentation];
977
978
  /* 64 makes this threshold really big effectively making it so that we
979
   * very rarely check mvs on segments.   setting this to 1 would make mv
980
   * thresh roughly equal to what it is for macroblocks
981
   */
982
1.61M
  label_mv_thresh = 1 * bsi->mvthresh / label_count;
983
984
  /* Segmentation method overheads */
985
1.61M
  rate = vp8_cost_token(vp8_mbsplit_tree, vp8_mbsplit_probs,
986
1.61M
                        vp8_mbsplit_encodings + segmentation);
987
1.61M
  rate += vp8_cost_mv_ref(SPLITMV, bsi->mdcounts);
988
1.61M
  this_segment_rd += RDCOST(x->rdmult, x->rddiv, rate, 0);
989
1.61M
  br += rate;
990
991
6.86M
  for (i = 0; i < label_count; ++i) {
992
6.08M
    int_mv mode_mv[B_MODE_COUNT] = { { 0 }, { 0 } };
993
6.08M
    int best_label_rd = INT_MAX;
994
6.08M
    B_PREDICTION_MODE mode_selected = ZERO4X4;
995
6.08M
    int bestlabelyrate = 0;
996
997
    /* search for the best motion vector on this segment */
998
29.9M
    for (this_mode = LEFT4X4; this_mode <= NEW4X4; ++this_mode) {
999
24.3M
      int this_rd;
1000
24.3M
      int distortion;
1001
24.3M
      int labelyrate;
1002
24.3M
      ENTROPY_CONTEXT_PLANES t_above_s, t_left_s;
1003
24.3M
      ENTROPY_CONTEXT *ta_s;
1004
24.3M
      ENTROPY_CONTEXT *tl_s;
1005
1006
24.3M
      t_above_s = t_above;
1007
24.3M
      t_left_s = t_left;
1008
1009
24.3M
      ta_s = (ENTROPY_CONTEXT *)&t_above_s;
1010
24.3M
      tl_s = (ENTROPY_CONTEXT *)&t_left_s;
1011
1012
24.3M
      if (this_mode == NEW4X4) {
1013
6.08M
        int sseshift;
1014
6.08M
        int num00;
1015
6.08M
        int step_param = 0;
1016
6.08M
        int further_steps;
1017
6.08M
        int n;
1018
6.08M
        int thissme;
1019
6.08M
        int bestsme = INT_MAX;
1020
6.08M
        int_mv temp_mv;
1021
6.08M
        BLOCK *c;
1022
6.08M
        BLOCKD *e;
1023
1024
        /* Is the best so far sufficiently good that we can't justify
1025
         * doing a new motion search.
1026
         */
1027
6.08M
        if (best_label_rd < label_mv_thresh) break;
1028
1029
5.59M
        if (cpi->compressor_speed) {
1030
5.59M
          if (segmentation == BLOCK_8X16 || segmentation == BLOCK_16X8) {
1031
1.43M
            bsi->mvp.as_int = bsi->sv_mvp[i].as_int;
1032
1.43M
            if (i == 1 && segmentation == BLOCK_16X8) {
1033
333k
              bsi->mvp.as_int = bsi->sv_mvp[2].as_int;
1034
333k
            }
1035
1036
1.43M
            step_param = bsi->sv_istep[i];
1037
1.43M
          }
1038
1039
          /* use previous block's result as next block's MV
1040
           * predictor.
1041
           */
1042
5.59M
          if (segmentation == BLOCK_4X4 && i > 0) {
1043
1.89M
            bsi->mvp.as_int = x->e_mbd.block[i - 1].bmi.mv.as_int;
1044
1.89M
            if (i == 4 || i == 8 || i == 12) {
1045
388k
              bsi->mvp.as_int = x->e_mbd.block[i - 4].bmi.mv.as_int;
1046
388k
            }
1047
1.89M
            step_param = 2;
1048
1.89M
          }
1049
5.59M
        }
1050
1051
5.59M
        further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
1052
1053
5.59M
        {
1054
5.59M
          int sadpb = x->sadperbit4;
1055
5.59M
          int_mv mvp_full;
1056
1057
5.59M
          mvp_full.as_mv.row = bsi->mvp.as_mv.row >> 3;
1058
5.59M
          mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3;
1059
1060
          /* find first label */
1061
5.59M
          n = vp8_mbsplit_offset[segmentation][i];
1062
1063
5.59M
          c = &x->block[n];
1064
5.59M
          e = &x->e_mbd.block[n];
1065
1066
5.59M
          {
1067
5.59M
            bestsme = cpi->diamond_search_sad(
1068
5.59M
                x, c, e, &mvp_full, &mode_mv[NEW4X4], step_param, sadpb, &num00,
1069
5.59M
                v_fn_ptr, x->mvcost, bsi->ref_mv);
1070
1071
5.59M
            n = num00;
1072
5.59M
            num00 = 0;
1073
1074
21.8M
            while (n < further_steps) {
1075
16.2M
              n++;
1076
1077
16.2M
              if (num00) {
1078
2.05M
                num00--;
1079
14.2M
              } else {
1080
14.2M
                thissme = cpi->diamond_search_sad(
1081
14.2M
                    x, c, e, &mvp_full, &temp_mv, step_param + n, sadpb, &num00,
1082
14.2M
                    v_fn_ptr, x->mvcost, bsi->ref_mv);
1083
1084
14.2M
                if (thissme < bestsme) {
1085
2.68M
                  bestsme = thissme;
1086
2.68M
                  mode_mv[NEW4X4].as_int = temp_mv.as_int;
1087
2.68M
                }
1088
14.2M
              }
1089
16.2M
            }
1090
5.59M
          }
1091
1092
5.59M
          sseshift = segmentation_to_sseshift[segmentation];
1093
1094
          /* Should we do a full search (best quality only) */
1095
5.59M
          if ((cpi->compressor_speed == 0) && (bestsme >> sseshift) > 4000) {
1096
            /* Check if mvp_full is within the range. */
1097
0
            vp8_clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max, x->mv_row_min,
1098
0
                         x->mv_row_max);
1099
1100
0
            thissme = vp8_full_search_sad(x, c, e, &mvp_full, sadpb, 16,
1101
0
                                          v_fn_ptr, x->mvcost, bsi->ref_mv);
1102
1103
0
            if (thissme < bestsme) {
1104
0
              bestsme = thissme;
1105
0
              mode_mv[NEW4X4].as_int = e->bmi.mv.as_int;
1106
0
            } else {
1107
              /* The full search result is actually worse so
1108
               * re-instate the previous best vector
1109
               */
1110
0
              e->bmi.mv.as_int = mode_mv[NEW4X4].as_int;
1111
0
            }
1112
0
          }
1113
5.59M
        }
1114
1115
5.59M
        if (bestsme < INT_MAX) {
1116
5.59M
          int disto;
1117
5.59M
          unsigned int sse;
1118
5.59M
          cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4], bsi->ref_mv,
1119
5.59M
                                       x->errorperbit, v_fn_ptr, x->mvcost,
1120
5.59M
                                       &disto, &sse);
1121
5.59M
        }
1122
5.59M
      } /* NEW4X4 */
1123
1124
23.8M
      rate = labels2mode(x, labels, i, this_mode, &mode_mv[this_mode],
1125
23.8M
                         bsi->ref_mv, x->mvcost);
1126
1127
      /* Trap vectors that reach beyond the UMV borders */
1128
23.8M
      if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) ||
1129
23.8M
          ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) ||
1130
23.6M
          ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) ||
1131
23.6M
          ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max)) {
1132
485k
        continue;
1133
485k
      }
1134
1135
23.3M
      distortion = vp8_encode_inter_mb_segment(x, labels, i) / 4;
1136
1137
23.3M
      labelyrate = rdcost_mbsegment_y(x, labels, i, ta_s, tl_s);
1138
23.3M
      rate += labelyrate;
1139
1140
23.3M
      this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
1141
1142
23.3M
      if (this_rd < best_label_rd) {
1143
10.3M
        sbr = rate;
1144
10.3M
        sbd = distortion;
1145
10.3M
        bestlabelyrate = labelyrate;
1146
10.3M
        mode_selected = this_mode;
1147
10.3M
        best_label_rd = this_rd;
1148
1149
10.3M
        t_above_b = t_above_s;
1150
10.3M
        t_left_b = t_left_s;
1151
10.3M
      }
1152
23.3M
    } /*for each 4x4 mode*/
1153
1154
6.08M
    t_above = t_above_b;
1155
6.08M
    t_left = t_left_b;
1156
1157
6.08M
    labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected],
1158
6.08M
                bsi->ref_mv, x->mvcost);
1159
1160
6.08M
    br += sbr;
1161
6.08M
    bd += sbd;
1162
6.08M
    segmentyrate += bestlabelyrate;
1163
6.08M
    this_segment_rd += best_label_rd;
1164
1165
6.08M
    if (this_segment_rd >= bsi->segment_rd) break;
1166
1167
6.08M
  } /* for each label */
1168
1169
1.61M
  if (this_segment_rd < bsi->segment_rd) {
1170
783k
    bsi->r = br;
1171
783k
    bsi->d = bd;
1172
783k
    bsi->segment_yrate = segmentyrate;
1173
783k
    bsi->segment_rd = this_segment_rd;
1174
783k
    bsi->segment_num = segmentation;
1175
1176
    /* store everything needed to come back to this!! */
1177
13.3M
    for (i = 0; i < 16; ++i) {
1178
12.5M
      bsi->mvs[i].as_mv = x->partition_info->bmi[i].mv.as_mv;
1179
12.5M
      bsi->modes[i] = x->partition_info->bmi[i].mode;
1180
12.5M
      bsi->eobs[i] = x->e_mbd.eobs[i];
1181
12.5M
    }
1182
783k
  }
1183
1.61M
}
1184
1185
1.54M
static void vp8_cal_step_param(int sr, int *sp) {
1186
1.54M
  int step = 0;
1187
1188
1.54M
  if (sr > MAX_FIRST_STEP) {
1189
53.2k
    sr = MAX_FIRST_STEP;
1190
1.49M
  } else if (sr < 1) {
1191
729k
    sr = 1;
1192
729k
  }
1193
1194
4.50M
  while (sr >>= 1) step++;
1195
1196
1.54M
  *sp = MAX_MVSEARCH_STEPS - 1 - step;
1197
1.54M
}
1198
1199
static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
1200
                                           int_mv *best_ref_mv, int best_rd,
1201
                                           int *mdcounts, int *returntotrate,
1202
                                           int *returnyrate,
1203
                                           int *returndistortion,
1204
694k
                                           int mvthresh) {
1205
694k
  int i;
1206
694k
  BEST_SEG_INFO bsi;
1207
1208
694k
  memset(&bsi, 0, sizeof(bsi));
1209
1210
694k
  bsi.segment_rd = best_rd;
1211
694k
  bsi.ref_mv = best_ref_mv;
1212
694k
  bsi.mvp.as_int = best_ref_mv->as_int;
1213
694k
  bsi.mvthresh = mvthresh;
1214
694k
  bsi.mdcounts = mdcounts;
1215
1216
11.8M
  for (i = 0; i < 16; ++i) {
1217
11.1M
    bsi.modes[i] = ZERO4X4;
1218
11.1M
  }
1219
1220
694k
  if (cpi->compressor_speed == 0) {
1221
    /* for now, we will keep the original segmentation order
1222
       when in best quality mode */
1223
0
    rd_check_segment(cpi, x, &bsi, BLOCK_16X8);
1224
0
    rd_check_segment(cpi, x, &bsi, BLOCK_8X16);
1225
0
    rd_check_segment(cpi, x, &bsi, BLOCK_8X8);
1226
0
    rd_check_segment(cpi, x, &bsi, BLOCK_4X4);
1227
694k
  } else {
1228
694k
    int sr;
1229
1230
694k
    rd_check_segment(cpi, x, &bsi, BLOCK_8X8);
1231
1232
694k
    if (bsi.segment_rd < best_rd) {
1233
387k
      int col_min = ((best_ref_mv->as_mv.col + 7) >> 3) - MAX_FULL_PEL_VAL;
1234
387k
      int row_min = ((best_ref_mv->as_mv.row + 7) >> 3) - MAX_FULL_PEL_VAL;
1235
387k
      int col_max = (best_ref_mv->as_mv.col >> 3) + MAX_FULL_PEL_VAL;
1236
387k
      int row_max = (best_ref_mv->as_mv.row >> 3) + MAX_FULL_PEL_VAL;
1237
1238
387k
      int tmp_col_min = x->mv_col_min;
1239
387k
      int tmp_col_max = x->mv_col_max;
1240
387k
      int tmp_row_min = x->mv_row_min;
1241
387k
      int tmp_row_max = x->mv_row_max;
1242
1243
      /* Get intersection of UMV window and valid MV window to reduce # of
1244
       * checks in diamond search. */
1245
387k
      if (x->mv_col_min < col_min) x->mv_col_min = col_min;
1246
387k
      if (x->mv_col_max > col_max) x->mv_col_max = col_max;
1247
387k
      if (x->mv_row_min < row_min) x->mv_row_min = row_min;
1248
387k
      if (x->mv_row_max > row_max) x->mv_row_max = row_max;
1249
1250
      /* Get 8x8 result */
1251
387k
      bsi.sv_mvp[0].as_int = bsi.mvs[0].as_int;
1252
387k
      bsi.sv_mvp[1].as_int = bsi.mvs[2].as_int;
1253
387k
      bsi.sv_mvp[2].as_int = bsi.mvs[8].as_int;
1254
387k
      bsi.sv_mvp[3].as_int = bsi.mvs[10].as_int;
1255
1256
      /* Use 8x8 result as 16x8/8x16's predictor MV. Adjust search range
1257
       * according to the closeness of 2 MV. */
1258
      /* block 8X16 */
1259
387k
      {
1260
387k
        sr =
1261
387k
            MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[2].as_mv.row)) >> 3,
1262
387k
                 (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[2].as_mv.col)) >> 3);
1263
387k
        vp8_cal_step_param(sr, &bsi.sv_istep[0]);
1264
1265
387k
        sr =
1266
387k
            MAXF((abs(bsi.sv_mvp[1].as_mv.row - bsi.sv_mvp[3].as_mv.row)) >> 3,
1267
387k
                 (abs(bsi.sv_mvp[1].as_mv.col - bsi.sv_mvp[3].as_mv.col)) >> 3);
1268
387k
        vp8_cal_step_param(sr, &bsi.sv_istep[1]);
1269
1270
387k
        rd_check_segment(cpi, x, &bsi, BLOCK_8X16);
1271
387k
      }
1272
1273
      /* block 16X8 */
1274
387k
      {
1275
387k
        sr =
1276
387k
            MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[1].as_mv.row)) >> 3,
1277
387k
                 (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[1].as_mv.col)) >> 3);
1278
387k
        vp8_cal_step_param(sr, &bsi.sv_istep[0]);
1279
1280
387k
        sr =
1281
387k
            MAXF((abs(bsi.sv_mvp[2].as_mv.row - bsi.sv_mvp[3].as_mv.row)) >> 3,
1282
387k
                 (abs(bsi.sv_mvp[2].as_mv.col - bsi.sv_mvp[3].as_mv.col)) >> 3);
1283
387k
        vp8_cal_step_param(sr, &bsi.sv_istep[1]);
1284
1285
387k
        rd_check_segment(cpi, x, &bsi, BLOCK_16X8);
1286
387k
      }
1287
1288
      /* If 8x8 is better than 16x8/8x16, then do 4x4 search */
1289
      /* Not skip 4x4 if speed=0 (good quality) */
1290
387k
      if (cpi->sf.no_skip_block4x4_search || bsi.segment_num == BLOCK_8X8)
1291
      /* || (sv_segment_rd8x8-bsi.segment_rd) < sv_segment_rd8x8>>5) */
1292
142k
      {
1293
142k
        bsi.mvp.as_int = bsi.sv_mvp[0].as_int;
1294
142k
        rd_check_segment(cpi, x, &bsi, BLOCK_4X4);
1295
142k
      }
1296
1297
      /* restore UMV window */
1298
387k
      x->mv_col_min = tmp_col_min;
1299
387k
      x->mv_col_max = tmp_col_max;
1300
387k
      x->mv_row_min = tmp_row_min;
1301
387k
      x->mv_row_max = tmp_row_max;
1302
387k
    }
1303
694k
  }
1304
1305
  /* set it to the best */
1306
11.8M
  for (i = 0; i < 16; ++i) {
1307
11.1M
    BLOCKD *bd = &x->e_mbd.block[i];
1308
1309
11.1M
    bd->bmi.mv.as_int = bsi.mvs[i].as_int;
1310
11.1M
    *bd->eob = bsi.eobs[i];
1311
11.1M
  }
1312
1313
694k
  *returntotrate = bsi.r;
1314
694k
  *returndistortion = bsi.d;
1315
694k
  *returnyrate = bsi.segment_yrate;
1316
1317
  /* save partitions */
1318
694k
  x->e_mbd.mode_info_context->mbmi.partitioning = bsi.segment_num;
1319
694k
  x->partition_info->count = vp8_mbsplit_count[bsi.segment_num];
1320
1321
3.54M
  for (i = 0; i < x->partition_info->count; ++i) {
1322
2.84M
    int j;
1323
1324
2.84M
    j = vp8_mbsplit_offset[bsi.segment_num][i];
1325
1326
2.84M
    x->partition_info->bmi[i].mode = bsi.modes[j];
1327
2.84M
    x->partition_info->bmi[i].mv.as_mv = bsi.mvs[j].as_mv;
1328
2.84M
  }
1329
  /*
1330
   * used to set x->e_mbd.mode_info_context->mbmi.mv.as_int
1331
   */
1332
694k
  x->partition_info->bmi[15].mv.as_int = bsi.mvs[15].as_int;
1333
1334
694k
  return bsi.segment_rd;
1335
694k
}
1336
1337
/* The improved MV prediction */
1338
void vp8_mv_pred(VP8_COMP *cpi, MACROBLOCKD *xd, const MODE_INFO *here,
1339
                 int_mv *mvp, int refframe, int *ref_frame_sign_bias, int *sr,
1340
1.60M
                 int near_sadidx[]) {
1341
1.60M
  const MODE_INFO *above = here - xd->mode_info_stride;
1342
1.60M
  const MODE_INFO *left = here - 1;
1343
1.60M
  const MODE_INFO *aboveleft = above - 1;
1344
1.60M
  int_mv near_mvs[8];
1345
1.60M
  int near_ref[8];
1346
1.60M
  int_mv mv;
1347
1.60M
  int vcnt = 0;
1348
1.60M
  int find = 0;
1349
1.60M
  int mb_offset;
1350
1351
1.60M
  int mvx[8];
1352
1.60M
  int mvy[8];
1353
1.60M
  int i;
1354
1355
1.60M
  mv.as_int = 0;
1356
1357
1.60M
  if (here->mbmi.ref_frame != INTRA_FRAME) {
1358
1.60M
    near_mvs[0].as_int = near_mvs[1].as_int = near_mvs[2].as_int =
1359
1.60M
        near_mvs[3].as_int = near_mvs[4].as_int = near_mvs[5].as_int =
1360
1.60M
            near_mvs[6].as_int = near_mvs[7].as_int = 0;
1361
1.60M
    near_ref[0] = near_ref[1] = near_ref[2] = near_ref[3] = near_ref[4] =
1362
1.60M
        near_ref[5] = near_ref[6] = near_ref[7] = 0;
1363
1364
    /* read in 3 nearby block's MVs from current frame as prediction
1365
     * candidates.
1366
     */
1367
1.60M
    if (above->mbmi.ref_frame != INTRA_FRAME) {
1368
492k
      near_mvs[vcnt].as_int = above->mbmi.mv.as_int;
1369
492k
      mv_bias(ref_frame_sign_bias[above->mbmi.ref_frame], refframe,
1370
492k
              &near_mvs[vcnt], ref_frame_sign_bias);
1371
492k
      near_ref[vcnt] = above->mbmi.ref_frame;
1372
492k
    }
1373
1.60M
    vcnt++;
1374
1.60M
    if (left->mbmi.ref_frame != INTRA_FRAME) {
1375
662k
      near_mvs[vcnt].as_int = left->mbmi.mv.as_int;
1376
662k
      mv_bias(ref_frame_sign_bias[left->mbmi.ref_frame], refframe,
1377
662k
              &near_mvs[vcnt], ref_frame_sign_bias);
1378
662k
      near_ref[vcnt] = left->mbmi.ref_frame;
1379
662k
    }
1380
1.60M
    vcnt++;
1381
1.60M
    if (aboveleft->mbmi.ref_frame != INTRA_FRAME) {
1382
389k
      near_mvs[vcnt].as_int = aboveleft->mbmi.mv.as_int;
1383
389k
      mv_bias(ref_frame_sign_bias[aboveleft->mbmi.ref_frame], refframe,
1384
389k
              &near_mvs[vcnt], ref_frame_sign_bias);
1385
389k
      near_ref[vcnt] = aboveleft->mbmi.ref_frame;
1386
389k
    }
1387
1.60M
    vcnt++;
1388
1389
    /* read in 5 nearby block's MVs from last frame. */
1390
1.60M
    if (cpi->common.last_frame_type != KEY_FRAME) {
1391
955k
      mb_offset = (-xd->mb_to_top_edge / 128 + 1) * (xd->mode_info_stride + 1) +
1392
955k
                  (-xd->mb_to_left_edge / 128 + 1);
1393
1394
      /* current in last frame */
1395
955k
      if (cpi->lf_ref_frame[mb_offset] != INTRA_FRAME) {
1396
592k
        near_mvs[vcnt].as_int = cpi->lfmv[mb_offset].as_int;
1397
592k
        mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset], refframe,
1398
592k
                &near_mvs[vcnt], ref_frame_sign_bias);
1399
592k
        near_ref[vcnt] = cpi->lf_ref_frame[mb_offset];
1400
592k
      }
1401
955k
      vcnt++;
1402
1403
      /* above in last frame */
1404
955k
      if (cpi->lf_ref_frame[mb_offset - xd->mode_info_stride - 1] !=
1405
955k
          INTRA_FRAME) {
1406
341k
        near_mvs[vcnt].as_int =
1407
341k
            cpi->lfmv[mb_offset - xd->mode_info_stride - 1].as_int;
1408
341k
        mv_bias(
1409
341k
            cpi->lf_ref_frame_sign_bias[mb_offset - xd->mode_info_stride - 1],
1410
341k
            refframe, &near_mvs[vcnt], ref_frame_sign_bias);
1411
341k
        near_ref[vcnt] =
1412
341k
            cpi->lf_ref_frame[mb_offset - xd->mode_info_stride - 1];
1413
341k
      }
1414
955k
      vcnt++;
1415
1416
      /* left in last frame */
1417
955k
      if (cpi->lf_ref_frame[mb_offset - 1] != INTRA_FRAME) {
1418
404k
        near_mvs[vcnt].as_int = cpi->lfmv[mb_offset - 1].as_int;
1419
404k
        mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset - 1], refframe,
1420
404k
                &near_mvs[vcnt], ref_frame_sign_bias);
1421
404k
        near_ref[vcnt] = cpi->lf_ref_frame[mb_offset - 1];
1422
404k
      }
1423
955k
      vcnt++;
1424
1425
      /* right in last frame */
1426
955k
      if (cpi->lf_ref_frame[mb_offset + 1] != INTRA_FRAME) {
1427
412k
        near_mvs[vcnt].as_int = cpi->lfmv[mb_offset + 1].as_int;
1428
412k
        mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset + 1], refframe,
1429
412k
                &near_mvs[vcnt], ref_frame_sign_bias);
1430
412k
        near_ref[vcnt] = cpi->lf_ref_frame[mb_offset + 1];
1431
412k
      }
1432
955k
      vcnt++;
1433
1434
      /* below in last frame */
1435
955k
      if (cpi->lf_ref_frame[mb_offset + xd->mode_info_stride + 1] !=
1436
955k
          INTRA_FRAME) {
1437
337k
        near_mvs[vcnt].as_int =
1438
337k
            cpi->lfmv[mb_offset + xd->mode_info_stride + 1].as_int;
1439
337k
        mv_bias(
1440
337k
            cpi->lf_ref_frame_sign_bias[mb_offset + xd->mode_info_stride + 1],
1441
337k
            refframe, &near_mvs[vcnt], ref_frame_sign_bias);
1442
337k
        near_ref[vcnt] =
1443
337k
            cpi->lf_ref_frame[mb_offset + xd->mode_info_stride + 1];
1444
337k
      }
1445
955k
      vcnt++;
1446
955k
    }
1447
1448
6.42M
    for (i = 0; i < vcnt; ++i) {
1449
5.72M
      if (near_ref[near_sadidx[i]] != INTRA_FRAME) {
1450
2.03M
        if (here->mbmi.ref_frame == near_ref[near_sadidx[i]]) {
1451
906k
          mv.as_int = near_mvs[near_sadidx[i]].as_int;
1452
906k
          find = 1;
1453
906k
          if (i < 3) {
1454
828k
            *sr = 3;
1455
828k
          } else {
1456
78.0k
            *sr = 2;
1457
78.0k
          }
1458
906k
          break;
1459
906k
        }
1460
2.03M
      }
1461
5.72M
    }
1462
1463
1.60M
    if (!find) {
1464
4.88M
      for (i = 0; i < vcnt; ++i) {
1465
4.18M
        mvx[i] = near_mvs[i].as_mv.row;
1466
4.18M
        mvy[i] = near_mvs[i].as_mv.col;
1467
4.18M
      }
1468
1469
702k
      insertsortmv(mvx, vcnt);
1470
702k
      insertsortmv(mvy, vcnt);
1471
702k
      mv.as_mv.row = mvx[vcnt / 2];
1472
702k
      mv.as_mv.col = mvy[vcnt / 2];
1473
1474
      /* sr is set to 0 to allow calling function to decide the search
1475
       * range.
1476
       */
1477
702k
      *sr = 0;
1478
702k
    }
1479
1.60M
  }
1480
1481
  /* Set up return values */
1482
1.60M
  mvp->as_int = mv.as_int;
1483
1.60M
  vp8_clamp_mv2(mvp, xd);
1484
1.60M
}
1485
1486
void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x,
1487
1.18M
                 int recon_yoffset, int near_sadidx[]) {
1488
  /* near_sad indexes:
1489
   *   0-cf above, 1-cf left, 2-cf aboveleft,
1490
   *   3-lf current, 4-lf above, 5-lf left, 6-lf right, 7-lf below
1491
   */
1492
1.18M
  int near_sad[8] = { 0 };
1493
1.18M
  BLOCK *b = &x->block[0];
1494
1.18M
  unsigned char *src_y_ptr = *(b->base_src);
1495
1496
  /* calculate sad for current frame 3 nearby MBs. */
1497
1.18M
  if (xd->mb_to_top_edge == 0 && xd->mb_to_left_edge == 0) {
1498
86.5k
    near_sad[0] = near_sad[1] = near_sad[2] = INT_MAX;
1499
1.09M
  } else if (xd->mb_to_top_edge ==
1500
1.09M
             0) { /* only has left MB for sad calculation. */
1501
454k
    near_sad[0] = near_sad[2] = INT_MAX;
1502
454k
    near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(
1503
454k
        src_y_ptr, b->src_stride, xd->dst.y_buffer - 16, xd->dst.y_stride);
1504
644k
  } else if (xd->mb_to_left_edge ==
1505
644k
             0) { /* only has left MB for sad calculation. */
1506
108k
    near_sad[1] = near_sad[2] = INT_MAX;
1507
108k
    near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(
1508
108k
        src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride * 16,
1509
108k
        xd->dst.y_stride);
1510
536k
  } else {
1511
536k
    near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(
1512
536k
        src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride * 16,
1513
536k
        xd->dst.y_stride);
1514
536k
    near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(
1515
536k
        src_y_ptr, b->src_stride, xd->dst.y_buffer - 16, xd->dst.y_stride);
1516
536k
    near_sad[2] = cpi->fn_ptr[BLOCK_16X16].sdf(
1517
536k
        src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride * 16 - 16,
1518
536k
        xd->dst.y_stride);
1519
536k
  }
1520
1521
1.18M
  if (cpi->common.last_frame_type != KEY_FRAME) {
1522
    /* calculate sad for last frame 5 nearby MBs. */
1523
532k
    unsigned char *pre_y_buffer =
1524
532k
        cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_buffer + recon_yoffset;
1525
532k
    int pre_y_stride = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_stride;
1526
1527
532k
    if (xd->mb_to_top_edge == 0) near_sad[4] = INT_MAX;
1528
532k
    if (xd->mb_to_left_edge == 0) near_sad[5] = INT_MAX;
1529
532k
    if (xd->mb_to_right_edge == 0) near_sad[6] = INT_MAX;
1530
532k
    if (xd->mb_to_bottom_edge == 0) near_sad[7] = INT_MAX;
1531
1532
532k
    if (near_sad[4] != INT_MAX) {
1533
315k
      near_sad[4] = cpi->fn_ptr[BLOCK_16X16].sdf(
1534
315k
          src_y_ptr, b->src_stride, pre_y_buffer - pre_y_stride * 16,
1535
315k
          pre_y_stride);
1536
315k
    }
1537
532k
    if (near_sad[5] != INT_MAX) {
1538
393k
      near_sad[5] = cpi->fn_ptr[BLOCK_16X16].sdf(
1539
393k
          src_y_ptr, b->src_stride, pre_y_buffer - 16, pre_y_stride);
1540
393k
    }
1541
532k
    near_sad[3] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride,
1542
532k
                                               pre_y_buffer, pre_y_stride);
1543
532k
    if (near_sad[6] != INT_MAX) {
1544
396k
      near_sad[6] = cpi->fn_ptr[BLOCK_16X16].sdf(
1545
396k
          src_y_ptr, b->src_stride, pre_y_buffer + 16, pre_y_stride);
1546
396k
    }
1547
532k
    if (near_sad[7] != INT_MAX) {
1548
339k
      near_sad[7] = cpi->fn_ptr[BLOCK_16X16].sdf(
1549
339k
          src_y_ptr, b->src_stride, pre_y_buffer + pre_y_stride * 16,
1550
339k
          pre_y_stride);
1551
339k
    }
1552
532k
  }
1553
1554
1.18M
  if (cpi->common.last_frame_type != KEY_FRAME) {
1555
532k
    insertsortsad(near_sad, near_sadidx, 8);
1556
653k
  } else {
1557
653k
    insertsortsad(near_sad, near_sadidx, 3);
1558
653k
  }
1559
1.18M
}
1560
1561
747k
static void rd_update_mvcount(MACROBLOCK *x, int_mv *best_ref_mv) {
1562
747k
  if (x->e_mbd.mode_info_context->mbmi.mode == SPLITMV) {
1563
174k
    int i;
1564
1565
1.44M
    for (i = 0; i < x->partition_info->count; ++i) {
1566
1.27M
      if (x->partition_info->bmi[i].mode == NEW4X4) {
1567
547k
        const int row_val = ((x->partition_info->bmi[i].mv.as_mv.row -
1568
547k
                              best_ref_mv->as_mv.row) >>
1569
547k
                             1);
1570
547k
        const int row_idx = mv_max + row_val;
1571
547k
        const int col_val = ((x->partition_info->bmi[i].mv.as_mv.col -
1572
547k
                              best_ref_mv->as_mv.col) >>
1573
547k
                             1);
1574
547k
        const int col_idx = mv_max + col_val;
1575
547k
        if (row_idx >= 0 && row_idx < MVvals && col_idx >= 0 &&
1576
547k
            col_idx < MVvals) {
1577
547k
          x->MVcount[0][row_idx]++;
1578
547k
          x->MVcount[1][col_idx]++;
1579
547k
        }
1580
547k
      }
1581
1.27M
    }
1582
572k
  } else if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV) {
1583
86.2k
    const int row_val = ((x->e_mbd.mode_info_context->mbmi.mv.as_mv.row -
1584
86.2k
                          best_ref_mv->as_mv.row) >>
1585
86.2k
                         1);
1586
86.2k
    const int row_idx = mv_max + row_val;
1587
86.2k
    const int col_val = ((x->e_mbd.mode_info_context->mbmi.mv.as_mv.col -
1588
86.2k
                          best_ref_mv->as_mv.col) >>
1589
86.2k
                         1);
1590
86.2k
    const int col_idx = mv_max + col_val;
1591
86.2k
    if (row_idx >= 0 && row_idx < MVvals && col_idx >= 0 && col_idx < MVvals) {
1592
86.2k
      x->MVcount[0][row_idx]++;
1593
86.2k
      x->MVcount[1][col_idx]++;
1594
86.2k
    }
1595
86.2k
  }
1596
747k
}
1597
1598
static int evaluate_inter_mode_rd(int mdcounts[4], RATE_DISTORTION *rd,
1599
                                  int *disable_skip, VP8_COMP *cpi,
1600
2.74M
                                  MACROBLOCK *x) {
1601
2.74M
  MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode;
1602
2.74M
  BLOCK *b = &x->block[0];
1603
2.74M
  MACROBLOCKD *xd = &x->e_mbd;
1604
2.74M
  int distortion;
1605
2.74M
  vp8_build_inter16x16_predictors_mby(&x->e_mbd, x->e_mbd.predictor, 16);
1606
1607
2.74M
  if (cpi->active_map_enabled && x->active_ptr[0] == 0) {
1608
0
    x->skip = 1;
1609
2.74M
  } else if (x->encode_breakout) {
1610
0
    unsigned int sse;
1611
0
    unsigned int var;
1612
0
    unsigned int threshold =
1613
0
        (xd->block[0].dequant[1] * xd->block[0].dequant[1] >> 4);
1614
1615
0
    if (threshold < x->encode_breakout) threshold = x->encode_breakout;
1616
1617
0
    var = vpx_variance16x16(*(b->base_src), b->src_stride, x->e_mbd.predictor,
1618
0
                            16, &sse);
1619
1620
0
    if (sse < threshold) {
1621
0
      unsigned int q2dc = xd->block[24].dequant[0];
1622
      /* If theres is no codeable 2nd order dc
1623
         or a very small uniform pixel change change */
1624
0
      if ((sse - var < q2dc * q2dc >> 4) || (sse / 2 > var && sse - var < 64)) {
1625
        /* Check u and v to make sure skip is ok */
1626
0
        unsigned int sse2 = VP8_UVSSE(x);
1627
0
        if (sse2 * 2 < threshold) {
1628
0
          x->skip = 1;
1629
0
          rd->distortion2 = sse + sse2;
1630
0
          rd->rate2 = 500;
1631
1632
          /* for best_yrd calculation */
1633
0
          rd->rate_uv = 0;
1634
0
          rd->distortion_uv = sse2;
1635
1636
0
          *disable_skip = 1;
1637
0
          return RDCOST(x->rdmult, x->rddiv, rd->rate2, rd->distortion2);
1638
0
        }
1639
0
      }
1640
0
    }
1641
0
  }
1642
1643
  /* Add in the Mv/mode cost */
1644
2.74M
  rd->rate2 += vp8_cost_mv_ref(this_mode, mdcounts);
1645
1646
  /* Y cost and distortion */
1647
2.74M
  macro_block_yrd(x, &rd->rate_y, &distortion);
1648
2.74M
  rd->rate2 += rd->rate_y;
1649
2.74M
  rd->distortion2 += distortion;
1650
1651
  /* UV cost and distortion */
1652
2.74M
  rd_inter16x16_uv(cpi, x, &rd->rate_uv, &rd->distortion_uv,
1653
2.74M
                   cpi->common.full_pixel);
1654
2.74M
  rd->rate2 += rd->rate_uv;
1655
2.74M
  rd->distortion2 += rd->distortion_uv;
1656
2.74M
  return INT_MAX;
1657
2.74M
}
1658
1659
static int calculate_final_rd_costs(int this_rd, RATE_DISTORTION *rd,
1660
                                    int *other_cost, int disable_skip,
1661
                                    int uv_intra_tteob, int intra_rd_penalty,
1662
6.48M
                                    VP8_COMP *cpi, MACROBLOCK *x) {
1663
6.48M
  MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode;
1664
1665
  /* Where skip is allowable add in the default per mb cost for the no
1666
   * skip case. where we then decide to skip we have to delete this and
1667
   * replace it with the cost of signalling a skip
1668
   */
1669
6.48M
  if (cpi->common.mb_no_coeff_skip) {
1670
6.48M
    *other_cost += vp8_cost_bit(cpi->prob_skip_false, 0);
1671
6.48M
    rd->rate2 += *other_cost;
1672
6.48M
  }
1673
1674
  /* Estimate the reference frame signaling cost and add it
1675
   * to the rolling cost variable.
1676
   */
1677
6.48M
  rd->rate2 += x->ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
1678
1679
6.48M
  if (!disable_skip) {
1680
    /* Test for the condition where skip block will be activated
1681
     * because there are no non zero coefficients and make any
1682
     * necessary adjustment for rate
1683
     */
1684
5.87M
    if (cpi->common.mb_no_coeff_skip) {
1685
5.87M
      int i;
1686
5.87M
      int tteob;
1687
5.87M
      int has_y2_block = (this_mode != SPLITMV && this_mode != B_PRED);
1688
1689
5.87M
      tteob = 0;
1690
5.87M
      if (has_y2_block) tteob += x->e_mbd.eobs[24];
1691
1692
99.9M
      for (i = 0; i < 16; ++i) tteob += (x->e_mbd.eobs[i] > has_y2_block);
1693
1694
5.87M
      if (x->e_mbd.mode_info_context->mbmi.ref_frame) {
1695
28.1M
        for (i = 16; i < 24; ++i) tteob += x->e_mbd.eobs[i];
1696
3.13M
      } else {
1697
2.74M
        tteob += uv_intra_tteob;
1698
2.74M
      }
1699
1700
5.87M
      if (tteob == 0) {
1701
351k
        rd->rate2 -= (rd->rate_y + rd->rate_uv);
1702
        /* for best_yrd calculation */
1703
351k
        rd->rate_uv = 0;
1704
1705
        /* Back out no skip flag costing and add in skip flag costing */
1706
351k
        if (cpi->prob_skip_false) {
1707
351k
          int prob_skip_cost;
1708
1709
351k
          prob_skip_cost = vp8_cost_bit(cpi->prob_skip_false, 1);
1710
351k
          prob_skip_cost -= (int)vp8_cost_bit(cpi->prob_skip_false, 0);
1711
351k
          rd->rate2 += prob_skip_cost;
1712
351k
          *other_cost += prob_skip_cost;
1713
351k
        }
1714
351k
      }
1715
5.87M
    }
1716
    /* Calculate the final RD estimate for this mode */
1717
5.87M
    this_rd = RDCOST(x->rdmult, x->rddiv, rd->rate2, rd->distortion2);
1718
5.87M
    if (this_rd < INT_MAX &&
1719
5.87M
        x->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
1720
2.74M
      this_rd += intra_rd_penalty;
1721
2.74M
    }
1722
5.87M
  }
1723
6.48M
  return this_rd;
1724
6.48M
}
1725
1726
static void update_best_mode(BEST_MODE *best_mode, int this_rd,
1727
                             RATE_DISTORTION *rd, int other_cost,
1728
2.41M
                             MACROBLOCK *x) {
1729
2.41M
  MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode;
1730
1731
2.41M
  other_cost += x->ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
1732
1733
  /* Calculate the final y RD estimate for this mode */
1734
2.41M
  best_mode->yrd =
1735
2.41M
      RDCOST(x->rdmult, x->rddiv, (rd->rate2 - rd->rate_uv - other_cost),
1736
2.41M
             (rd->distortion2 - rd->distortion_uv));
1737
1738
2.41M
  best_mode->rd = this_rd;
1739
2.41M
  best_mode->mbmode = x->e_mbd.mode_info_context->mbmi;
1740
2.41M
  best_mode->partition = *x->partition_info;
1741
1742
2.41M
  if ((this_mode == B_PRED) || (this_mode == SPLITMV)) {
1743
529k
    int i;
1744
8.99M
    for (i = 0; i < 16; ++i) {
1745
8.46M
      best_mode->bmodes[i] = x->e_mbd.block[i].bmi;
1746
8.46M
    }
1747
529k
  }
1748
2.41M
}
1749
1750
void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
1751
                            int recon_uvoffset, int *returnrate,
1752
                            int *returndistortion, int *returnintra, int mb_row,
1753
747k
                            int mb_col) {
1754
747k
  BLOCK *b = &x->block[0];
1755
747k
  BLOCKD *d = &x->e_mbd.block[0];
1756
747k
  MACROBLOCKD *xd = &x->e_mbd;
1757
747k
  int_mv best_ref_mv_sb[2];
1758
747k
  int_mv mode_mv_sb[2][MB_MODE_COUNT];
1759
747k
  int_mv best_ref_mv;
1760
747k
  int_mv *mode_mv;
1761
747k
  MB_PREDICTION_MODE this_mode;
1762
747k
  int num00;
1763
747k
  int best_mode_index = 0;
1764
747k
  BEST_MODE best_mode;
1765
1766
747k
  int i;
1767
747k
  int mode_index;
1768
747k
  int mdcounts[4];
1769
747k
  int rate;
1770
747k
  RATE_DISTORTION rd;
1771
747k
  int uv_intra_rate, uv_intra_distortion, uv_intra_rate_tokenonly;
1772
747k
  int uv_intra_tteob = 0;
1773
747k
  int uv_intra_done = 0;
1774
1775
747k
  MB_PREDICTION_MODE uv_intra_mode = 0;
1776
747k
  int_mv mvp;
1777
747k
  int near_sadidx[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
1778
747k
  int saddone = 0;
1779
  /* search range got from mv_pred(). It uses step_param levels. (0-7) */
1780
747k
  int sr = 0;
1781
1782
747k
  unsigned char *plane[4][3] = { { 0, 0 } };
1783
747k
  int ref_frame_map[4];
1784
747k
  int sign_bias = 0;
1785
1786
747k
  int intra_rd_penalty =
1787
747k
      10 * vp8_dc_quant(cpi->common.base_qindex, cpi->common.y1dc_delta_q);
1788
1789
747k
#if CONFIG_TEMPORAL_DENOISING
1790
747k
  unsigned int zero_mv_sse = UINT_MAX, best_sse = UINT_MAX,
1791
747k
               best_rd_sse = UINT_MAX;
1792
747k
#endif
1793
1794
  // _uv variables are not set consistantly before calling update_best_mode.
1795
747k
  rd.rate_uv = 0;
1796
747k
  rd.distortion_uv = 0;
1797
1798
747k
  mode_mv = mode_mv_sb[sign_bias];
1799
747k
  best_ref_mv.as_int = 0;
1800
747k
  best_mode.rd = INT_MAX;
1801
747k
  best_mode.yrd = INT_MAX;
1802
747k
  best_mode.intra_rd = INT_MAX;
1803
747k
  memset(mode_mv_sb, 0, sizeof(mode_mv_sb));
1804
747k
  memset(&best_mode.mbmode, 0, sizeof(best_mode.mbmode));
1805
747k
  memset(&best_mode.bmodes, 0, sizeof(best_mode.bmodes));
1806
1807
  /* Setup search priorities */
1808
747k
  get_reference_search_order(cpi, ref_frame_map);
1809
1810
  /* Check to see if there is at least 1 valid reference frame that we need
1811
   * to calculate near_mvs.
1812
   */
1813
747k
  if (ref_frame_map[1] > 0) {
1814
747k
    sign_bias = vp8_find_near_mvs_bias(
1815
747k
        &x->e_mbd, x->e_mbd.mode_info_context, mode_mv_sb, best_ref_mv_sb,
1816
747k
        mdcounts, ref_frame_map[1], cpi->common.ref_frame_sign_bias);
1817
1818
747k
    mode_mv = mode_mv_sb[sign_bias];
1819
747k
    best_ref_mv.as_int = best_ref_mv_sb[sign_bias].as_int;
1820
747k
  }
1821
1822
747k
  get_predictor_pointers(cpi, plane, recon_yoffset, recon_uvoffset);
1823
1824
747k
  *returnintra = INT_MAX;
1825
  /* Count of the number of MBs tested so far this frame */
1826
747k
  x->mbs_tested_so_far++;
1827
1828
747k
  x->skip = 0;
1829
1830
15.6M
  for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
1831
14.9M
    int this_rd = INT_MAX;
1832
14.9M
    int disable_skip = 0;
1833
14.9M
    int other_cost = 0;
1834
14.9M
    int this_ref_frame = ref_frame_map[vp8_ref_frame_order[mode_index]];
1835
1836
    /* Test best rd so far against threshold for trying this mode. */
1837
14.9M
    if (best_mode.rd <= x->rd_threshes[mode_index]) continue;
1838
1839
13.2M
    if (this_ref_frame < 0) continue;
1840
1841
    /* These variables hold are rolling total cost and distortion for
1842
     * this mode
1843
     */
1844
8.21M
    rd.rate2 = 0;
1845
8.21M
    rd.distortion2 = 0;
1846
1847
8.21M
    this_mode = vp8_mode_order[mode_index];
1848
1849
8.21M
    x->e_mbd.mode_info_context->mbmi.mode = this_mode;
1850
8.21M
    x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame;
1851
1852
    /* Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
1853
     * unless ARNR filtering is enabled in which case we want
1854
     * an unfiltered alternative
1855
     */
1856
8.21M
    if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
1857
0
      if (this_mode != ZEROMV ||
1858
0
          x->e_mbd.mode_info_context->mbmi.ref_frame != ALTREF_FRAME) {
1859
0
        continue;
1860
0
      }
1861
0
    }
1862
1863
    /* everything but intra */
1864
8.21M
    if (x->e_mbd.mode_info_context->mbmi.ref_frame) {
1865
5.17M
      assert(plane[this_ref_frame][0] != NULL &&
1866
5.17M
             plane[this_ref_frame][1] != NULL &&
1867
5.17M
             plane[this_ref_frame][2] != NULL);
1868
5.17M
      x->e_mbd.pre.y_buffer = plane[this_ref_frame][0];
1869
5.17M
      x->e_mbd.pre.u_buffer = plane[this_ref_frame][1];
1870
5.17M
      x->e_mbd.pre.v_buffer = plane[this_ref_frame][2];
1871
1872
5.17M
      if (sign_bias != cpi->common.ref_frame_sign_bias[this_ref_frame]) {
1873
0
        sign_bias = cpi->common.ref_frame_sign_bias[this_ref_frame];
1874
0
        mode_mv = mode_mv_sb[sign_bias];
1875
0
        best_ref_mv.as_int = best_ref_mv_sb[sign_bias].as_int;
1876
0
      }
1877
5.17M
    }
1878
1879
    /* Check to see if the testing frequency for this mode is at its
1880
     * max If so then prevent it from being tested and increase the
1881
     * threshold for its testing
1882
     */
1883
8.21M
    if (x->mode_test_hit_counts[mode_index] &&
1884
7.30M
        (cpi->mode_check_freq[mode_index] > 1)) {
1885
202k
      if (x->mbs_tested_so_far <= cpi->mode_check_freq[mode_index] *
1886
202k
                                      x->mode_test_hit_counts[mode_index]) {
1887
        /* Increase the threshold for coding this mode to make it
1888
         * less likely to be chosen
1889
         */
1890
111k
        x->rd_thresh_mult[mode_index] += 4;
1891
1892
111k
        if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT) {
1893
17.9k
          x->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
1894
17.9k
        }
1895
1896
111k
        x->rd_threshes[mode_index] =
1897
111k
            (cpi->rd_baseline_thresh[mode_index] >> 7) *
1898
111k
            x->rd_thresh_mult[mode_index];
1899
1900
111k
        continue;
1901
111k
      }
1902
202k
    }
1903
1904
    /* We have now reached the point where we are going to test the
1905
     * current mode so increment the counter for the number of times
1906
     * it has been tested
1907
     */
1908
8.10M
    x->mode_test_hit_counts[mode_index]++;
1909
1910
    /* Experimental code. Special case for gf and arf zeromv modes.
1911
     * Increase zbin size to supress noise
1912
     */
1913
8.10M
    if (x->zbin_mode_boost_enabled) {
1914
0
      if (this_ref_frame == INTRA_FRAME) {
1915
0
        x->zbin_mode_boost = 0;
1916
0
      } else {
1917
0
        if (vp8_mode_order[mode_index] == ZEROMV) {
1918
0
          if (this_ref_frame != LAST_FRAME) {
1919
0
            x->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
1920
0
          } else {
1921
0
            x->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
1922
0
          }
1923
0
        } else if (vp8_mode_order[mode_index] == SPLITMV) {
1924
0
          x->zbin_mode_boost = 0;
1925
0
        } else {
1926
0
          x->zbin_mode_boost = MV_ZBIN_BOOST;
1927
0
        }
1928
0
      }
1929
1930
0
      vp8_update_zbin_extra(cpi, x);
1931
0
    }
1932
1933
8.10M
    if (!uv_intra_done && this_ref_frame == INTRA_FRAME) {
1934
747k
      rd_pick_intra_mbuv_mode(x, &uv_intra_rate, &uv_intra_rate_tokenonly,
1935
747k
                              &uv_intra_distortion);
1936
747k
      uv_intra_mode = x->e_mbd.mode_info_context->mbmi.uv_mode;
1937
1938
      /*
1939
       * Total of the eobs is used later to further adjust rate2. Since uv
1940
       * block's intra eobs will be overwritten when we check inter modes,
1941
       * we need to save uv_intra_tteob here.
1942
       */
1943
6.72M
      for (i = 16; i < 24; ++i) uv_intra_tteob += x->e_mbd.eobs[i];
1944
1945
747k
      uv_intra_done = 1;
1946
747k
    }
1947
1948
8.10M
    switch (this_mode) {
1949
513k
      case B_PRED: {
1950
513k
        int tmp_rd;
1951
1952
        /* Note the rate value returned here includes the cost of
1953
         * coding the BPRED mode: x->mbmode_cost[x->e_mbd.frame_type][BPRED]
1954
         */
1955
513k
        int distortion;
1956
513k
        tmp_rd = rd_pick_intra4x4mby_modes(x, &rate, &rd.rate_y, &distortion,
1957
513k
                                           best_mode.yrd);
1958
513k
        rd.rate2 += rate;
1959
513k
        rd.distortion2 += distortion;
1960
1961
513k
        if (tmp_rd < best_mode.yrd) {
1962
216k
          assert(uv_intra_done);
1963
216k
          rd.rate2 += uv_intra_rate;
1964
216k
          rd.rate_uv = uv_intra_rate_tokenonly;
1965
216k
          rd.distortion2 += uv_intra_distortion;
1966
216k
          rd.distortion_uv = uv_intra_distortion;
1967
297k
        } else {
1968
297k
          this_rd = INT_MAX;
1969
297k
          disable_skip = 1;
1970
297k
        }
1971
513k
        break;
1972
0
      }
1973
1974
694k
      case SPLITMV: {
1975
694k
        int tmp_rd;
1976
694k
        int this_rd_thresh;
1977
694k
        int distortion;
1978
1979
694k
        this_rd_thresh = (vp8_ref_frame_order[mode_index] == 1)
1980
694k
                             ? x->rd_threshes[THR_NEW1]
1981
694k
                             : x->rd_threshes[THR_NEW3];
1982
694k
        this_rd_thresh = (vp8_ref_frame_order[mode_index] == 2)
1983
694k
                             ? x->rd_threshes[THR_NEW2]
1984
694k
                             : this_rd_thresh;
1985
1986
694k
        tmp_rd = vp8_rd_pick_best_mbsegmentation(
1987
694k
            cpi, x, &best_ref_mv, best_mode.yrd, mdcounts, &rate, &rd.rate_y,
1988
694k
            &distortion, this_rd_thresh);
1989
1990
694k
        rd.rate2 += rate;
1991
694k
        rd.distortion2 += distortion;
1992
1993
        /* If even the 'Y' rd value of split is higher than best so far
1994
         * then don't bother looking at UV
1995
         */
1996
694k
        if (tmp_rd < best_mode.yrd) {
1997
          /* Now work out UV cost and add it in */
1998
387k
          rd_inter4x4_uv(cpi, x, &rd.rate_uv, &rd.distortion_uv,
1999
387k
                         cpi->common.full_pixel);
2000
387k
          rd.rate2 += rd.rate_uv;
2001
387k
          rd.distortion2 += rd.distortion_uv;
2002
387k
        } else {
2003
307k
          this_rd = INT_MAX;
2004
307k
          disable_skip = 1;
2005
307k
        }
2006
694k
        break;
2007
0
      }
2008
747k
      case DC_PRED:
2009
1.34M
      case V_PRED:
2010
1.94M
      case H_PRED:
2011
2.52M
      case TM_PRED: {
2012
2.52M
        int distortion;
2013
2.52M
        x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
2014
2015
2.52M
        vp8_build_intra_predictors_mby_s(
2016
2.52M
            xd, xd->dst.y_buffer - xd->dst.y_stride, xd->dst.y_buffer - 1,
2017
2.52M
            xd->dst.y_stride, xd->predictor, 16);
2018
2.52M
        macro_block_yrd(x, &rd.rate_y, &distortion);
2019
2.52M
        rd.rate2 += rd.rate_y;
2020
2.52M
        rd.distortion2 += distortion;
2021
2.52M
        rd.rate2 += x->mbmode_cost[x->e_mbd.frame_type]
2022
2.52M
                                  [x->e_mbd.mode_info_context->mbmi.mode];
2023
2.52M
        assert(uv_intra_done);
2024
2.52M
        rd.rate2 += uv_intra_rate;
2025
2.52M
        rd.rate_uv = uv_intra_rate_tokenonly;
2026
2.52M
        rd.distortion2 += uv_intra_distortion;
2027
2.52M
        rd.distortion_uv = uv_intra_distortion;
2028
2.52M
        break;
2029
1.94M
      }
2030
2031
933k
      case NEWMV: {
2032
933k
        int thissme;
2033
933k
        int bestsme = INT_MAX;
2034
933k
        int step_param = cpi->sf.first_step;
2035
933k
        int further_steps;
2036
933k
        int n;
2037
        /* If last step (1-away) of n-step search doesn't pick the center point
2038
           as the best match, we will do a final 1-away diamond refining search
2039
        */
2040
933k
        int do_refine = 1;
2041
2042
933k
        int sadpb = x->sadperbit16;
2043
933k
        int_mv mvp_full;
2044
2045
933k
        int col_min = ((best_ref_mv.as_mv.col + 7) >> 3) - MAX_FULL_PEL_VAL;
2046
933k
        int row_min = ((best_ref_mv.as_mv.row + 7) >> 3) - MAX_FULL_PEL_VAL;
2047
933k
        int col_max = (best_ref_mv.as_mv.col >> 3) + MAX_FULL_PEL_VAL;
2048
933k
        int row_max = (best_ref_mv.as_mv.row >> 3) + MAX_FULL_PEL_VAL;
2049
2050
933k
        int tmp_col_min = x->mv_col_min;
2051
933k
        int tmp_col_max = x->mv_col_max;
2052
933k
        int tmp_row_min = x->mv_row_min;
2053
933k
        int tmp_row_max = x->mv_row_max;
2054
2055
933k
        if (!saddone) {
2056
623k
          vp8_cal_sad(cpi, xd, x, recon_yoffset, &near_sadidx[0]);
2057
623k
          saddone = 1;
2058
623k
        }
2059
2060
933k
        vp8_mv_pred(cpi, &x->e_mbd, x->e_mbd.mode_info_context, &mvp,
2061
933k
                    x->e_mbd.mode_info_context->mbmi.ref_frame,
2062
933k
                    cpi->common.ref_frame_sign_bias, &sr, &near_sadidx[0]);
2063
2064
933k
        mvp_full.as_mv.col = mvp.as_mv.col >> 3;
2065
933k
        mvp_full.as_mv.row = mvp.as_mv.row >> 3;
2066
2067
        /* Get intersection of UMV window and valid MV window to
2068
         * reduce # of checks in diamond search.
2069
         */
2070
933k
        if (x->mv_col_min < col_min) x->mv_col_min = col_min;
2071
933k
        if (x->mv_col_max > col_max) x->mv_col_max = col_max;
2072
933k
        if (x->mv_row_min < row_min) x->mv_row_min = row_min;
2073
933k
        if (x->mv_row_max > row_max) x->mv_row_max = row_max;
2074
2075
        /* adjust search range according to sr from mv prediction */
2076
933k
        if (sr > step_param) step_param = sr;
2077
2078
        /* Initial step/diamond search */
2079
933k
        {
2080
933k
          bestsme = cpi->diamond_search_sad(
2081
933k
              x, b, d, &mvp_full, &d->bmi.mv, step_param, sadpb, &num00,
2082
933k
              &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv);
2083
933k
          mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
2084
2085
          /* Further step/diamond searches as necessary */
2086
933k
          further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
2087
2088
933k
          n = num00;
2089
933k
          num00 = 0;
2090
2091
          /* If there won't be more n-step search, check to see if refining
2092
           * search is needed. */
2093
933k
          if (n > further_steps) do_refine = 0;
2094
2095
4.08M
          while (n < further_steps) {
2096
3.15M
            n++;
2097
2098
3.15M
            if (num00) {
2099
283k
              num00--;
2100
2.86M
            } else {
2101
2.86M
              thissme = cpi->diamond_search_sad(
2102
2.86M
                  x, b, d, &mvp_full, &d->bmi.mv, step_param + n, sadpb, &num00,
2103
2.86M
                  &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv);
2104
2105
              /* check to see if refining search is needed. */
2106
2.86M
              if (num00 > (further_steps - n)) do_refine = 0;
2107
2108
2.86M
              if (thissme < bestsme) {
2109
441k
                bestsme = thissme;
2110
441k
                mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
2111
2.42M
              } else {
2112
2.42M
                d->bmi.mv.as_int = mode_mv[NEWMV].as_int;
2113
2.42M
              }
2114
2.86M
            }
2115
3.15M
          }
2116
933k
        }
2117
2118
        /* final 1-away diamond refining search */
2119
933k
        if (do_refine == 1) {
2120
615k
          int search_range;
2121
2122
615k
          search_range = 8;
2123
2124
615k
          thissme = cpi->refining_search_sad(
2125
615k
              x, b, d, &d->bmi.mv, sadpb, search_range,
2126
615k
              &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv);
2127
2128
615k
          if (thissme < bestsme) {
2129
28.0k
            bestsme = thissme;
2130
28.0k
            mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
2131
587k
          } else {
2132
587k
            d->bmi.mv.as_int = mode_mv[NEWMV].as_int;
2133
587k
          }
2134
615k
        }
2135
2136
933k
        x->mv_col_min = tmp_col_min;
2137
933k
        x->mv_col_max = tmp_col_max;
2138
933k
        x->mv_row_min = tmp_row_min;
2139
933k
        x->mv_row_max = tmp_row_max;
2140
2141
933k
        if (bestsme < INT_MAX) {
2142
933k
          int dis; /* TODO: use dis in distortion calculation later. */
2143
933k
          unsigned int sse;
2144
933k
          cpi->find_fractional_mv_step(
2145
933k
              x, b, d, &d->bmi.mv, &best_ref_mv, x->errorperbit,
2146
933k
              &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &dis, &sse);
2147
933k
        }
2148
2149
933k
        mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
2150
2151
        /* Add the new motion vector cost to our rolling cost variable */
2152
933k
        rd.rate2 +=
2153
933k
            vp8_mv_bit_cost(&mode_mv[NEWMV], &best_ref_mv, x->mvcost, 96);
2154
933k
      }
2155
        // fall through
2156
2157
2.07M
      case NEARESTMV:
2158
3.21M
      case NEARMV:
2159
        /* Clip "next_nearest" so that it does not extend to far out
2160
         * of image
2161
         */
2162
3.21M
        vp8_clamp_mv2(&mode_mv[this_mode], xd);
2163
2164
        /* Do not bother proceeding if the vector (from newmv, nearest
2165
         * or near) is 0,0 as this should then be coded using the zeromv
2166
         * mode.
2167
         */
2168
3.21M
        if (((this_mode == NEARMV) || (this_mode == NEARESTMV)) &&
2169
2.28M
            (mode_mv[this_mode].as_int == 0)) {
2170
1.61M
          continue;
2171
1.61M
        }
2172
        // fall through
2173
2174
2.74M
      case ZEROMV:
2175
2176
        /* Trap vectors that reach beyond the UMV borders
2177
         * Note that ALL New MV, Nearest MV Near MV and Zero MV code
2178
         * drops through to this point because of the lack of break
2179
         * statements in the previous two cases.
2180
         */
2181
2.74M
        if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) ||
2182
2.74M
            ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) ||
2183
2.74M
            ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) ||
2184
2.74M
            ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max)) {
2185
0
          continue;
2186
0
        }
2187
2188
2.74M
        vp8_set_mbmode_and_mvs(x, this_mode, &mode_mv[this_mode]);
2189
2.74M
        this_rd = evaluate_inter_mode_rd(mdcounts, &rd, &disable_skip, cpi, x);
2190
2.74M
        break;
2191
2192
0
      default: break;
2193
8.10M
    }
2194
2195
6.48M
    this_rd =
2196
6.48M
        calculate_final_rd_costs(this_rd, &rd, &other_cost, disable_skip,
2197
6.48M
                                 uv_intra_tteob, intra_rd_penalty, cpi, x);
2198
2199
    /* Keep record of best intra distortion */
2200
6.48M
    if ((x->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME) &&
2201
3.04M
        (this_rd < best_mode.intra_rd)) {
2202
1.11M
      best_mode.intra_rd = this_rd;
2203
1.11M
      *returnintra = rd.distortion2;
2204
1.11M
    }
2205
6.48M
#if CONFIG_TEMPORAL_DENOISING
2206
6.48M
    if (cpi->oxcf.noise_sensitivity) {
2207
0
      unsigned int sse;
2208
0
      vp8_get_inter_mbpred_error(x, &cpi->fn_ptr[BLOCK_16X16], &sse,
2209
0
                                 mode_mv[this_mode]);
2210
2211
0
      if (sse < best_rd_sse) best_rd_sse = sse;
2212
2213
      /* Store for later use by denoiser. */
2214
0
      if (this_mode == ZEROMV && sse < zero_mv_sse) {
2215
0
        zero_mv_sse = sse;
2216
0
        x->best_zeromv_reference_frame =
2217
0
            x->e_mbd.mode_info_context->mbmi.ref_frame;
2218
0
      }
2219
2220
      /* Store the best NEWMV in x for later use in the denoiser. */
2221
0
      if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV && sse < best_sse) {
2222
0
        best_sse = sse;
2223
0
        vp8_get_inter_mbpred_error(x, &cpi->fn_ptr[BLOCK_16X16], &best_sse,
2224
0
                                   mode_mv[this_mode]);
2225
0
        x->best_sse_inter_mode = NEWMV;
2226
0
        x->best_sse_mv = x->e_mbd.mode_info_context->mbmi.mv;
2227
0
        x->need_to_clamp_best_mvs =
2228
0
            x->e_mbd.mode_info_context->mbmi.need_to_clamp_mvs;
2229
0
        x->best_reference_frame = x->e_mbd.mode_info_context->mbmi.ref_frame;
2230
0
      }
2231
0
    }
2232
6.48M
#endif
2233
2234
    /* Did this mode help.. i.i is it the new best mode */
2235
6.48M
    if (this_rd < best_mode.rd || x->skip) {
2236
      /* Note index of best mode so far */
2237
2.41M
      best_mode_index = mode_index;
2238
2.41M
      *returnrate = rd.rate2;
2239
2.41M
      *returndistortion = rd.distortion2;
2240
2.41M
      if (this_mode <= B_PRED) {
2241
984k
        x->e_mbd.mode_info_context->mbmi.uv_mode = uv_intra_mode;
2242
        /* required for left and above block mv */
2243
984k
        x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
2244
984k
      }
2245
2.41M
      update_best_mode(&best_mode, this_rd, &rd, other_cost, x);
2246
2247
      /* Testing this mode gave rise to an improvement in best error
2248
       * score. Lower threshold a bit for next time
2249
       */
2250
2.41M
      x->rd_thresh_mult[mode_index] =
2251
2.41M
          (x->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2))
2252
2.41M
              ? x->rd_thresh_mult[mode_index] - 2
2253
2.41M
              : MIN_THRESHMULT;
2254
2.41M
    }
2255
2256
    /* If the mode did not help improve the best error case then raise
2257
     * the threshold for testing that mode next time around.
2258
     */
2259
4.06M
    else {
2260
4.06M
      x->rd_thresh_mult[mode_index] += 4;
2261
2262
4.06M
      if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT) {
2263
2.00M
        x->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
2264
2.00M
      }
2265
4.06M
    }
2266
6.48M
    x->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) *
2267
6.48M
                                 x->rd_thresh_mult[mode_index];
2268
2269
6.48M
    if (x->skip) break;
2270
6.48M
  }
2271
2272
  /* Reduce the activation RD thresholds for the best choice mode */
2273
747k
  if ((cpi->rd_baseline_thresh[best_mode_index] > 0) &&
2274
491k
      (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2))) {
2275
491k
    int best_adjustment = (x->rd_thresh_mult[best_mode_index] >> 2);
2276
2277
491k
    x->rd_thresh_mult[best_mode_index] =
2278
491k
        (x->rd_thresh_mult[best_mode_index] >=
2279
491k
         (MIN_THRESHMULT + best_adjustment))
2280
491k
            ? x->rd_thresh_mult[best_mode_index] - best_adjustment
2281
491k
            : MIN_THRESHMULT;
2282
491k
    x->rd_threshes[best_mode_index] =
2283
491k
        (cpi->rd_baseline_thresh[best_mode_index] >> 7) *
2284
491k
        x->rd_thresh_mult[best_mode_index];
2285
491k
  }
2286
2287
747k
#if CONFIG_TEMPORAL_DENOISING
2288
747k
  if (cpi->oxcf.noise_sensitivity) {
2289
0
    int block_index = mb_row * cpi->common.mb_cols + mb_col;
2290
0
    if (x->best_sse_inter_mode == DC_PRED) {
2291
      /* No best MV found. */
2292
0
      x->best_sse_inter_mode = best_mode.mbmode.mode;
2293
0
      x->best_sse_mv = best_mode.mbmode.mv;
2294
0
      x->need_to_clamp_best_mvs = best_mode.mbmode.need_to_clamp_mvs;
2295
0
      x->best_reference_frame = best_mode.mbmode.ref_frame;
2296
0
      best_sse = best_rd_sse;
2297
0
    }
2298
0
    vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse,
2299
0
                            recon_yoffset, recon_uvoffset, &cpi->common.lf_info,
2300
0
                            mb_row, mb_col, block_index, 0);
2301
2302
    /* Reevaluate ZEROMV after denoising. */
2303
0
    if (best_mode.mbmode.ref_frame == INTRA_FRAME &&
2304
0
        x->best_zeromv_reference_frame != INTRA_FRAME) {
2305
0
      int this_rd = INT_MAX;
2306
0
      int disable_skip = 0;
2307
0
      int other_cost = 0;
2308
0
      int this_ref_frame = x->best_zeromv_reference_frame;
2309
0
      rd.rate2 =
2310
0
          x->ref_frame_cost[this_ref_frame] + vp8_cost_mv_ref(ZEROMV, mdcounts);
2311
0
      rd.distortion2 = 0;
2312
2313
      /* set up the proper prediction buffers for the frame */
2314
0
      x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame;
2315
0
      x->e_mbd.pre.y_buffer = plane[this_ref_frame][0];
2316
0
      x->e_mbd.pre.u_buffer = plane[this_ref_frame][1];
2317
0
      x->e_mbd.pre.v_buffer = plane[this_ref_frame][2];
2318
2319
0
      x->e_mbd.mode_info_context->mbmi.mode = ZEROMV;
2320
0
      x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
2321
0
      x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
2322
2323
0
      this_rd = evaluate_inter_mode_rd(mdcounts, &rd, &disable_skip, cpi, x);
2324
0
      this_rd =
2325
0
          calculate_final_rd_costs(this_rd, &rd, &other_cost, disable_skip,
2326
0
                                   uv_intra_tteob, intra_rd_penalty, cpi, x);
2327
0
      if (this_rd < best_mode.rd || x->skip) {
2328
0
        *returnrate = rd.rate2;
2329
0
        *returndistortion = rd.distortion2;
2330
0
        update_best_mode(&best_mode, this_rd, &rd, other_cost, x);
2331
0
      }
2332
0
    }
2333
0
  }
2334
747k
#endif
2335
2336
747k
  if (cpi->is_src_frame_alt_ref &&
2337
0
      (best_mode.mbmode.mode != ZEROMV ||
2338
0
       best_mode.mbmode.ref_frame != ALTREF_FRAME)) {
2339
0
    x->e_mbd.mode_info_context->mbmi.mode = ZEROMV;
2340
0
    x->e_mbd.mode_info_context->mbmi.ref_frame = ALTREF_FRAME;
2341
0
    x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
2342
0
    x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
2343
0
    x->e_mbd.mode_info_context->mbmi.mb_skip_coeff =
2344
0
        (cpi->common.mb_no_coeff_skip);
2345
0
    x->e_mbd.mode_info_context->mbmi.partitioning = 0;
2346
0
    return;
2347
0
  }
2348
2349
  /* macroblock modes */
2350
747k
  x->e_mbd.mode_info_context->mbmi = best_mode.mbmode;
2351
2352
747k
  if (best_mode.mbmode.mode == B_PRED) {
2353
3.57M
    for (i = 0; i < 16; ++i) {
2354
3.36M
      xd->mode_info_context->bmi[i].as_mode = best_mode.bmodes[i].as_mode;
2355
3.36M
    }
2356
210k
  }
2357
2358
747k
  if (best_mode.mbmode.mode == SPLITMV) {
2359
2.97M
    for (i = 0; i < 16; ++i) {
2360
2.79M
      xd->mode_info_context->bmi[i].mv.as_int = best_mode.bmodes[i].mv.as_int;
2361
2.79M
    }
2362
2363
174k
    *x->partition_info = best_mode.partition;
2364
2365
174k
    x->e_mbd.mode_info_context->mbmi.mv.as_int =
2366
174k
        x->partition_info->bmi[15].mv.as_int;
2367
174k
  }
2368
2369
747k
  if (sign_bias !=
2370
747k
      cpi->common.ref_frame_sign_bias[xd->mode_info_context->mbmi.ref_frame]) {
2371
0
    best_ref_mv.as_int = best_ref_mv_sb[!sign_bias].as_int;
2372
0
  }
2373
2374
747k
  rd_update_mvcount(x, &best_ref_mv);
2375
747k
}
2376
2377
739k
void vp8_rd_pick_intra_mode(MACROBLOCK *x, int *rate) {
2378
739k
  int error4x4, error16x16;
2379
739k
  int rate4x4, rate16x16 = 0, rateuv;
2380
739k
  int dist4x4, dist16x16, distuv;
2381
739k
  int rate_;
2382
739k
  int rate4x4_tokenonly = 0;
2383
739k
  int rate16x16_tokenonly = 0;
2384
739k
  int rateuv_tokenonly = 0;
2385
2386
739k
  x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
2387
2388
739k
  rd_pick_intra_mbuv_mode(x, &rateuv, &rateuv_tokenonly, &distuv);
2389
739k
  rate_ = rateuv;
2390
2391
739k
  error16x16 = rd_pick_intra16x16mby_mode(x, &rate16x16, &rate16x16_tokenonly,
2392
739k
                                          &dist16x16);
2393
2394
739k
  error4x4 = rd_pick_intra4x4mby_modes(x, &rate4x4, &rate4x4_tokenonly,
2395
739k
                                       &dist4x4, error16x16);
2396
2397
739k
  if (error4x4 < error16x16) {
2398
358k
    x->e_mbd.mode_info_context->mbmi.mode = B_PRED;
2399
358k
    rate_ += rate4x4;
2400
380k
  } else {
2401
380k
    rate_ += rate16x16;
2402
380k
  }
2403
2404
739k
  *rate = rate_;
2405
739k
}