Coverage Report

Created: 2026-01-16 07:48

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libvpx/vp8/encoder/rdopt.c
Line
Count
Source
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5
 *  that can be found in the LICENSE file in the root of the source
6
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
9
 */
10
11
#include <assert.h>
12
#include <stdio.h>
13
#include <math.h>
14
#include <limits.h>
15
#include <assert.h>
16
#include "vpx_config.h"
17
#include "vp8_rtcd.h"
18
#include "./vpx_dsp_rtcd.h"
19
#include "encodeframe.h"
20
#include "tokenize.h"
21
#include "treewriter.h"
22
#include "onyx_int.h"
23
#include "modecosts.h"
24
#include "encodeintra.h"
25
#include "pickinter.h"
26
#include "vp8/common/common.h"
27
#include "vp8/common/entropymode.h"
28
#include "vp8/common/reconinter.h"
29
#include "vp8/common/reconintra.h"
30
#include "vp8/common/reconintra4x4.h"
31
#include "vp8/common/findnearmv.h"
32
#include "vp8/common/quant_common.h"
33
#include "encodemb.h"
34
#include "vp8/encoder/quantize.h"
35
#include "vpx_dsp/variance.h"
36
#include "vpx_ports/system_state.h"
37
#include "mcomp.h"
38
#include "rdopt.h"
39
#include "vpx_mem/vpx_mem.h"
40
#include "vp8/common/systemdependent.h"
41
#if CONFIG_TEMPORAL_DENOISING
42
#include "denoising.h"
43
#endif
44
extern void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x);
45
46
1.48M
/* Larger of a and b.  Whichever argument is selected is evaluated twice, so
 * avoid passing expressions with side effects. */
#define MAXF(a, b) ((a) > (b) ? (a) : (b))
47
48
/* Bundle of rate and distortion values.  The field order is rate2, rate_y,
 * rate_uv, distortion2, distortion_uv; how each one is used is decided by
 * the code that fills the struct in. */
typedef struct rate_distortion_struct {
  int rate2, rate_y, rate_uv;
  int distortion2, distortion_uv;
} RATE_DISTORTION;
55
56
typedef struct best_mode_struct {
57
  int yrd;
58
  int rd;
59
  int intra_rd;
60
  MB_MODE_INFO mbmode;
61
  union b_mode_info bmodes[16];
62
  PARTITION_INFO partition;
63
} BEST_MODE;
64
65
/* Per-speed factor, indexed by cpi->Speed (0..16) in vp8_auto_select_speed().
 * It multiplies the average encode time before the comparison against
 * 100 times the time budget. */
static const int auto_speed_thresh[17] = {
  1000, 200, 150, 130, /* speeds 0-3 */
  150,  125, 120, 115, /* speeds 4-7 */
  115,  115, 115, 115, /* speeds 8-11 */
  115,  115, 115, 115, /* speeds 12-15 */
  105                  /* speed 16 */
};
68
69
const MB_PREDICTION_MODE vp8_mode_order[MAX_MODES] = {
70
  ZEROMV,    DC_PRED,
71
72
  NEARESTMV, NEARMV,
73
74
  ZEROMV,    NEARESTMV,
75
76
  ZEROMV,    NEARESTMV,
77
78
  NEARMV,    NEARMV,
79
80
  V_PRED,    H_PRED,    TM_PRED,
81
82
  NEWMV,     NEWMV,     NEWMV,
83
84
  SPLITMV,   SPLITMV,   SPLITMV,
85
86
  B_PRED,
87
};
88
89
/* This table determines the search order in reference frame priority order,
90
 * which may not necessarily match INTRA,LAST,GOLDEN,ARF
91
 */
92
const int vp8_ref_frame_order[MAX_MODES] = {
93
  1, 0,
94
95
  1, 1,
96
97
  2, 2,
98
99
  3, 3,
100
101
  2, 3,
102
103
  0, 0, 0,
104
105
  1, 2, 3,
106
107
  1, 2, 3,
108
109
  0,
110
};
111
112
static void fill_token_costs(
113
    int c[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS],
114
    const vp8_prob p[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS]
115
140k
                    [ENTROPY_NODES]) {
116
140k
  int i, j, k;
117
118
704k
  for (i = 0; i < BLOCK_TYPES; ++i) {
119
5.07M
    for (j = 0; j < COEF_BANDS; ++j) {
120
18.0M
      for (k = 0; k < PREV_COEF_CONTEXTS; ++k) {
121
        /* check for pt=0 and band > 1 if block type 0
122
         * and 0 if blocktype 1
123
         */
124
13.5M
        if (k == 0 && j > (i == 0)) {
125
3.80M
          vp8_cost_tokens2(c[i][j][k], p[i][j][k], vp8_coef_tree, 2);
126
9.72M
        } else {
127
9.72M
          vp8_cost_tokens(c[i][j][k], p[i][j][k], vp8_coef_tree);
128
9.72M
        }
129
13.5M
      }
130
4.51M
    }
131
563k
  }
132
140k
}
133
134
/* RDMULT boost in sixteenths, indexed by the two-pass next_iiratio clamped
 * to 31.  Only the first five entries are non-zero; the remaining 27 are
 * zero-initialized implicitly. */
static const int rd_iifactor[32] = { 4, 4, 3, 2, 1 };
137
138
/* values are now correlated to quantizer */
139
static const int sad_per_bit16lut[QINDEX_RANGE] = {
140
  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  3,  3,  3,
141
  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  4,  4,  4,  4,  4,  4,  4,  4,
142
  4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  6,  6,  6,
143
  6,  6,  6,  6,  6,  6,  6,  6,  6,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
144
  7,  7,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  9,  9,  9,  9,  9,
145
  9,  9,  9,  9,  9,  9,  9,  10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11,
146
  11, 11, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 14, 14
147
};
148
/* Looked up by QIndex in vp8cx_initialize_me_consts() to set
 * cpi->mb.sadperbit4.  Laid out one run of equal values per row; the run
 * lengths are given on the right and add up to 128 entries.
 */
static const int sad_per_bit4lut[QINDEX_RANGE] = {
  2,  2,  2,  2,  2,  2,                                 /* x6 */
  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3, /* x14 */
  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,                 /* x10 */
  5,  5,  5,  5,  5,  5,                                 /* x6 */
  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,         /* x12 */
  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,     /* x13 */
  8,  8,  8,  8,  8,                                     /* x5 */
  9,  9,  9,  9,  9,  9,                                 /* x6 */
  10, 10, 10, 10, 10, 10, 10, 10,                         /* x8 */
  11, 11, 11, 11, 11, 11, 11, 11,                         /* x8 */
  12, 12, 12, 12, 12, 12, 12, 12,                         /* x8 */
  13, 13, 13, 13, 13, 13, 13,                             /* x7 */
  14, 14, 14, 14, 14,                                     /* x5 */
  15, 15, 15, 15,                                         /* x4 */
  16, 16, 16, 16,                                         /* x4 */
  17, 17, 17,                                             /* x3 */
  18, 18, 18,                                             /* x3 */
  19, 19, 19,                                             /* x3 */
  20, 20, 20,                                             /* x3 */
};
157
158
140k
/* Set the SAD-per-bit values in cpi->mb from the two lookup tables.
 *
 * QIndex is used directly as the table index and is not range-checked here.
 */
void vp8cx_initialize_me_consts(VP8_COMP *cpi, int QIndex) {
  const int per_bit16 = sad_per_bit16lut[QIndex];
  const int per_bit4 = sad_per_bit4lut[QIndex];

  cpi->mb.sadperbit16 = per_bit16;
  cpi->mb.sadperbit4 = per_bit4;
}
162
163
140k
/* Derive the rate-distortion constants for the given quantizer value.
 *
 * Writes cpi->RDMULT, cpi->RDDIV and cpi->mb.errorperbit, refreshes the speed
 * features, clears x->mode_test_hit_counts, fills x->rd_threshes and
 * cpi->rd_baseline_thresh, and finally rebuilds the token costs and the mode
 * costs.
 */
void vp8_initialize_rd_consts(VP8_COMP *cpi, MACROBLOCK *x, int Qvalue) {
  const double rd_scale = 2.80;
  const double q_capped = (Qvalue < 160) ? (double)Qvalue : 160.0;
  FRAME_CONTEXT *probs_ctx;
  int rescale;
  int mode;
  int q;

  vpx_clear_system_state();

  /* Further tests required to see if optimum is different
   * for key frames, golden frames and arf frames.
   */
  cpi->RDMULT = (int)(rd_scale * (q_capped * q_capped));

  /* Extend rate multiplier along side quantizer zbin increases */
  if (cpi->mb.zbin_over_quant > 0) {
    /* Experimental code using the same basic equation as used for Q above
     * The units of cpi->mb.zbin_over_quant are 1/128 of Q bin size
     */
    const double over_quant_scale =
        1.0 + ((double)0.0015625 * cpi->mb.zbin_over_quant);
    /* The product is truncated to an integer before being squared. */
    const double q_boosted = (int)((double)q_capped * over_quant_scale);

    cpi->RDMULT = (int)(rd_scale * (q_boosted * q_boosted));
  }

  /* Second pass, non-key frames: add rd_iifactor/16 of RDMULT, with the
   * table index clamped to its last entry. */
  if (cpi->pass == 2 && cpi->common.frame_type != KEY_FRAME) {
    const int boost = rd_iifactor[(cpi->twopass.next_iiratio > 31)
                                      ? 31
                                      : cpi->twopass.next_iiratio];

    cpi->RDMULT += (cpi->RDMULT * boost) >> 4;
  }

  /* errorperbit is RDMULT / 110 but never less than 1. */
  cpi->mb.errorperbit = (cpi->RDMULT / 110);
  if (cpi->mb.errorperbit == 0) cpi->mb.errorperbit = 1;

  vp8_set_speed_features(cpi);

  for (mode = 0; mode < MAX_MODES; ++mode) x->mode_test_hit_counts[mode] = 0;

  q = (int)pow(Qvalue, 1.25);
  if (q < 8) q = 8;

  /* A large multiplier is divided by 100 and paired with RDDIV = 1; the
   * mode thresholds are then divided by 100 as well.  Otherwise RDDIV is
   * 100 and the thresholds are used unscaled. */
  rescale = (cpi->RDMULT > 1000);
  if (rescale) {
    cpi->RDDIV = 1;
    cpi->RDMULT /= 100;
  } else {
    cpi->RDDIV = 100;
  }

  for (mode = 0; mode < MAX_MODES; ++mode) {
    if (rescale) {
      x->rd_threshes[mode] = (cpi->sf.thresh_mult[mode] < INT_MAX)
                                 ? cpi->sf.thresh_mult[mode] * q / 100
                                 : INT_MAX;
    } else {
      x->rd_threshes[mode] = (cpi->sf.thresh_mult[mode] < (INT_MAX / q))
                                 ? cpi->sf.thresh_mult[mode] * q
                                 : INT_MAX;
    }

    cpi->rd_baseline_thresh[mode] = x->rd_threshes[mode];
  }

  /* build token cost array for the type of frame we have now */
  if (cpi->common.refresh_alt_ref_frame) {
    probs_ctx = &cpi->lfc_a;
  } else if (cpi->common.refresh_golden_frame) {
    probs_ctx = &cpi->lfc_g;
  } else {
    probs_ctx = &cpi->lfc_n;
  }

  fill_token_costs(cpi->mb.token_costs,
                   (const vp8_prob(*)[8][3][11])probs_ctx->coef_probs);

  /* TODO make these mode costs depend on last,alt or gold too.  (jbb) */
  vp8_init_mode_costs(cpi);
}
260
261
51.6k
/* Adapt cpi->Speed to the measured encode and mode-pick times.
 *
 * The per-frame time budget is 1000000 / framerate, scaled by
 * (16 - cpu_used) / 16.  NOTE(review): the original local was named
 * "milliseconds_for_compress", but with a frames-per-second rate this
 * quotient looks like microseconds - confirm the units of the averages.
 *
 * Whenever the speed is stepped up or down, both running averages are reset
 * to zero.
 */
void vp8_auto_select_speed(VP8_COMP *cpi) {
  int budget = (int)(1000000 / cpi->framerate);

  budget = budget * (16 - cpi->oxcf.cpu_used) / 16;

  /* Over budget: jump four steps faster, capped at 16. */
  if (!(cpi->avg_pick_mode_time < budget &&
        (cpi->avg_encode_time - cpi->avg_pick_mode_time) < budget)) {
    cpi->Speed += 4;
    if (cpi->Speed > 16) cpi->Speed = 16;

    cpi->avg_pick_mode_time = 0;
    cpi->avg_encode_time = 0;
    return;
  }

  /* No mode-pick average yet: use speed 4. */
  if (cpi->avg_pick_mode_time == 0) {
    cpi->Speed = 4;
    return;
  }

  /* Budget is under 95% of the average encode time: two steps faster. */
  if (budget * 100 < cpi->avg_encode_time * 95) {
    cpi->Speed += 2;
    cpi->avg_pick_mode_time = 0;
    cpi->avg_encode_time = 0;

    if (cpi->Speed > 16) cpi->Speed = 16;
  }

  /* Budget exceeds the per-speed threshold: one step slower.  This test is
   * made after the step-up above and therefore sees any average it reset. */
  if (budget * 100 > cpi->avg_encode_time * auto_speed_thresh[cpi->Speed]) {
    cpi->Speed -= 1;
    cpi->avg_pick_mode_time = 0;
    cpi->avg_encode_time = 0;

    /* In real-time mode, cpi->speed is in [4, 16]. */
    if (cpi->Speed < 4) cpi->Speed = 4;
  }
}
318
319
0
/* Sum of squared differences between the 16 entries of coeff and the 16
 * entries of dqcoeff.  The sum is accumulated in an int. */
int vp8_block_error_c(short *coeff, short *dqcoeff) {
  const short *src = coeff;
  const short *rec = dqcoeff;
  const short *const src_end = coeff + 16;
  int sse = 0;

  while (src < src_end) {
    const int delta = *src++ - *rec++;
    sse += delta * delta;
  }

  return sse;
}
330
331
0
/* Sum of squared coefficient errors over blocks 0..15 of the macroblock.
 *
 * dc is the first coefficient index counted in every block, so passing 1
 * leaves coefficient 0 of each block out of the sum.
 */
int vp8_mbblock_error_c(MACROBLOCK *mb, int dc) {
  int total = 0;
  int blk;

  for (blk = 0; blk < 16; ++blk) {
    const BLOCK *const be = &mb->block[blk];
    const BLOCKD *const bd = &mb->e_mbd.block[blk];
    int k;

    for (k = dc; k < 16; ++k) {
      const int delta = be->coeff[k] - bd->dqcoeff[k];
      total += delta * delta;
    }
  }

  return total;
}
353
354
0
/* Sum of vp8_block_error_c() over blocks 16..23 of the macroblock. */
int vp8_mbuverror_c(MACROBLOCK *mb) {
  int total = 0;
  int blk = 16;

  while (blk < 24) {
    BLOCK *const be = &mb->block[blk];
    BLOCKD *const bd = &mb->e_mbd.block[blk];

    total += vp8_block_error_c(be->coeff, bd->dqcoeff);
    ++blk;
  }

  return total;
}
370
371
13.5k
/* Combined SSE of the two 8x8 blocks addressed through block[16] and
 * block[20] against x->e_mbd.pre.u_buffer / v_buffer, displaced by a motion
 * vector derived from the macroblock's mv.
 *
 * Each mv component is moved one step away from zero and halved.  The result
 * is split into a whole-pixel offset (>> 3) and a fraction (& 7); the
 * sub-pixel variance routine is used when either fraction is non-zero.
 */
int VP8_UVSSE(MACROBLOCK *x) {
  unsigned char *const u_src = *(x->block[16].base_src) + x->block[16].src;
  unsigned char *const v_src = *(x->block[20].base_src) + x->block[20].src;
  const int src_stride = x->block[16].src_stride;
  const int ref_stride = x->e_mbd.pre.uv_stride;
  int mv_row = x->e_mbd.mode_info_context->mbmi.mv.as_mv.row;
  int mv_col = x->e_mbd.mode_info_context->mbmi.mv.as_mv.col;
  unsigned char *u_ref;
  unsigned char *v_ref;
  unsigned int sse_u = 0;
  unsigned int sse_v = 0;
  int offset;

  mv_row += (mv_row < 0) ? -1 : 1;
  mv_col += (mv_col < 0) ? -1 : 1;

  mv_row /= 2;
  mv_col /= 2;

  offset = (mv_row >> 3) * ref_stride + (mv_col >> 3);
  u_ref = x->e_mbd.pre.u_buffer + offset;
  v_ref = x->e_mbd.pre.v_buffer + offset;

  if (((mv_row | mv_col) & 7) == 0) {
    vpx_variance8x8(u_ref, ref_stride, u_src, src_stride, &sse_u);
    vpx_variance8x8(v_ref, ref_stride, v_src, src_stride, &sse_v);
  } else {
    vpx_sub_pixel_variance8x8(u_ref, ref_stride, mv_col & 7, mv_row & 7,
                              u_src, src_stride, &sse_u);
    vpx_sub_pixel_variance8x8(v_ref, ref_stride, mv_col & 7, mv_row & 7,
                              v_src, src_stride, &sse_v);
  }

  /* Only the SSE outputs are used; the variance return values are ignored. */
  return sse_u + sse_v;
}
416
417
/* Token cost of the quantized coefficients of block b.
 *
 * type selects the token cost table and the first coefficient: costing
 * starts at coefficient 0 unless type is 0 (Y with Y2), where it starts at 1.
 * *a and *l supply the starting context and are both overwritten with 1 if
 * at least one coefficient was costed, 0 otherwise.
 */
static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, int type, ENTROPY_CONTEXT *a,
                       ENTROPY_CONTEXT *l) {
  const int first_coef = !type;
  const int eob = (int)(*b->eob);
  const short *const qcoeff = b->qcoeff;
  int pos = first_coef;
  int total = 0;
  int pt; /* surrounding block/prev coef predictor */

  VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);

  assert(eob <= 16);
  while (pos < eob) {
    const int value = qcoeff[vp8_default_zig_zag1d[pos]];
    const int token = vp8_dct_value_tokens_ptr[value].Token;

    total += mb->token_costs[type][vp8_coef_bands[pos]][pt][token];
    total += vp8_dct_value_cost_ptr[value];
    pt = vp8_prev_token_class[token];
    ++pos;
  }

  /* An end-of-block token is costed only when the block is not full. */
  if (pos < 16) {
    total += mb->token_costs[type][vp8_coef_bands[pos]][pt][DCT_EOB_TOKEN];
  }

  /* is eob first coefficient; */
  *a = *l = (pos != first_coef);

  return total;
}
445
446
7.99M
/* Token cost of the 16 Y blocks (costed as PLANE_TYPE_Y_NO_DC) plus block 24
 * (costed as PLANE_TYPE_Y2).
 *
 * The costing runs on local copies of the above/left entropy contexts, so
 * the macroblock's own contexts are left untouched.
 */
static int vp8_rdcost_mby(MACROBLOCK *mb) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  ENTROPY_CONTEXT_PLANES above_copy = *xd->above_context;
  ENTROPY_CONTEXT_PLANES left_copy = *xd->left_context;
  ENTROPY_CONTEXT *const ta = (ENTROPY_CONTEXT *)&above_copy;
  ENTROPY_CONTEXT *const tl = (ENTROPY_CONTEXT *)&left_copy;
  int total = 0;
  int blk;

  for (blk = 0; blk < 16; ++blk) {
    total += cost_coeffs(mb, xd->block + blk, PLANE_TYPE_Y_NO_DC,
                         ta + vp8_block2above[blk], tl + vp8_block2left[blk]);
  }

  total += cost_coeffs(mb, xd->block + 24, PLANE_TYPE_Y2,
                       ta + vp8_block2above[24], tl + vp8_block2left[24]);

  return total;
}
470
471
7.99M
/* Rate and distortion of the luma plane for the predictor currently held in
 * mb->e_mbd.predictor.
 *
 * Subtracts the predictor, transforms the 16 Y blocks in pairs, gathers the
 * first coefficient of every block into block 24, transforms that block with
 * the Walsh transform, quantizes all 17 blocks, and writes the token cost to
 * *Rate and the scaled coefficient error to *Distortion.
 */
static void macro_block_yrd(MACROBLOCK *mb, int *Rate, int *Distortion) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  BLOCK *const y2_enc = mb->block + 24;
  BLOCKD *const y2_dec = xd->block + 24;
  int blk;
  int err;

  vp8_subtract_mby(mb->src_diff, *(mb->block[0].base_src),
                   mb->block[0].src_stride, xd->predictor, 16);

  /* Fdct and building the 2nd order block: each 8x4 call covers two
   * neighbouring blocks, whose first coefficients sit at [0] and [16]. */
  for (blk = 0; blk < 16; blk += 2) {
    BLOCK *const pair = mb->block + blk;

    mb->short_fdct8x4(pair->src_diff, pair->coeff, 32);
    y2_enc->src_diff[blk] = pair->coeff[0];
    y2_enc->src_diff[blk + 1] = pair->coeff[16];
  }

  /* 2nd order fdct */
  mb->short_walsh4x4(y2_enc->src_diff, y2_enc->coeff, 8);

  /* Quantization */
  for (blk = 0; blk < 16; ++blk) {
    mb->quantize_b(&mb->block[blk], &xd->block[blk]);
  }

  /* DC predication and Quantization of 2nd Order block */
  mb->quantize_b(y2_enc, y2_dec);

  /* Distortion: the Y error (first coefficient excluded) is weighted by 4
   * before the second-order error is added; the total is divided by 16. */
  err = vp8_mbblock_error(mb, 1) << 2;
  err += vp8_block_error(y2_enc->coeff, y2_dec->dqcoeff);
  *Distortion = (err >> 4);

  /* rate */
  *Rate = vp8_rdcost_mby(mb);
}
510
511
23.4M
/* Copy the left 4x4 block of a stride-16 predictor buffer into dst, which
 * uses the same stride-16 layout: four rows of four bytes each, located at
 * byte offsets 0, 16, 32 and 48.  Bytes outside those rows are not touched.
 *
 * Each row is copied with memcpy() rather than by reading and writing the
 * buffers through unsigned int pointers.  That keeps the copy independent of
 * the buffers' alignment and of sizeof(unsigned int), and avoids accessing
 * unsigned char storage through an incompatible lvalue type.
 */
static void copy_predictor(unsigned char *dst, const unsigned char *predictor) {
  int row;

  for (row = 0; row < 4; ++row) {
    memcpy(dst + 16 * row, predictor + 16 * row, 4);
  }
}
519
static int rd_pick_intra4x4block(MACROBLOCK *x, BLOCK *be, BLOCKD *b,
520
                                 B_PREDICTION_MODE *best_mode,
521
                                 const int *bmode_costs, ENTROPY_CONTEXT *a,
522
                                 ENTROPY_CONTEXT *l,
523
524
                                 int *bestrate, int *bestratey,
525
12.6M
                                 int *bestdistortion) {
526
12.6M
  B_PREDICTION_MODE mode;
527
12.6M
  int best_rd = INT_MAX;
528
12.6M
  int rate = 0;
529
12.6M
  int distortion;
530
531
12.6M
  ENTROPY_CONTEXT ta = *a, tempa = *a;
532
12.6M
  ENTROPY_CONTEXT tl = *l, templ = *l;
533
  /*
534
   * The predictor buffer is a 2d buffer with a stride of 16.  Create
535
   * a temp buffer that meets the stride requirements, but we are only
536
   * interested in the left 4x4 block
537
   * */
538
12.6M
  DECLARE_ALIGNED(16, unsigned char, best_predictor[16 * 4]);
539
12.6M
  DECLARE_ALIGNED(16, short, best_dqcoeff[16]);
540
12.6M
  int dst_stride = x->e_mbd.dst.y_stride;
541
12.6M
  unsigned char *dst = x->e_mbd.dst.y_buffer + b->offset;
542
543
12.6M
  unsigned char *Above = dst - dst_stride;
544
12.6M
  unsigned char *yleft = dst - 1;
545
12.6M
  unsigned char top_left = Above[-1];
546
547
139M
  for (mode = B_DC_PRED; mode <= B_HU_PRED; ++mode) {
548
126M
    int this_rd;
549
126M
    int ratey;
550
551
126M
    rate = bmode_costs[mode];
552
553
126M
    vp8_intra4x4_predict(Above, yleft, dst_stride, mode, b->predictor, 16,
554
126M
                         top_left);
555
126M
    vp8_subtract_b(be, b, 16);
556
126M
    x->short_fdct4x4(be->src_diff, be->coeff, 32);
557
126M
    x->quantize_b(be, b);
558
559
126M
    tempa = ta;
560
126M
    templ = tl;
561
562
126M
    ratey = cost_coeffs(x, b, PLANE_TYPE_Y_WITH_DC, &tempa, &templ);
563
126M
    rate += ratey;
564
126M
    distortion = vp8_block_error(be->coeff, b->dqcoeff) >> 2;
565
566
126M
    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
567
568
126M
    if (this_rd < best_rd) {
569
23.4M
      *bestrate = rate;
570
23.4M
      *bestratey = ratey;
571
23.4M
      *bestdistortion = distortion;
572
23.4M
      best_rd = this_rd;
573
23.4M
      *best_mode = mode;
574
23.4M
      *a = tempa;
575
23.4M
      *l = templ;
576
23.4M
      copy_predictor(best_predictor, b->predictor);
577
23.4M
      memcpy(best_dqcoeff, b->dqcoeff, 32);
578
23.4M
    }
579
126M
  }
580
12.6M
  b->bmi.as_mode = *best_mode;
581
582
12.6M
  vp8_short_idct4x4llm(best_dqcoeff, best_predictor, 16, dst, dst_stride);
583
584
12.6M
  return best_rd;
585
12.6M
}
586
587
/* Pick a 4x4 intra mode for each of the 16 Y blocks (B_PRED).
 *
 * The per-block RD costs are accumulated in 64 bits; as soon as the running
 * total reaches best_rd the search stops and INT_MAX is returned without
 * touching the output parameters.  Otherwise *Rate, *rate_y and *Distortion
 * are written and the RD cost of the whole macroblock is returned.
 */
static int rd_pick_intra4x4mby_modes(MACROBLOCK *mb, int *Rate, int *rate_y,
                                     int *Distortion, int best_rd) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  /* Costing works on copies, so the real contexts stay unchanged. */
  ENTROPY_CONTEXT_PLANES above_copy = *xd->above_context;
  ENTROPY_CONTEXT_PLANES left_copy = *xd->left_context;
  ENTROPY_CONTEXT *const ta = (ENTROPY_CONTEXT *)&above_copy;
  ENTROPY_CONTEXT *const tl = (ENTROPY_CONTEXT *)&left_copy;
  const int64_t rd_limit = (int64_t)best_rd;
  const int *bmode_costs = mb->inter_bmode_costs;
  int rate_sum = mb->mbmode_cost[xd->frame_type][B_PRED];
  int rate_y_sum = 0;
  int dist_sum = 0;
  int64_t rd_sum = 0;
  int blk;

  intra_prediction_down_copy(xd, xd->dst.y_buffer - xd->dst.y_stride + 16);

  for (blk = 0; blk < 16; ++blk) {
    MODE_INFO *const mic = xd->mode_info_context;
    const int mis = xd->mode_info_stride;
    B_PREDICTION_MODE picked = B_MODE_COUNT;
    int blk_rate = 0;
    int blk_rate_y = 0;
    int blk_dist = 0;

    /* Key frames cost the mode conditioned on the above and left modes. */
    if (xd->frame_type == KEY_FRAME) {
      const B_PREDICTION_MODE A = above_block_mode(mic, blk, mis);
      const B_PREDICTION_MODE L = left_block_mode(mic, blk);

      bmode_costs = mb->bmode_costs[A][L];
    }

    rd_sum += rd_pick_intra4x4block(mb, mb->block + blk, xd->block + blk,
                                    &picked, bmode_costs,
                                    ta + vp8_block2above[blk],
                                    tl + vp8_block2left[blk], &blk_rate,
                                    &blk_rate_y, &blk_dist);

    rate_sum += blk_rate;
    dist_sum += blk_dist;
    rate_y_sum += blk_rate_y;

    assert(picked != B_MODE_COUNT);
    mic->bmi[blk].as_mode = picked;

    if (rd_sum >= rd_limit) return INT_MAX;
  }

  *Rate = rate_sum;
  *rate_y = rate_y_sum;
  *Distortion = dist_sum;

  return RDCOST(mb->rdmult, mb->rddiv, rate_sum, dist_sum);
}
645
646
/* Evaluate the 16x16 intra modes DC_PRED..TM_PRED and keep the cheapest.
 *
 * The winning mode is stored in the macroblock's mbmi.mode; its total rate,
 * token-only rate and distortion go to *Rate, *rate_y and *Distortion.
 * Returns the winning RD cost.
 */
static int rd_pick_intra16x16mby_mode(MACROBLOCK *x, int *Rate, int *rate_y,
                                      int *Distortion) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_PREDICTION_MODE winner = MB_MODE_COUNT;
  MB_PREDICTION_MODE mode;
  int lowest_rd = INT_MAX;

  /* Y Search for 16x16 intra prediction mode */
  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    int token_rate;
    int total_rate;
    int dist;
    int rd;

    xd->mode_info_context->mbmi.mode = mode;

    vp8_build_intra_predictors_mby_s(xd, xd->dst.y_buffer - xd->dst.y_stride,
                                     xd->dst.y_buffer - 1, xd->dst.y_stride,
                                     xd->predictor, 16);

    macro_block_yrd(x, &token_rate, &dist);
    total_rate =
        token_rate +
        x->mbmode_cost[xd->frame_type][xd->mode_info_context->mbmi.mode];

    rd = RDCOST(x->rdmult, x->rddiv, total_rate, dist);
    if (rd >= lowest_rd) continue;

    winner = mode;
    lowest_rd = rd;
    *Rate = total_rate;
    *rate_y = token_rate;
    *Distortion = dist;
  }

  assert(winner != MB_MODE_COUNT);
  xd->mode_info_context->mbmi.mode = winner;
  return lowest_rd;
}
683
684
8.77M
/* Token cost of blocks 16..23, costed as PLANE_TYPE_UV.
 *
 * Local copies of the above/left entropy contexts are used, so the
 * macroblock's own contexts are not modified.
 */
static int rd_cost_mbuv(MACROBLOCK *mb) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  ENTROPY_CONTEXT_PLANES above_copy = *xd->above_context;
  ENTROPY_CONTEXT_PLANES left_copy = *xd->left_context;
  ENTROPY_CONTEXT *const ta = (ENTROPY_CONTEXT *)&above_copy;
  ENTROPY_CONTEXT *const tl = (ENTROPY_CONTEXT *)&left_copy;
  int total = 0;
  int blk = 16;

  while (blk < 24) {
    total += cost_coeffs(mb, xd->block + blk, PLANE_TYPE_UV,
                         ta + vp8_block2above[blk], tl + vp8_block2left[blk]);
    ++blk;
  }

  return total;
}
705
706
/* Chroma rate/distortion for a 16x16 inter prediction.
 *
 * Builds the 16x16 inter chroma predictors, then subtracts, transforms and
 * quantizes the chroma blocks.  *rate receives the token cost, *distortion a
 * quarter of the chroma coefficient error; the combined RD cost is returned.
 * cpi and fullpixel are accepted but unused.
 */
static int rd_inter16x16_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate,
                            int *distortion, int fullpixel) {
  MACROBLOCKD *const xd = &x->e_mbd;

  (void)fullpixel;
  (void)cpi;

  vp8_build_inter16x16_predictors_mbuv(xd);
  vp8_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
                    x->src.uv_stride, &xd->predictor[256],
                    &xd->predictor[320], 8);

  vp8_transform_mbuv(x);
  vp8_quantize_mbuv(x);

  *rate = rd_cost_mbuv(x);
  *distortion = vp8_mbuverror(x) / 4;

  return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
}
724
725
/* Chroma rate/distortion for a split (4x4) inter prediction.
 *
 * Builds the 4x4 inter chroma predictors, then subtracts, transforms and
 * quantizes the chroma blocks.  *rate receives the token cost, *distortion a
 * quarter of the chroma coefficient error; the combined RD cost is returned.
 * cpi and fullpixel are accepted but unused.
 */
static int rd_inter4x4_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate,
                          int *distortion, int fullpixel) {
  MACROBLOCKD *const xd = &x->e_mbd;

  (void)fullpixel;
  (void)cpi;

  vp8_build_inter4x4_predictors_mbuv(xd);
  vp8_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
                    x->src.uv_stride, &xd->predictor[256],
                    &xd->predictor[320], 8);

  vp8_transform_mbuv(x);
  vp8_quantize_mbuv(x);

  *rate = rd_cost_mbuv(x);
  *distortion = vp8_mbuverror(x) / 4;

  return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
}
743
744
/* Evaluate the chroma intra modes DC_PRED..TM_PRED and keep the cheapest.
 *
 * The winning mode is stored in the macroblock's mbmi.uv_mode.  *rate gets
 * its token + mode cost, *rate_tokenonly its token cost alone and
 * *distortion a quarter of its chroma coefficient error.
 */
static void rd_pick_intra_mbuv_mode(MACROBLOCK *x, int *rate,
                                    int *rate_tokenonly, int *distortion) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_PREDICTION_MODE winner = MB_MODE_COUNT;
  MB_PREDICTION_MODE mode;
  int lowest_rd = INT_MAX;
  int winner_rate = 0;
  int winner_dist = 0;

  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    int token_rate;
    int total_rate;
    int dist;
    int rd;

    xd->mode_info_context->mbmi.uv_mode = mode;

    vp8_build_intra_predictors_mbuv_s(
        xd, xd->dst.u_buffer - xd->dst.uv_stride,
        xd->dst.v_buffer - xd->dst.uv_stride, xd->dst.u_buffer - 1,
        xd->dst.v_buffer - 1, xd->dst.uv_stride, &xd->predictor[256],
        &xd->predictor[320], 8);

    vp8_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
                      x->src.uv_stride, &xd->predictor[256],
                      &xd->predictor[320], 8);
    vp8_transform_mbuv(x);
    vp8_quantize_mbuv(x);

    token_rate = rd_cost_mbuv(x);
    total_rate = token_rate +
                 x->intra_uv_mode_cost[xd->frame_type]
                                      [xd->mode_info_context->mbmi.uv_mode];
    dist = vp8_mbuverror(x) / 4;

    rd = RDCOST(x->rdmult, x->rddiv, total_rate, dist);
    if (rd >= lowest_rd) continue;

    lowest_rd = rd;
    winner = mode;
    winner_rate = total_rate;
    winner_dist = dist;
    *rate_tokenonly = token_rate;
  }

  *rate = winner_rate;
  *distortion = winner_dist;

  assert(winner != MB_MODE_COUNT);
  xd->mode_info_context->mbmi.uv_mode = winner;
}
796
797
6.86M
/* Cost of signalling inter mode m (NEARESTMV..SPLITMV) on vp8_mv_ref_tree,
 * using probabilities that vp8_mv_ref_probs() derives from near_mv_ref_ct. */
int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4]) {
  vp8_prob probs[VP8_MVREFS - 1];

  assert(m >= NEARESTMV && m <= SPLITMV);
  vp8_mv_ref_probs(probs, near_mv_ref_ct);

  return vp8_cost_token(vp8_mv_ref_tree, probs,
                        &vp8_mv_ref_encoding_array[m - NEARESTMV]);
}
804
805
2.89M
/* Store prediction mode mb and motion vector *mv in the mode info of the
 * macroblock that x currently points at. */
void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv) {
  MODE_INFO *const mi = x->e_mbd.mode_info_context;

  mi->mbmi.mode = mb;
  mi->mbmi.mv.as_int = mv->as_int;
}
809
810
static int labels2mode(MACROBLOCK *x, int const *labelings, int which_label,
811
                       B_PREDICTION_MODE this_mode, int_mv *this_mv,
812
30.2M
                       int_mv *best_ref_mv, int *mvcost[2]) {
813
30.2M
  MACROBLOCKD *const xd = &x->e_mbd;
814
30.2M
  MODE_INFO *const mic = xd->mode_info_context;
815
30.2M
  const int mis = xd->mode_info_stride;
816
817
30.2M
  int cost = 0;
818
30.2M
  int thismvcost = 0;
819
820
  /* We have to be careful retrieving previously-encoded motion vectors.
821
     Ones from this macroblock have to be pulled from the BLOCKD array
822
     as they have not yet made it to the bmi array in our MB_MODE_INFO. */
823
824
30.2M
  int i = 0;
825
826
483M
  do {
827
483M
    BLOCKD *const d = xd->block + i;
828
483M
    const int row = i >> 2, col = i & 3;
829
830
483M
    B_PREDICTION_MODE m;
831
832
483M
    if (labelings[i] != which_label) continue;
833
834
116M
    if (col && labelings[i] == labelings[i - 1]) {
835
60.0M
      m = LEFT4X4;
836
60.0M
    } else if (row && labelings[i] == labelings[i - 4]) {
837
26.4M
      m = ABOVE4X4;
838
30.2M
    } else {
839
      /* the only time we should do costing for new motion vector
840
       * or mode is when we are on a new label  (jbb May 08, 2007)
841
       */
842
30.2M
      switch (m = this_mode) {
843
8.33M
        case NEW4X4:
844
8.33M
          thismvcost = vp8_mv_bit_cost(this_mv, best_ref_mv, mvcost, 102);
845
8.33M
          break;
846
8.84M
        case LEFT4X4:
847
8.84M
          this_mv->as_int = col ? d[-1].bmi.mv.as_int : left_block_mv(mic, i);
848
8.84M
          break;
849
6.83M
        case ABOVE4X4:
850
6.83M
          this_mv->as_int =
851
6.83M
              row ? d[-4].bmi.mv.as_int : above_block_mv(mic, i, mis);
852
6.83M
          break;
853
6.20M
        case ZERO4X4: this_mv->as_int = 0; break;
854
0
        default: break;
855
30.2M
      }
856
857
30.2M
      if (m == ABOVE4X4) { /* replace above with left if same */
858
6.83M
        int_mv left_mv;
859
860
6.83M
        left_mv.as_int = col ? d[-1].bmi.mv.as_int : left_block_mv(mic, i);
861
862
6.83M
        if (left_mv.as_int == this_mv->as_int) m = LEFT4X4;
863
6.83M
      }
864
865
30.2M
      cost = x->inter_bmode_costs[m];
866
30.2M
    }
867
868
116M
    d->bmi.mv.as_int = this_mv->as_int;
869
870
116M
    x->partition_info->bmi[i].mode = m;
871
116M
    x->partition_info->bmi[i].mv.as_int = this_mv->as_int;
872
873
483M
  } while (++i < 16);
874
875
30.2M
  cost += thismvcost;
876
30.2M
  return cost;
877
30.2M
}
878
879
/* Token cost of the Y blocks whose entry in labels equals which_label, each
 * costed as PLANE_TYPE_Y_WITH_DC.  The contexts behind ta and tl are updated
 * by the costing. */
static int rdcost_mbsegment_y(MACROBLOCK *mb, const int *labels,
                              int which_label, ENTROPY_CONTEXT *ta,
                              ENTROPY_CONTEXT *tl) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  int total = 0;
  int blk;

  for (blk = 0; blk < 16; ++blk) {
    if (labels[blk] != which_label) continue;

    total += cost_coeffs(mb, xd->block + blk, PLANE_TYPE_Y_WITH_DC,
                         ta + vp8_block2above[blk], tl + vp8_block2left[blk]);
  }

  return total;
}
895
/* Predict, subtract, transform and quantize every Y block whose entry in
 * labels equals which_label, and return the summed coefficient error of
 * those blocks. */
static unsigned int vp8_encode_inter_mb_segment(MACROBLOCK *x,
                                                int const *labels,
                                                int which_label) {
  unsigned char *const base_pre = x->e_mbd.pre.y_buffer;
  const int pre_stride = x->e_mbd.pre.y_stride;
  unsigned int total = 0;
  int blk;

  for (blk = 0; blk < 16; ++blk) {
    BLOCKD *bd;
    BLOCK *be;

    if (labels[blk] != which_label) continue;

    bd = &x->e_mbd.block[blk];
    be = &x->block[blk];

    vp8_build_inter_predictors_b(bd, 16, base_pre, pre_stride,
                                 x->e_mbd.subpixel_predict);
    vp8_subtract_b(be, bd, 16);
    x->short_fdct4x4(be->src_diff, be->coeff, 32);
    x->quantize_b(be, bd);

    total += vp8_block_error(be->coeff, bd->dqcoeff);
  }

  return total;
}
920
921
/* Right shift applied to the best diamond-search error before it is compared
 * against the full-search trigger threshold in rd_check_segment(). Indexed by
 * the segmentation type passed to that function.
 */
static const unsigned int segmentation_to_sseshift[4] = {
  3, /* segmentation 0 */
  3, /* segmentation 1 */
  2, /* segmentation 2 */
  0  /* segmentation 3 */
};
922
923
typedef struct {
924
  int_mv *ref_mv;
925
  int_mv mvp;
926
927
  int segment_rd;
928
  int segment_num;
929
  int r;
930
  int d;
931
  int segment_yrate;
932
  B_PREDICTION_MODE modes[16];
933
  int_mv mvs[16];
934
  unsigned char eobs[16];
935
936
  int mvthresh;
937
  int *mdcounts;
938
939
  int_mv sv_mvp[4]; /* save 4 mvp from 8x8 */
940
  int sv_istep[2];  /* save 2 initial step_param for 16x8/8x16 */
941
942
} BEST_SEG_INFO;
943
944
/* Evaluate one way of splitting the macroblock (the given segmentation type)
 * and, if its accumulated rate-distortion cost beats bsi->segment_rd, record
 * it in bsi as the new best.
 *
 * For every label (segment) of the split, the candidate modes LEFT4X4 ..
 * NEW4X4 are tried in order; NEW4X4 runs a motion search first. The cheapest
 * mode per label is kept and its cost added to the running total. The loop
 * over labels stops early as soon as the running total can no longer beat
 * bsi->segment_rd.
 *
 * cpi          - encoder instance (search functions, speed settings).
 * x            - macroblock being coded; block MVs, partition_info and eobs
 *                are modified as candidates are evaluated.
 * bsi          - in/out best-so-far state, see BEST_SEG_INFO.
 * segmentation - segmentation type; indexes vp8_mbsplits[],
 *                vp8_mbsplit_count[], cpi->fn_ptr[] and
 *                segmentation_to_sseshift[].
 */
static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, BEST_SEG_INFO *bsi,
                             unsigned int segmentation) {
  int i;
  int const *labels;
  int br = 0;
  int bd = 0;
  B_PREDICTION_MODE this_mode;

  int label_count;
  int this_segment_rd = 0;
  int label_mv_thresh;
  int rate = 0;
  int sbr = 0;
  int sbd = 0;
  int segmentyrate = 0;

  vp8_variance_fn_ptr_t *v_fn_ptr;

  ENTROPY_CONTEXT_PLANES t_above, t_left;
  ENTROPY_CONTEXT_PLANES t_above_b, t_left_b;

  /* Work on private copies of the entropy contexts so that trial costing
   * does not disturb the macroblock's real contexts.
   */
  t_above = *x->e_mbd.above_context;
  t_left = *x->e_mbd.left_context;

  vp8_zero(t_above_b);
  vp8_zero(t_left_b);

  v_fn_ptr = &cpi->fn_ptr[segmentation];
  labels = vp8_mbsplits[segmentation];
  label_count = vp8_mbsplit_count[segmentation];

  /* Per-label threshold below which no new motion search is attempted.
   * The multiplier is 1, which keeps the threshold roughly equal to what it
   * is for whole macroblocks; a large multiplier (e.g. 64) would make the
   * threshold so big that mvs on segments are very rarely checked.
   */
  label_mv_thresh = 1 * bsi->mvthresh / label_count;

  /* Segmentation method overheads */
  rate = vp8_cost_token(vp8_mbsplit_tree, vp8_mbsplit_probs,
                        vp8_mbsplit_encodings + segmentation);
  rate += vp8_cost_mv_ref(SPLITMV, bsi->mdcounts);
  this_segment_rd += RDCOST(x->rdmult, x->rddiv, rate, 0);
  br += rate;

  for (i = 0; i < label_count; ++i) {
    int_mv mode_mv[B_MODE_COUNT] = { { 0 }, { 0 } };
    int best_label_rd = INT_MAX;
    B_PREDICTION_MODE mode_selected = ZERO4X4;
    int bestlabelyrate = 0;

    /* search for the best motion vector on this segment */
    for (this_mode = LEFT4X4; this_mode <= NEW4X4; ++this_mode) {
      int this_rd;
      int distortion;
      int labelyrate;
      ENTROPY_CONTEXT_PLANES t_above_s, t_left_s;
      ENTROPY_CONTEXT *ta_s;
      ENTROPY_CONTEXT *tl_s;

      /* Each candidate mode starts from the contexts left by the labels
       * already decided.
       */
      t_above_s = t_above;
      t_left_s = t_left;

      ta_s = (ENTROPY_CONTEXT *)&t_above_s;
      tl_s = (ENTROPY_CONTEXT *)&t_left_s;

      if (this_mode == NEW4X4) {
        int sseshift;
        int num00;
        int step_param = 0;
        int further_steps;
        int n;
        int thissme;
        int bestsme = INT_MAX;
        int_mv temp_mv;
        BLOCK *c;
        BLOCKD *e;

        /* Is the best so far sufficiently good that we can't justify
         * doing a new motion search.
         */
        if (best_label_rd < label_mv_thresh) break;

        if (cpi->compressor_speed) {
          if (segmentation == BLOCK_8X16 || segmentation == BLOCK_16X8) {
            /* Start from the MV saved from the 8x8 pass for this half. */
            bsi->mvp.as_int = bsi->sv_mvp[i].as_int;
            if (i == 1 && segmentation == BLOCK_16X8) {
              bsi->mvp.as_int = bsi->sv_mvp[2].as_int;
            }

            step_param = bsi->sv_istep[i];
          }

          /* use previous block's result as next block's MV
           * predictor.
           */
          if (segmentation == BLOCK_4X4 && i > 0) {
            bsi->mvp.as_int = x->e_mbd.block[i - 1].bmi.mv.as_int;
            /* First block of a row: use the block directly above instead. */
            if (i == 4 || i == 8 || i == 12) {
              bsi->mvp.as_int = x->e_mbd.block[i - 4].bmi.mv.as_int;
            }
            step_param = 2;
          }
        }

        further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;

        {
          int sadpb = x->sadperbit4;
          int_mv mvp_full;

          /* Convert the predictor to full-pel units for the search. */
          mvp_full.as_mv.row = bsi->mvp.as_mv.row >> 3;
          mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3;

          /* find first label */
          n = vp8_mbsplit_offset[segmentation][i];

          c = &x->block[n];
          e = &x->e_mbd.block[n];

          {
            bestsme = cpi->diamond_search_sad(
                x, c, e, &mvp_full, &mode_mv[NEW4X4], step_param, sadpb, &num00,
                v_fn_ptr, x->mvcost, bsi->ref_mv);

            n = num00;
            num00 = 0;

            /* Repeat the diamond search with increasing step_param; num00
             * (reported by the search) counts steps that are skipped.
             */
            while (n < further_steps) {
              n++;

              if (num00) {
                num00--;
              } else {
                thissme = cpi->diamond_search_sad(
                    x, c, e, &mvp_full, &temp_mv, step_param + n, sadpb, &num00,
                    v_fn_ptr, x->mvcost, bsi->ref_mv);

                if (thissme < bestsme) {
                  bestsme = thissme;
                  mode_mv[NEW4X4].as_int = temp_mv.as_int;
                }
              }
            }
          }

          sseshift = segmentation_to_sseshift[segmentation];

          /* Should we do a full search (best quality only) */
          if ((cpi->compressor_speed == 0) && (bestsme >> sseshift) > 4000) {
            /* Check if mvp_full is within the range. */
            vp8_clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max, x->mv_row_min,
                         x->mv_row_max);

            thissme = vp8_full_search_sad(x, c, e, &mvp_full, sadpb, 16,
                                          v_fn_ptr, x->mvcost, bsi->ref_mv);

            if (thissme < bestsme) {
              bestsme = thissme;
              mode_mv[NEW4X4].as_int = e->bmi.mv.as_int;
            } else {
              /* The full search result is actually worse so
               * re-instate the previous best vector
               */
              e->bmi.mv.as_int = mode_mv[NEW4X4].as_int;
            }
          }
        }

        if (bestsme < INT_MAX) {
          /* Outputs of the sub-pixel refinement that are not used here. */
          int disto;
          unsigned int sse;
          cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4], bsi->ref_mv,
                                       x->errorperbit, v_fn_ptr, x->mvcost,
                                       &disto, &sse);
        }
      } /* NEW4X4 */

      rate = labels2mode(x, labels, i, this_mode, &mode_mv[this_mode],
                         bsi->ref_mv, x->mvcost);

      /* Trap vectors that reach beyond the UMV borders */
      if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) ||
          ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) ||
          ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) ||
          ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max)) {
        continue;
      }

      distortion = vp8_encode_inter_mb_segment(x, labels, i) / 4;

      labelyrate = rdcost_mbsegment_y(x, labels, i, ta_s, tl_s);
      rate += labelyrate;

      this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

      if (this_rd < best_label_rd) {
        sbr = rate;
        sbd = distortion;
        bestlabelyrate = labelyrate;
        mode_selected = this_mode;
        best_label_rd = this_rd;

        /* Remember the contexts produced by the winning mode. */
        t_above_b = t_above_s;
        t_left_b = t_left_s;
      }
    } /*for each 4x4 mode*/

    t_above = t_above_b;
    t_left = t_left_b;

    /* Re-apply the winning mode so the block / partition state reflects it
     * (the return value, a rate, is not needed here).
     */
    labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected],
                bsi->ref_mv, x->mvcost);

    br += sbr;
    bd += sbd;
    segmentyrate += bestlabelyrate;

    /* Accumulate with saturation. best_label_rd is still INT_MAX when every
     * candidate mode of this label was rejected; a plain addition would then
     * overflow a signed int (undefined behaviour) and, on wrap-around, make
     * an unusable split look like the cheapest one.
     */
    if (this_segment_rd > 0 && best_label_rd > INT_MAX - this_segment_rd) {
      this_segment_rd = INT_MAX;
    } else {
      this_segment_rd += best_label_rd;
    }

    /* No point in costing further labels once this split cannot win. */
    if (this_segment_rd >= bsi->segment_rd) break;

  } /* for each label */

  if (this_segment_rd < bsi->segment_rd) {
    bsi->r = br;
    bsi->d = bd;
    bsi->segment_yrate = segmentyrate;
    bsi->segment_rd = this_segment_rd;
    bsi->segment_num = segmentation;

    /* store everything needed to come back to this!! */
    for (i = 0; i < 16; ++i) {
      bsi->mvs[i].as_mv = x->partition_info->bmi[i].mv.as_mv;
      bsi->modes[i] = x->partition_info->bmi[i].mode;
      bsi->eobs[i] = x->e_mbd.eobs[i];
    }
  }
}
1184
1185
1.48M
static void vp8_cal_step_param(int sr, int *sp) {
1186
1.48M
  int step = 0;
1187
1188
1.48M
  if (sr > MAX_FIRST_STEP) {
1189
46.6k
    sr = MAX_FIRST_STEP;
1190
1.43M
  } else if (sr < 1) {
1191
688k
    sr = 1;
1192
688k
  }
1193
1194
4.40M
  while (sr >>= 1) step++;
1195
1196
1.48M
  *sp = MAX_MVSEARCH_STEPS - 1 - step;
1197
1.48M
}
1198
1199
static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
1200
                                           int_mv *best_ref_mv, int best_rd,
1201
                                           int *mdcounts, int *returntotrate,
1202
                                           int *returnyrate,
1203
                                           int *returndistortion,
1204
719k
                                           int mvthresh) {
1205
719k
  int i;
1206
719k
  BEST_SEG_INFO bsi;
1207
1208
719k
  memset(&bsi, 0, sizeof(bsi));
1209
1210
719k
  bsi.segment_rd = best_rd;
1211
719k
  bsi.ref_mv = best_ref_mv;
1212
719k
  bsi.mvp.as_int = best_ref_mv->as_int;
1213
719k
  bsi.mvthresh = mvthresh;
1214
719k
  bsi.mdcounts = mdcounts;
1215
1216
12.2M
  for (i = 0; i < 16; ++i) {
1217
11.5M
    bsi.modes[i] = ZERO4X4;
1218
11.5M
  }
1219
1220
719k
  if (cpi->compressor_speed == 0) {
1221
    /* for now, we will keep the original segmentation order
1222
       when in best quality mode */
1223
0
    rd_check_segment(cpi, x, &bsi, BLOCK_16X8);
1224
0
    rd_check_segment(cpi, x, &bsi, BLOCK_8X16);
1225
0
    rd_check_segment(cpi, x, &bsi, BLOCK_8X8);
1226
0
    rd_check_segment(cpi, x, &bsi, BLOCK_4X4);
1227
719k
  } else {
1228
719k
    int sr;
1229
1230
719k
    rd_check_segment(cpi, x, &bsi, BLOCK_8X8);
1231
1232
719k
    if (bsi.segment_rd < best_rd) {
1233
371k
      int col_min = ((best_ref_mv->as_mv.col + 7) >> 3) - MAX_FULL_PEL_VAL;
1234
371k
      int row_min = ((best_ref_mv->as_mv.row + 7) >> 3) - MAX_FULL_PEL_VAL;
1235
371k
      int col_max = (best_ref_mv->as_mv.col >> 3) + MAX_FULL_PEL_VAL;
1236
371k
      int row_max = (best_ref_mv->as_mv.row >> 3) + MAX_FULL_PEL_VAL;
1237
1238
371k
      int tmp_col_min = x->mv_col_min;
1239
371k
      int tmp_col_max = x->mv_col_max;
1240
371k
      int tmp_row_min = x->mv_row_min;
1241
371k
      int tmp_row_max = x->mv_row_max;
1242
1243
      /* Get intersection of UMV window and valid MV window to reduce # of
1244
       * checks in diamond search. */
1245
371k
      if (x->mv_col_min < col_min) x->mv_col_min = col_min;
1246
371k
      if (x->mv_col_max > col_max) x->mv_col_max = col_max;
1247
371k
      if (x->mv_row_min < row_min) x->mv_row_min = row_min;
1248
371k
      if (x->mv_row_max > row_max) x->mv_row_max = row_max;
1249
1250
      /* Get 8x8 result */
1251
371k
      bsi.sv_mvp[0].as_int = bsi.mvs[0].as_int;
1252
371k
      bsi.sv_mvp[1].as_int = bsi.mvs[2].as_int;
1253
371k
      bsi.sv_mvp[2].as_int = bsi.mvs[8].as_int;
1254
371k
      bsi.sv_mvp[3].as_int = bsi.mvs[10].as_int;
1255
1256
      /* Use 8x8 result as 16x8/8x16's predictor MV. Adjust search range
1257
       * according to the closeness of 2 MV. */
1258
      /* block 8X16 */
1259
371k
      {
1260
371k
        sr =
1261
371k
            MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[2].as_mv.row)) >> 3,
1262
371k
                 (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[2].as_mv.col)) >> 3);
1263
371k
        vp8_cal_step_param(sr, &bsi.sv_istep[0]);
1264
1265
371k
        sr =
1266
371k
            MAXF((abs(bsi.sv_mvp[1].as_mv.row - bsi.sv_mvp[3].as_mv.row)) >> 3,
1267
371k
                 (abs(bsi.sv_mvp[1].as_mv.col - bsi.sv_mvp[3].as_mv.col)) >> 3);
1268
371k
        vp8_cal_step_param(sr, &bsi.sv_istep[1]);
1269
1270
371k
        rd_check_segment(cpi, x, &bsi, BLOCK_8X16);
1271
371k
      }
1272
1273
      /* block 16X8 */
1274
371k
      {
1275
371k
        sr =
1276
371k
            MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[1].as_mv.row)) >> 3,
1277
371k
                 (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[1].as_mv.col)) >> 3);
1278
371k
        vp8_cal_step_param(sr, &bsi.sv_istep[0]);
1279
1280
371k
        sr =
1281
371k
            MAXF((abs(bsi.sv_mvp[2].as_mv.row - bsi.sv_mvp[3].as_mv.row)) >> 3,
1282
371k
                 (abs(bsi.sv_mvp[2].as_mv.col - bsi.sv_mvp[3].as_mv.col)) >> 3);
1283
371k
        vp8_cal_step_param(sr, &bsi.sv_istep[1]);
1284
1285
371k
        rd_check_segment(cpi, x, &bsi, BLOCK_16X8);
1286
371k
      }
1287
1288
      /* If 8x8 is better than 16x8/8x16, then do 4x4 search */
1289
      /* Not skip 4x4 if speed=0 (good quality) */
1290
371k
      if (cpi->sf.no_skip_block4x4_search || bsi.segment_num == BLOCK_8X8)
1291
      /* || (sv_segment_rd8x8-bsi.segment_rd) < sv_segment_rd8x8>>5) */
1292
144k
      {
1293
144k
        bsi.mvp.as_int = bsi.sv_mvp[0].as_int;
1294
144k
        rd_check_segment(cpi, x, &bsi, BLOCK_4X4);
1295
144k
      }
1296
1297
      /* restore UMV window */
1298
371k
      x->mv_col_min = tmp_col_min;
1299
371k
      x->mv_col_max = tmp_col_max;
1300
371k
      x->mv_row_min = tmp_row_min;
1301
371k
      x->mv_row_max = tmp_row_max;
1302
371k
    }
1303
719k
  }
1304
1305
  /* set it to the best */
1306
12.2M
  for (i = 0; i < 16; ++i) {
1307
11.5M
    BLOCKD *bd = &x->e_mbd.block[i];
1308
1309
11.5M
    bd->bmi.mv.as_int = bsi.mvs[i].as_int;
1310
11.5M
    *bd->eob = bsi.eobs[i];
1311
11.5M
  }
1312
1313
719k
  *returntotrate = bsi.r;
1314
719k
  *returndistortion = bsi.d;
1315
719k
  *returnyrate = bsi.segment_yrate;
1316
1317
  /* save partitions */
1318
719k
  x->e_mbd.mode_info_context->mbmi.partitioning = bsi.segment_num;
1319
719k
  x->partition_info->count = vp8_mbsplit_count[bsi.segment_num];
1320
1321
3.66M
  for (i = 0; i < x->partition_info->count; ++i) {
1322
2.94M
    int j;
1323
1324
2.94M
    j = vp8_mbsplit_offset[bsi.segment_num][i];
1325
1326
2.94M
    x->partition_info->bmi[i].mode = bsi.modes[j];
1327
2.94M
    x->partition_info->bmi[i].mv.as_mv = bsi.mvs[j].as_mv;
1328
2.94M
  }
1329
  /*
1330
   * used to set x->e_mbd.mode_info_context->mbmi.mv.as_int
1331
   */
1332
719k
  x->partition_info->bmi[15].mv.as_int = bsi.mvs[15].as_int;
1333
1334
719k
  return bsi.segment_rd;
1335
719k
}
1336
1337
/* The improved MV prediction */
1338
void vp8_mv_pred(VP8_COMP *cpi, MACROBLOCKD *xd, const MODE_INFO *here,
1339
                 int_mv *mvp, int refframe, int *ref_frame_sign_bias, int *sr,
1340
1.61M
                 int near_sadidx[]) {
1341
1.61M
  const MODE_INFO *above = here - xd->mode_info_stride;
1342
1.61M
  const MODE_INFO *left = here - 1;
1343
1.61M
  const MODE_INFO *aboveleft = above - 1;
1344
1.61M
  int_mv near_mvs[8];
1345
1.61M
  int near_ref[8];
1346
1.61M
  int_mv mv;
1347
1.61M
  int vcnt = 0;
1348
1.61M
  int find = 0;
1349
1.61M
  int mb_offset;
1350
1351
1.61M
  int mvx[8];
1352
1.61M
  int mvy[8];
1353
1.61M
  int i;
1354
1355
1.61M
  mv.as_int = 0;
1356
1357
1.61M
  if (here->mbmi.ref_frame != INTRA_FRAME) {
1358
1.61M
    near_mvs[0].as_int = near_mvs[1].as_int = near_mvs[2].as_int =
1359
1.61M
        near_mvs[3].as_int = near_mvs[4].as_int = near_mvs[5].as_int =
1360
1.61M
            near_mvs[6].as_int = near_mvs[7].as_int = 0;
1361
1.61M
    near_ref[0] = near_ref[1] = near_ref[2] = near_ref[3] = near_ref[4] =
1362
1.61M
        near_ref[5] = near_ref[6] = near_ref[7] = 0;
1363
1364
    /* read in 3 nearby block's MVs from current frame as prediction
1365
     * candidates.
1366
     */
1367
1.61M
    if (above->mbmi.ref_frame != INTRA_FRAME) {
1368
492k
      near_mvs[vcnt].as_int = above->mbmi.mv.as_int;
1369
492k
      mv_bias(ref_frame_sign_bias[above->mbmi.ref_frame], refframe,
1370
492k
              &near_mvs[vcnt], ref_frame_sign_bias);
1371
492k
      near_ref[vcnt] = above->mbmi.ref_frame;
1372
492k
    }
1373
1.61M
    vcnt++;
1374
1.61M
    if (left->mbmi.ref_frame != INTRA_FRAME) {
1375
652k
      near_mvs[vcnt].as_int = left->mbmi.mv.as_int;
1376
652k
      mv_bias(ref_frame_sign_bias[left->mbmi.ref_frame], refframe,
1377
652k
              &near_mvs[vcnt], ref_frame_sign_bias);
1378
652k
      near_ref[vcnt] = left->mbmi.ref_frame;
1379
652k
    }
1380
1.61M
    vcnt++;
1381
1.61M
    if (aboveleft->mbmi.ref_frame != INTRA_FRAME) {
1382
380k
      near_mvs[vcnt].as_int = aboveleft->mbmi.mv.as_int;
1383
380k
      mv_bias(ref_frame_sign_bias[aboveleft->mbmi.ref_frame], refframe,
1384
380k
              &near_mvs[vcnt], ref_frame_sign_bias);
1385
380k
      near_ref[vcnt] = aboveleft->mbmi.ref_frame;
1386
380k
    }
1387
1.61M
    vcnt++;
1388
1389
    /* read in 5 nearby block's MVs from last frame. */
1390
1.61M
    if (cpi->common.last_frame_type != KEY_FRAME) {
1391
1.00M
      mb_offset = (-xd->mb_to_top_edge / 128 + 1) * (xd->mode_info_stride + 1) +
1392
1.00M
                  (-xd->mb_to_left_edge / 128 + 1);
1393
1394
      /* current in last frame */
1395
1.00M
      if (cpi->lf_ref_frame[mb_offset] != INTRA_FRAME) {
1396
600k
        near_mvs[vcnt].as_int = cpi->lfmv[mb_offset].as_int;
1397
600k
        mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset], refframe,
1398
600k
                &near_mvs[vcnt], ref_frame_sign_bias);
1399
600k
        near_ref[vcnt] = cpi->lf_ref_frame[mb_offset];
1400
600k
      }
1401
1.00M
      vcnt++;
1402
1403
      /* above in last frame */
1404
1.00M
      if (cpi->lf_ref_frame[mb_offset - xd->mode_info_stride - 1] !=
1405
1.00M
          INTRA_FRAME) {
1406
346k
        near_mvs[vcnt].as_int =
1407
346k
            cpi->lfmv[mb_offset - xd->mode_info_stride - 1].as_int;
1408
346k
        mv_bias(
1409
346k
            cpi->lf_ref_frame_sign_bias[mb_offset - xd->mode_info_stride - 1],
1410
346k
            refframe, &near_mvs[vcnt], ref_frame_sign_bias);
1411
346k
        near_ref[vcnt] =
1412
346k
            cpi->lf_ref_frame[mb_offset - xd->mode_info_stride - 1];
1413
346k
      }
1414
1.00M
      vcnt++;
1415
1416
      /* left in last frame */
1417
1.00M
      if (cpi->lf_ref_frame[mb_offset - 1] != INTRA_FRAME) {
1418
408k
        near_mvs[vcnt].as_int = cpi->lfmv[mb_offset - 1].as_int;
1419
408k
        mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset - 1], refframe,
1420
408k
                &near_mvs[vcnt], ref_frame_sign_bias);
1421
408k
        near_ref[vcnt] = cpi->lf_ref_frame[mb_offset - 1];
1422
408k
      }
1423
1.00M
      vcnt++;
1424
1425
      /* right in last frame */
1426
1.00M
      if (cpi->lf_ref_frame[mb_offset + 1] != INTRA_FRAME) {
1427
411k
        near_mvs[vcnt].as_int = cpi->lfmv[mb_offset + 1].as_int;
1428
411k
        mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset + 1], refframe,
1429
411k
                &near_mvs[vcnt], ref_frame_sign_bias);
1430
411k
        near_ref[vcnt] = cpi->lf_ref_frame[mb_offset + 1];
1431
411k
      }
1432
1.00M
      vcnt++;
1433
1434
      /* below in last frame */
1435
1.00M
      if (cpi->lf_ref_frame[mb_offset + xd->mode_info_stride + 1] !=
1436
1.00M
          INTRA_FRAME) {
1437
340k
        near_mvs[vcnt].as_int =
1438
340k
            cpi->lfmv[mb_offset + xd->mode_info_stride + 1].as_int;
1439
340k
        mv_bias(
1440
340k
            cpi->lf_ref_frame_sign_bias[mb_offset + xd->mode_info_stride + 1],
1441
340k
            refframe, &near_mvs[vcnt], ref_frame_sign_bias);
1442
340k
        near_ref[vcnt] =
1443
340k
            cpi->lf_ref_frame[mb_offset + xd->mode_info_stride + 1];
1444
340k
      }
1445
1.00M
      vcnt++;
1446
1.00M
    }
1447
1448
6.75M
    for (i = 0; i < vcnt; ++i) {
1449
6.01M
      if (near_ref[near_sadidx[i]] != INTRA_FRAME) {
1450
2.10M
        if (here->mbmi.ref_frame == near_ref[near_sadidx[i]]) {
1451
876k
          mv.as_int = near_mvs[near_sadidx[i]].as_int;
1452
876k
          find = 1;
1453
876k
          if (i < 3) {
1454
794k
            *sr = 3;
1455
794k
          } else {
1456
81.4k
            *sr = 2;
1457
81.4k
          }
1458
876k
          break;
1459
876k
        }
1460
2.10M
      }
1461
6.01M
    }
1462
1463
1.61M
    if (!find) {
1464
5.24M
      for (i = 0; i < vcnt; ++i) {
1465
4.50M
        mvx[i] = near_mvs[i].as_mv.row;
1466
4.50M
        mvy[i] = near_mvs[i].as_mv.col;
1467
4.50M
      }
1468
1469
737k
      insertsortmv(mvx, vcnt);
1470
737k
      insertsortmv(mvy, vcnt);
1471
737k
      mv.as_mv.row = mvx[vcnt / 2];
1472
737k
      mv.as_mv.col = mvy[vcnt / 2];
1473
1474
      /* sr is set to 0 to allow calling function to decide the search
1475
       * range.
1476
       */
1477
737k
      *sr = 0;
1478
737k
    }
1479
1.61M
  }
1480
1481
  /* Set up return values */
1482
1.61M
  mvp->as_int = mv.as_int;
1483
1.61M
  vp8_clamp_mv2(mvp, xd);
1484
1.61M
}
1485
1486
/* Compute the 16x16 SAD between the current source macroblock and each of
 * its neighbouring macroblock positions, then sort the results into
 * near_sadidx[] with insertsortsad().
 *
 * near_sad indexes:
 *   0-cf above, 1-cf left, 2-cf aboveleft,
 *   3-lf current, 4-lf above, 5-lf left, 6-lf right, 7-lf below
 * (cf = current frame reconstruction, lf = last frame buffer).
 *
 * A neighbour on the far side of a frame edge gets INT_MAX instead of a SAD.
 * The five last-frame entries are only evaluated, and only sorted, when the
 * last frame was not a key frame; otherwise just the first three entries are
 * sorted.
 *
 * recon_yoffset - offset of the current macroblock inside the last frame's
 *                 luma buffer.
 */
void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x,
                 int recon_yoffset, int near_sadidx[]) {
  int near_sad[8] = { 0 };
  BLOCK *const blk = &x->block[0];
  unsigned char *const src = *(blk->base_src);
  const int at_top = (xd->mb_to_top_edge == 0);
  const int at_left = (xd->mb_to_left_edge == 0);

  /* calculate sad for current frame 3 nearby MBs. */

  /* above: missing on the top row */
  if (at_top) {
    near_sad[0] = INT_MAX;
  } else {
    near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(
        src, blk->src_stride, xd->dst.y_buffer - xd->dst.y_stride * 16,
        xd->dst.y_stride);
  }

  /* left: missing in the first column */
  if (at_left) {
    near_sad[1] = INT_MAX;
  } else {
    near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(
        src, blk->src_stride, xd->dst.y_buffer - 16, xd->dst.y_stride);
  }

  /* above-left: needs both the row above and the column to the left */
  if (at_top || at_left) {
    near_sad[2] = INT_MAX;
  } else {
    near_sad[2] = cpi->fn_ptr[BLOCK_16X16].sdf(
        src, blk->src_stride, xd->dst.y_buffer - xd->dst.y_stride * 16 - 16,
        xd->dst.y_stride);
  }

  if (cpi->common.last_frame_type != KEY_FRAME) {
    /* calculate sad for last frame 5 nearby MBs. */
    unsigned char *const pre =
        cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_buffer + recon_yoffset;
    const int pre_stride = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_stride;

    /* above */
    if (at_top) {
      near_sad[4] = INT_MAX;
    } else {
      near_sad[4] = cpi->fn_ptr[BLOCK_16X16].sdf(
          src, blk->src_stride, pre - pre_stride * 16, pre_stride);
    }

    /* left */
    if (at_left) {
      near_sad[5] = INT_MAX;
    } else {
      near_sad[5] = cpi->fn_ptr[BLOCK_16X16].sdf(src, blk->src_stride,
                                                 pre - 16, pre_stride);
    }

    /* co-located macroblock: always available */
    near_sad[3] =
        cpi->fn_ptr[BLOCK_16X16].sdf(src, blk->src_stride, pre, pre_stride);

    /* right */
    if (xd->mb_to_right_edge == 0) {
      near_sad[6] = INT_MAX;
    } else {
      near_sad[6] = cpi->fn_ptr[BLOCK_16X16].sdf(src, blk->src_stride,
                                                 pre + 16, pre_stride);
    }

    /* below */
    if (xd->mb_to_bottom_edge == 0) {
      near_sad[7] = INT_MAX;
    } else {
      near_sad[7] = cpi->fn_ptr[BLOCK_16X16].sdf(
          src, blk->src_stride, pre + pre_stride * 16, pre_stride);
    }

    insertsortsad(near_sad, near_sadidx, 8);
  } else {
    insertsortsad(near_sad, near_sadidx, 3);
  }
}
1560
1561
736k
/* Record one coded motion vector in the MV histograms of x.
 *
 * The halved row / column difference between mv and best_ref_mv, offset by
 * mv_max, indexes x->MVcount[0] (rows) and x->MVcount[1] (columns). Nothing
 * is counted unless both indices fall inside [0, MVvals).
 */
static void rd_count_mv_residual(MACROBLOCK *x, const int_mv *mv,
                                 const int_mv *best_ref_mv) {
  const int row_idx =
      mv_max + ((mv->as_mv.row - best_ref_mv->as_mv.row) >> 1);
  const int col_idx =
      mv_max + ((mv->as_mv.col - best_ref_mv->as_mv.col) >> 1);

  if (row_idx < 0 || row_idx >= MVvals) return;
  if (col_idx < 0 || col_idx >= MVvals) return;

  x->MVcount[0][row_idx]++;
  x->MVcount[1][col_idx]++;
}

/* Update the MV statistics of x for the mode chosen for the current
 * macroblock: every NEW4X4 partition of a SPLITMV macroblock, or the single
 * vector of a NEWMV macroblock. Other modes leave the counts untouched.
 */
static void rd_update_mvcount(MACROBLOCK *x, int_mv *best_ref_mv) {
  const int mb_mode = x->e_mbd.mode_info_context->mbmi.mode;

  if (mb_mode == SPLITMV) {
    int part;

    for (part = 0; part < x->partition_info->count; ++part) {
      if (x->partition_info->bmi[part].mode != NEW4X4) continue;

      rd_count_mv_residual(x, &x->partition_info->bmi[part].mv, best_ref_mv);
    }
  } else if (mb_mode == NEWMV) {
    rd_count_mv_residual(x, &x->e_mbd.mode_info_context->mbmi.mv, best_ref_mv);
  }
}
1597
1598
static int evaluate_inter_mode_rd(int mdcounts[4], RATE_DISTORTION *rd,
1599
                                  int *disable_skip, VP8_COMP *cpi,
1600
2.89M
                                  MACROBLOCK *x) {
1601
2.89M
  MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode;
1602
2.89M
  BLOCK *b = &x->block[0];
1603
2.89M
  MACROBLOCKD *xd = &x->e_mbd;
1604
2.89M
  int distortion;
1605
2.89M
  vp8_build_inter16x16_predictors_mby(&x->e_mbd, x->e_mbd.predictor, 16);
1606
1607
2.89M
  if (cpi->active_map_enabled && x->active_ptr[0] == 0) {
1608
0
    x->skip = 1;
1609
2.89M
  } else if (x->encode_breakout) {
1610
0
    unsigned int sse;
1611
0
    unsigned int var;
1612
0
    unsigned int threshold =
1613
0
        (xd->block[0].dequant[1] * xd->block[0].dequant[1] >> 4);
1614
1615
0
    if (threshold < x->encode_breakout) threshold = x->encode_breakout;
1616
1617
0
    var = vpx_variance16x16(*(b->base_src), b->src_stride, x->e_mbd.predictor,
1618
0
                            16, &sse);
1619
1620
0
    if (sse < threshold) {
1621
0
      unsigned int q2dc = xd->block[24].dequant[0];
1622
      /* If theres is no codeable 2nd order dc
1623
         or a very small uniform pixel change change */
1624
0
      if ((sse - var < q2dc * q2dc >> 4) || (sse / 2 > var && sse - var < 64)) {
1625
        /* Check u and v to make sure skip is ok */
1626
0
        unsigned int sse2 = VP8_UVSSE(x);
1627
0
        if (sse2 * 2 < threshold) {
1628
0
          x->skip = 1;
1629
0
          rd->distortion2 = sse + sse2;
1630
0
          rd->rate2 = 500;
1631
1632
          /* for best_yrd calculation */
1633
0
          rd->rate_uv = 0;
1634
0
          rd->distortion_uv = sse2;
1635
1636
0
          *disable_skip = 1;
1637
0
          return RDCOST(x->rdmult, x->rddiv, rd->rate2, rd->distortion2);
1638
0
        }
1639
0
      }
1640
0
    }
1641
0
  }
1642
1643
  /* Add in the Mv/mode cost */
1644
2.89M
  rd->rate2 += vp8_cost_mv_ref(this_mode, mdcounts);
1645
1646
  /* Y cost and distortion */
1647
2.89M
  macro_block_yrd(x, &rd->rate_y, &distortion);
1648
2.89M
  rd->rate2 += rd->rate_y;
1649
2.89M
  rd->distortion2 += distortion;
1650
1651
  /* UV cost and distortion */
1652
2.89M
  rd_inter16x16_uv(cpi, x, &rd->rate_uv, &rd->distortion_uv,
1653
2.89M
                   cpi->common.full_pixel);
1654
2.89M
  rd->rate2 += rd->rate_uv;
1655
2.89M
  rd->distortion2 += rd->distortion_uv;
1656
2.89M
  return INT_MAX;
1657
2.89M
}
1658
1659
static int calculate_final_rd_costs(int this_rd, RATE_DISTORTION *rd,
1660
                                    int *other_cost, int disable_skip,
1661
                                    int uv_intra_tteob, int intra_rd_penalty,
1662
6.67M
                                    VP8_COMP *cpi, MACROBLOCK *x) {
1663
6.67M
  MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode;
1664
1665
  /* Where skip is allowable add in the default per mb cost for the no
1666
   * skip case. where we then decide to skip we have to delete this and
1667
   * replace it with the cost of signalling a skip
1668
   */
1669
6.67M
  if (cpi->common.mb_no_coeff_skip) {
1670
6.67M
    *other_cost += vp8_cost_bit(cpi->prob_skip_false, 0);
1671
6.67M
    rd->rate2 += *other_cost;
1672
6.67M
  }
1673
1674
  /* Estimate the reference frame signaling cost and add it
1675
   * to the rolling cost variable.
1676
   */
1677
6.67M
  rd->rate2 += x->ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
1678
1679
6.67M
  if (!disable_skip) {
1680
    /* Test for the condition where skip block will be activated
1681
     * because there are no non zero coefficients and make any
1682
     * necessary adjustment for rate
1683
     */
1684
5.99M
    if (cpi->common.mb_no_coeff_skip) {
1685
5.99M
      int i;
1686
5.99M
      int tteob;
1687
5.99M
      int has_y2_block = (this_mode != SPLITMV && this_mode != B_PRED);
1688
1689
5.99M
      tteob = 0;
1690
5.99M
      if (has_y2_block) tteob += x->e_mbd.eobs[24];
1691
1692
101M
      for (i = 0; i < 16; ++i) tteob += (x->e_mbd.eobs[i] > has_y2_block);
1693
1694
5.99M
      if (x->e_mbd.mode_info_context->mbmi.ref_frame) {
1695
29.3M
        for (i = 16; i < 24; ++i) tteob += x->e_mbd.eobs[i];
1696
3.26M
      } else {
1697
2.73M
        tteob += uv_intra_tteob;
1698
2.73M
      }
1699
1700
5.99M
      if (tteob == 0) {
1701
364k
        rd->rate2 -= (rd->rate_y + rd->rate_uv);
1702
        /* for best_yrd calculation */
1703
364k
        rd->rate_uv = 0;
1704
1705
        /* Back out no skip flag costing and add in skip flag costing */
1706
364k
        if (cpi->prob_skip_false) {
1707
364k
          int prob_skip_cost;
1708
1709
364k
          prob_skip_cost = vp8_cost_bit(cpi->prob_skip_false, 1);
1710
364k
          prob_skip_cost -= (int)vp8_cost_bit(cpi->prob_skip_false, 0);
1711
364k
          rd->rate2 += prob_skip_cost;
1712
364k
          *other_cost += prob_skip_cost;
1713
364k
        }
1714
364k
      }
1715
5.99M
    }
1716
    /* Calculate the final RD estimate for this mode */
1717
5.99M
    this_rd = RDCOST(x->rdmult, x->rddiv, rd->rate2, rd->distortion2);
1718
5.99M
    if (this_rd < INT_MAX &&
1719
5.99M
        x->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
1720
2.73M
      this_rd += intra_rd_penalty;
1721
2.73M
    }
1722
5.99M
  }
1723
6.67M
  return this_rd;
1724
6.67M
}
1725
1726
static void update_best_mode(BEST_MODE *best_mode, int this_rd,
1727
                             RATE_DISTORTION *rd, int other_cost,
1728
2.36M
                             MACROBLOCK *x) {
1729
2.36M
  MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode;
1730
1731
2.36M
  other_cost += x->ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
1732
1733
  /* Calculate the final y RD estimate for this mode */
1734
2.36M
  best_mode->yrd =
1735
2.36M
      RDCOST(x->rdmult, x->rddiv, (rd->rate2 - rd->rate_uv - other_cost),
1736
2.36M
             (rd->distortion2 - rd->distortion_uv));
1737
1738
2.36M
  best_mode->rd = this_rd;
1739
2.36M
  best_mode->mbmode = x->e_mbd.mode_info_context->mbmi;
1740
2.36M
  best_mode->partition = *x->partition_info;
1741
1742
2.36M
  if ((this_mode == B_PRED) || (this_mode == SPLITMV)) {
1743
496k
    int i;
1744
8.44M
    for (i = 0; i < 16; ++i) {
1745
7.95M
      best_mode->bmodes[i] = x->e_mbd.block[i].bmi;
1746
7.95M
    }
1747
496k
  }
1748
2.36M
}
1749
1750
void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
1751
                            int recon_uvoffset, int *returnrate,
1752
                            int *returndistortion, int *returnintra, int mb_row,
1753
736k
                            int mb_col) {
1754
736k
  BLOCK *b = &x->block[0];
1755
736k
  BLOCKD *d = &x->e_mbd.block[0];
1756
736k
  MACROBLOCKD *xd = &x->e_mbd;
1757
736k
  int_mv best_ref_mv_sb[2];
1758
736k
  int_mv mode_mv_sb[2][MB_MODE_COUNT];
1759
736k
  int_mv best_ref_mv;
1760
736k
  int_mv *mode_mv;
1761
736k
  MB_PREDICTION_MODE this_mode;
1762
736k
  int num00;
1763
736k
  int best_mode_index = 0;
1764
736k
  BEST_MODE best_mode;
1765
1766
736k
  int i;
1767
736k
  int mode_index;
1768
736k
  int mdcounts[4];
1769
736k
  int rate;
1770
736k
  RATE_DISTORTION rd;
1771
736k
  int uv_intra_rate, uv_intra_distortion, uv_intra_rate_tokenonly;
1772
736k
  int uv_intra_tteob = 0;
1773
736k
  int uv_intra_done = 0;
1774
1775
736k
  MB_PREDICTION_MODE uv_intra_mode = 0;
1776
736k
  int_mv mvp;
1777
736k
  int near_sadidx[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
1778
736k
  int saddone = 0;
1779
  /* search range got from mv_pred(). It uses step_param levels. (0-7) */
1780
736k
  int sr = 0;
1781
1782
736k
  unsigned char *plane[4][3] = { { 0, 0 } };
1783
736k
  int ref_frame_map[4];
1784
736k
  int sign_bias = 0;
1785
1786
736k
  int intra_rd_penalty =
1787
736k
      10 * vp8_dc_quant(cpi->common.base_qindex, cpi->common.y1dc_delta_q);
1788
1789
736k
#if CONFIG_TEMPORAL_DENOISING
1790
736k
  unsigned int zero_mv_sse = UINT_MAX, best_sse = UINT_MAX,
1791
736k
               best_rd_sse = UINT_MAX;
1792
736k
#endif
1793
1794
  // _uv variables are not set consistantly before calling update_best_mode.
1795
736k
  rd.rate_uv = 0;
1796
736k
  rd.distortion_uv = 0;
1797
1798
736k
  mode_mv = mode_mv_sb[sign_bias];
1799
736k
  best_ref_mv.as_int = 0;
1800
736k
  best_mode.rd = INT_MAX;
1801
736k
  best_mode.yrd = INT_MAX;
1802
736k
  best_mode.intra_rd = INT_MAX;
1803
736k
  memset(mode_mv_sb, 0, sizeof(mode_mv_sb));
1804
736k
  memset(&best_mode.mbmode, 0, sizeof(best_mode.mbmode));
1805
736k
  memset(&best_mode.bmodes, 0, sizeof(best_mode.bmodes));
1806
1807
  /* Setup search priorities */
1808
736k
  get_reference_search_order(cpi, ref_frame_map);
1809
1810
  /* Check to see if there is at least 1 valid reference frame that we need
1811
   * to calculate near_mvs.
1812
   */
1813
736k
  if (ref_frame_map[1] > 0) {
1814
736k
    sign_bias = vp8_find_near_mvs_bias(
1815
736k
        &x->e_mbd, x->e_mbd.mode_info_context, mode_mv_sb, best_ref_mv_sb,
1816
736k
        mdcounts, ref_frame_map[1], cpi->common.ref_frame_sign_bias);
1817
1818
736k
    mode_mv = mode_mv_sb[sign_bias];
1819
736k
    best_ref_mv.as_int = best_ref_mv_sb[sign_bias].as_int;
1820
736k
  }
1821
1822
736k
  get_predictor_pointers(cpi, plane, recon_yoffset, recon_uvoffset);
1823
1824
736k
  *returnintra = INT_MAX;
1825
  /* Count of the number of MBs tested so far this frame */
1826
736k
  x->mbs_tested_so_far++;
1827
1828
736k
  x->skip = 0;
1829
1830
15.4M
  for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
1831
14.7M
    int this_rd = INT_MAX;
1832
14.7M
    int disable_skip = 0;
1833
14.7M
    int other_cost = 0;
1834
14.7M
    int this_ref_frame = ref_frame_map[vp8_ref_frame_order[mode_index]];
1835
1836
    /* Test best rd so far against threshold for trying this mode. */
1837
14.7M
    if (best_mode.rd <= x->rd_threshes[mode_index]) continue;
1838
1839
13.2M
    if (this_ref_frame < 0) continue;
1840
1841
    /* These variables hold are rolling total cost and distortion for
1842
     * this mode
1843
     */
1844
8.46M
    rd.rate2 = 0;
1845
8.46M
    rd.distortion2 = 0;
1846
1847
8.46M
    this_mode = vp8_mode_order[mode_index];
1848
1849
8.46M
    x->e_mbd.mode_info_context->mbmi.mode = this_mode;
1850
8.46M
    x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame;
1851
1852
    /* Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
1853
     * unless ARNR filtering is enabled in which case we want
1854
     * an unfiltered alternative
1855
     */
1856
8.46M
    if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
1857
0
      if (this_mode != ZEROMV ||
1858
0
          x->e_mbd.mode_info_context->mbmi.ref_frame != ALTREF_FRAME) {
1859
0
        continue;
1860
0
      }
1861
0
    }
1862
1863
    /* everything but intra */
1864
8.46M
    if (x->e_mbd.mode_info_context->mbmi.ref_frame) {
1865
5.40M
      assert(plane[this_ref_frame][0] != NULL &&
1866
5.40M
             plane[this_ref_frame][1] != NULL &&
1867
5.40M
             plane[this_ref_frame][2] != NULL);
1868
5.40M
      x->e_mbd.pre.y_buffer = plane[this_ref_frame][0];
1869
5.40M
      x->e_mbd.pre.u_buffer = plane[this_ref_frame][1];
1870
5.40M
      x->e_mbd.pre.v_buffer = plane[this_ref_frame][2];
1871
1872
5.40M
      if (sign_bias != cpi->common.ref_frame_sign_bias[this_ref_frame]) {
1873
0
        sign_bias = cpi->common.ref_frame_sign_bias[this_ref_frame];
1874
0
        mode_mv = mode_mv_sb[sign_bias];
1875
0
        best_ref_mv.as_int = best_ref_mv_sb[sign_bias].as_int;
1876
0
      }
1877
5.40M
    }
1878
1879
    /* Check to see if the testing frequency for this mode is at its
1880
     * max If so then prevent it from being tested and increase the
1881
     * threshold for its testing
1882
     */
1883
8.46M
    if (x->mode_test_hit_counts[mode_index] &&
1884
7.53M
        (cpi->mode_check_freq[mode_index] > 1)) {
1885
241k
      if (x->mbs_tested_so_far <= cpi->mode_check_freq[mode_index] *
1886
241k
                                      x->mode_test_hit_counts[mode_index]) {
1887
        /* Increase the threshold for coding this mode to make it
1888
         * less likely to be chosen
1889
         */
1890
133k
        x->rd_thresh_mult[mode_index] += 4;
1891
1892
133k
        if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT) {
1893
26.2k
          x->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
1894
26.2k
        }
1895
1896
133k
        x->rd_threshes[mode_index] =
1897
133k
            (cpi->rd_baseline_thresh[mode_index] >> 7) *
1898
133k
            x->rd_thresh_mult[mode_index];
1899
1900
133k
        continue;
1901
133k
      }
1902
241k
    }
1903
1904
    /* We have now reached the point where we are going to test the
1905
     * current mode so increment the counter for the number of times
1906
     * it has been tested
1907
     */
1908
8.33M
    x->mode_test_hit_counts[mode_index]++;
1909
1910
    /* Experimental code. Special case for gf and arf zeromv modes.
1911
     * Increase zbin size to supress noise
1912
     */
1913
8.33M
    if (x->zbin_mode_boost_enabled) {
1914
0
      if (this_ref_frame == INTRA_FRAME) {
1915
0
        x->zbin_mode_boost = 0;
1916
0
      } else {
1917
0
        if (vp8_mode_order[mode_index] == ZEROMV) {
1918
0
          if (this_ref_frame != LAST_FRAME) {
1919
0
            x->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
1920
0
          } else {
1921
0
            x->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
1922
0
          }
1923
0
        } else if (vp8_mode_order[mode_index] == SPLITMV) {
1924
0
          x->zbin_mode_boost = 0;
1925
0
        } else {
1926
0
          x->zbin_mode_boost = MV_ZBIN_BOOST;
1927
0
        }
1928
0
      }
1929
1930
0
      vp8_update_zbin_extra(cpi, x);
1931
0
    }
1932
1933
8.33M
    if (!uv_intra_done && this_ref_frame == INTRA_FRAME) {
1934
736k
      rd_pick_intra_mbuv_mode(x, &uv_intra_rate, &uv_intra_rate_tokenonly,
1935
736k
                              &uv_intra_distortion);
1936
736k
      uv_intra_mode = x->e_mbd.mode_info_context->mbmi.uv_mode;
1937
1938
      /*
1939
       * Total of the eobs is used later to further adjust rate2. Since uv
1940
       * block's intra eobs will be overwritten when we check inter modes,
1941
       * we need to save uv_intra_tteob here.
1942
       */
1943
6.63M
      for (i = 16; i < 24; ++i) uv_intra_tteob += x->e_mbd.eobs[i];
1944
1945
736k
      uv_intra_done = 1;
1946
736k
    }
1947
1948
8.33M
    switch (this_mode) {
1949
518k
      case B_PRED: {
1950
518k
        int tmp_rd;
1951
1952
        /* Note the rate value returned here includes the cost of
1953
         * coding the BPRED mode: x->mbmode_cost[x->e_mbd.frame_type][BPRED]
1954
         */
1955
518k
        int distortion;
1956
518k
        tmp_rd = rd_pick_intra4x4mby_modes(x, &rate, &rd.rate_y, &distortion,
1957
518k
                                           best_mode.yrd);
1958
518k
        rd.rate2 += rate;
1959
518k
        rd.distortion2 += distortion;
1960
1961
518k
        if (tmp_rd < best_mode.yrd) {
1962
196k
          assert(uv_intra_done);
1963
196k
          rd.rate2 += uv_intra_rate;
1964
196k
          rd.rate_uv = uv_intra_rate_tokenonly;
1965
196k
          rd.distortion2 += uv_intra_distortion;
1966
196k
          rd.distortion_uv = uv_intra_distortion;
1967
321k
        } else {
1968
321k
          this_rd = INT_MAX;
1969
321k
          disable_skip = 1;
1970
321k
        }
1971
518k
        break;
1972
0
      }
1973
1974
719k
      case SPLITMV: {
1975
719k
        int tmp_rd;
1976
719k
        int this_rd_thresh;
1977
719k
        int distortion;
1978
1979
719k
        this_rd_thresh = (vp8_ref_frame_order[mode_index] == 1)
1980
719k
                             ? x->rd_threshes[THR_NEW1]
1981
719k
                             : x->rd_threshes[THR_NEW3];
1982
719k
        this_rd_thresh = (vp8_ref_frame_order[mode_index] == 2)
1983
719k
                             ? x->rd_threshes[THR_NEW2]
1984
719k
                             : this_rd_thresh;
1985
1986
719k
        tmp_rd = vp8_rd_pick_best_mbsegmentation(
1987
719k
            cpi, x, &best_ref_mv, best_mode.yrd, mdcounts, &rate, &rd.rate_y,
1988
719k
            &distortion, this_rd_thresh);
1989
1990
719k
        rd.rate2 += rate;
1991
719k
        rd.distortion2 += distortion;
1992
1993
        /* If even the 'Y' rd value of split is higher than best so far
1994
         * then don't bother looking at UV
1995
         */
1996
719k
        if (tmp_rd < best_mode.yrd) {
1997
          /* Now work out UV cost and add it in */
1998
371k
          rd_inter4x4_uv(cpi, x, &rd.rate_uv, &rd.distortion_uv,
1999
371k
                         cpi->common.full_pixel);
2000
371k
          rd.rate2 += rd.rate_uv;
2001
371k
          rd.distortion2 += rd.distortion_uv;
2002
371k
        } else {
2003
348k
          this_rd = INT_MAX;
2004
348k
          disable_skip = 1;
2005
348k
        }
2006
719k
        break;
2007
0
      }
2008
736k
      case DC_PRED:
2009
1.34M
      case V_PRED:
2010
1.94M
      case H_PRED:
2011
2.53M
      case TM_PRED: {
2012
2.53M
        int distortion;
2013
2.53M
        x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
2014
2015
2.53M
        vp8_build_intra_predictors_mby_s(
2016
2.53M
            xd, xd->dst.y_buffer - xd->dst.y_stride, xd->dst.y_buffer - 1,
2017
2.53M
            xd->dst.y_stride, xd->predictor, 16);
2018
2.53M
        macro_block_yrd(x, &rd.rate_y, &distortion);
2019
2.53M
        rd.rate2 += rd.rate_y;
2020
2.53M
        rd.distortion2 += distortion;
2021
2.53M
        rd.rate2 += x->mbmode_cost[x->e_mbd.frame_type]
2022
2.53M
                                  [x->e_mbd.mode_info_context->mbmi.mode];
2023
2.53M
        assert(uv_intra_done);
2024
2.53M
        rd.rate2 += uv_intra_rate;
2025
2.53M
        rd.rate_uv = uv_intra_rate_tokenonly;
2026
2.53M
        rd.distortion2 += uv_intra_distortion;
2027
2.53M
        rd.distortion_uv = uv_intra_distortion;
2028
2.53M
        break;
2029
1.94M
      }
2030
2031
984k
      case NEWMV: {
2032
984k
        int thissme;
2033
984k
        int bestsme = INT_MAX;
2034
984k
        int step_param = cpi->sf.first_step;
2035
984k
        int further_steps;
2036
984k
        int n;
2037
        /* If last step (1-away) of n-step search doesn't pick the center point
2038
           as the best match, we will do a final 1-away diamond refining search
2039
        */
2040
984k
        int do_refine = 1;
2041
2042
984k
        int sadpb = x->sadperbit16;
2043
984k
        int_mv mvp_full;
2044
2045
984k
        int col_min = ((best_ref_mv.as_mv.col + 7) >> 3) - MAX_FULL_PEL_VAL;
2046
984k
        int row_min = ((best_ref_mv.as_mv.row + 7) >> 3) - MAX_FULL_PEL_VAL;
2047
984k
        int col_max = (best_ref_mv.as_mv.col >> 3) + MAX_FULL_PEL_VAL;
2048
984k
        int row_max = (best_ref_mv.as_mv.row >> 3) + MAX_FULL_PEL_VAL;
2049
2050
984k
        int tmp_col_min = x->mv_col_min;
2051
984k
        int tmp_col_max = x->mv_col_max;
2052
984k
        int tmp_row_min = x->mv_row_min;
2053
984k
        int tmp_row_max = x->mv_row_max;
2054
2055
984k
        if (!saddone) {
2056
629k
          vp8_cal_sad(cpi, xd, x, recon_yoffset, &near_sadidx[0]);
2057
629k
          saddone = 1;
2058
629k
        }
2059
2060
984k
        vp8_mv_pred(cpi, &x->e_mbd, x->e_mbd.mode_info_context, &mvp,
2061
984k
                    x->e_mbd.mode_info_context->mbmi.ref_frame,
2062
984k
                    cpi->common.ref_frame_sign_bias, &sr, &near_sadidx[0]);
2063
2064
984k
        mvp_full.as_mv.col = mvp.as_mv.col >> 3;
2065
984k
        mvp_full.as_mv.row = mvp.as_mv.row >> 3;
2066
2067
        /* Get intersection of UMV window and valid MV window to
2068
         * reduce # of checks in diamond search.
2069
         */
2070
984k
        if (x->mv_col_min < col_min) x->mv_col_min = col_min;
2071
984k
        if (x->mv_col_max > col_max) x->mv_col_max = col_max;
2072
984k
        if (x->mv_row_min < row_min) x->mv_row_min = row_min;
2073
984k
        if (x->mv_row_max > row_max) x->mv_row_max = row_max;
2074
2075
        /* adjust search range according to sr from mv prediction */
2076
984k
        if (sr > step_param) step_param = sr;
2077
2078
        /* Initial step/diamond search */
2079
984k
        {
2080
984k
          bestsme = cpi->diamond_search_sad(
2081
984k
              x, b, d, &mvp_full, &d->bmi.mv, step_param, sadpb, &num00,
2082
984k
              &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv);
2083
984k
          mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
2084
2085
          /* Further step/diamond searches as necessary */
2086
984k
          further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
2087
2088
984k
          n = num00;
2089
984k
          num00 = 0;
2090
2091
          /* If there won't be more n-step search, check to see if refining
2092
           * search is needed. */
2093
984k
          if (n > further_steps) do_refine = 0;
2094
2095
4.22M
          while (n < further_steps) {
2096
3.23M
            n++;
2097
2098
3.23M
            if (num00) {
2099
287k
              num00--;
2100
2.94M
            } else {
2101
2.94M
              thissme = cpi->diamond_search_sad(
2102
2.94M
                  x, b, d, &mvp_full, &d->bmi.mv, step_param + n, sadpb, &num00,
2103
2.94M
                  &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv);
2104
2105
              /* check to see if refining search is needed. */
2106
2.94M
              if (num00 > (further_steps - n)) do_refine = 0;
2107
2108
2.94M
              if (thissme < bestsme) {
2109
453k
                bestsme = thissme;
2110
453k
                mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
2111
2.49M
              } else {
2112
2.49M
                d->bmi.mv.as_int = mode_mv[NEWMV].as_int;
2113
2.49M
              }
2114
2.94M
            }
2115
3.23M
          }
2116
984k
        }
2117
2118
        /* final 1-away diamond refining search */
2119
984k
        if (do_refine == 1) {
2120
641k
          int search_range;
2121
2122
641k
          search_range = 8;
2123
2124
641k
          thissme = cpi->refining_search_sad(
2125
641k
              x, b, d, &d->bmi.mv, sadpb, search_range,
2126
641k
              &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv);
2127
2128
641k
          if (thissme < bestsme) {
2129
29.8k
            bestsme = thissme;
2130
29.8k
            mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
2131
611k
          } else {
2132
611k
            d->bmi.mv.as_int = mode_mv[NEWMV].as_int;
2133
611k
          }
2134
641k
        }
2135
2136
984k
        x->mv_col_min = tmp_col_min;
2137
984k
        x->mv_col_max = tmp_col_max;
2138
984k
        x->mv_row_min = tmp_row_min;
2139
984k
        x->mv_row_max = tmp_row_max;
2140
2141
984k
        if (bestsme < INT_MAX) {
2142
984k
          int dis; /* TODO: use dis in distortion calculation later. */
2143
984k
          unsigned int sse;
2144
984k
          cpi->find_fractional_mv_step(
2145
984k
              x, b, d, &d->bmi.mv, &best_ref_mv, x->errorperbit,
2146
984k
              &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &dis, &sse);
2147
984k
        }
2148
2149
984k
        mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
2150
2151
        /* Add the new motion vector cost to our rolling cost variable */
2152
984k
        rd.rate2 +=
2153
984k
            vp8_mv_bit_cost(&mode_mv[NEWMV], &best_ref_mv, x->mvcost, 96);
2154
984k
      }
2155
        // fall through
2156
2157
2.17M
      case NEARESTMV:
2158
3.36M
      case NEARMV:
2159
        /* Clip "next_nearest" so that it does not extend to far out
2160
         * of image
2161
         */
2162
3.36M
        vp8_clamp_mv2(&mode_mv[this_mode], xd);
2163
2164
        /* Do not bother proceeding if the vector (from newmv, nearest
2165
         * or near) is 0,0 as this should then be coded using the zeromv
2166
         * mode.
2167
         */
2168
3.36M
        if (((this_mode == NEARMV) || (this_mode == NEARESTMV)) &&
2169
2.38M
            (mode_mv[this_mode].as_int == 0)) {
2170
1.66M
          continue;
2171
1.66M
        }
2172
        // fall through
2173
2174
2.89M
      case ZEROMV:
2175
2176
        /* Trap vectors that reach beyond the UMV borders
2177
         * Note that ALL New MV, Nearest MV Near MV and Zero MV code
2178
         * drops through to this point because of the lack of break
2179
         * statements in the previous two cases.
2180
         */
2181
2.89M
        if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) ||
2182
2.89M
            ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) ||
2183
2.89M
            ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) ||
2184
2.89M
            ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max)) {
2185
0
          continue;
2186
0
        }
2187
2188
2.89M
        vp8_set_mbmode_and_mvs(x, this_mode, &mode_mv[this_mode]);
2189
2.89M
        this_rd = evaluate_inter_mode_rd(mdcounts, &rd, &disable_skip, cpi, x);
2190
2.89M
        break;
2191
2192
0
      default: break;
2193
8.33M
    }
2194
2195
6.67M
    this_rd =
2196
6.67M
        calculate_final_rd_costs(this_rd, &rd, &other_cost, disable_skip,
2197
6.67M
                                 uv_intra_tteob, intra_rd_penalty, cpi, x);
2198
2199
    /* Keep record of best intra distortion */
2200
6.67M
    if ((x->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME) &&
2201
3.05M
        (this_rd < best_mode.intra_rd)) {
2202
1.08M
      best_mode.intra_rd = this_rd;
2203
1.08M
      *returnintra = rd.distortion2;
2204
1.08M
    }
2205
6.67M
#if CONFIG_TEMPORAL_DENOISING
2206
6.67M
    if (cpi->oxcf.noise_sensitivity) {
2207
0
      unsigned int sse;
2208
0
      vp8_get_inter_mbpred_error(x, &cpi->fn_ptr[BLOCK_16X16], &sse,
2209
0
                                 mode_mv[this_mode]);
2210
2211
0
      if (sse < best_rd_sse) best_rd_sse = sse;
2212
2213
      /* Store for later use by denoiser. */
2214
0
      if (this_mode == ZEROMV && sse < zero_mv_sse) {
2215
0
        zero_mv_sse = sse;
2216
0
        x->best_zeromv_reference_frame =
2217
0
            x->e_mbd.mode_info_context->mbmi.ref_frame;
2218
0
      }
2219
2220
      /* Store the best NEWMV in x for later use in the denoiser. */
2221
0
      if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV && sse < best_sse) {
2222
0
        best_sse = sse;
2223
0
        vp8_get_inter_mbpred_error(x, &cpi->fn_ptr[BLOCK_16X16], &best_sse,
2224
0
                                   mode_mv[this_mode]);
2225
0
        x->best_sse_inter_mode = NEWMV;
2226
0
        x->best_sse_mv = x->e_mbd.mode_info_context->mbmi.mv;
2227
0
        x->need_to_clamp_best_mvs =
2228
0
            x->e_mbd.mode_info_context->mbmi.need_to_clamp_mvs;
2229
0
        x->best_reference_frame = x->e_mbd.mode_info_context->mbmi.ref_frame;
2230
0
      }
2231
0
    }
2232
6.67M
#endif
2233
2234
    /* Did this mode help.. i.i is it the new best mode */
2235
6.67M
    if (this_rd < best_mode.rd || x->skip) {
2236
      /* Note index of best mode so far */
2237
2.36M
      best_mode_index = mode_index;
2238
2.36M
      *returnrate = rd.rate2;
2239
2.36M
      *returndistortion = rd.distortion2;
2240
2.36M
      if (this_mode <= B_PRED) {
2241
936k
        x->e_mbd.mode_info_context->mbmi.uv_mode = uv_intra_mode;
2242
        /* required for left and above block mv */
2243
936k
        x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
2244
936k
      }
2245
2.36M
      update_best_mode(&best_mode, this_rd, &rd, other_cost, x);
2246
2247
      /* Testing this mode gave rise to an improvement in best error
2248
       * score. Lower threshold a bit for next time
2249
       */
2250
2.36M
      x->rd_thresh_mult[mode_index] =
2251
2.36M
          (x->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2))
2252
2.36M
              ? x->rd_thresh_mult[mode_index] - 2
2253
2.36M
              : MIN_THRESHMULT;
2254
2.36M
    }
2255
2256
    /* If the mode did not help improve the best error case then raise
2257
     * the threshold for testing that mode next time around.
2258
     */
2259
4.30M
    else {
2260
4.30M
      x->rd_thresh_mult[mode_index] += 4;
2261
2262
4.30M
      if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT) {
2263
2.18M
        x->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
2264
2.18M
      }
2265
4.30M
    }
2266
6.67M
    x->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) *
2267
6.67M
                                 x->rd_thresh_mult[mode_index];
2268
2269
6.67M
    if (x->skip) break;
2270
6.67M
  }
2271
2272
  /* Reduce the activation RD thresholds for the best choice mode */
2273
736k
  if ((cpi->rd_baseline_thresh[best_mode_index] > 0) &&
2274
476k
      (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2))) {
2275
476k
    int best_adjustment = (x->rd_thresh_mult[best_mode_index] >> 2);
2276
2277
476k
    x->rd_thresh_mult[best_mode_index] =
2278
476k
        (x->rd_thresh_mult[best_mode_index] >=
2279
476k
         (MIN_THRESHMULT + best_adjustment))
2280
476k
            ? x->rd_thresh_mult[best_mode_index] - best_adjustment
2281
476k
            : MIN_THRESHMULT;
2282
476k
    x->rd_threshes[best_mode_index] =
2283
476k
        (cpi->rd_baseline_thresh[best_mode_index] >> 7) *
2284
476k
        x->rd_thresh_mult[best_mode_index];
2285
476k
  }
2286
2287
736k
#if CONFIG_TEMPORAL_DENOISING
2288
736k
  if (cpi->oxcf.noise_sensitivity) {
2289
0
    int block_index = mb_row * cpi->common.mb_cols + mb_col;
2290
0
    if (x->best_sse_inter_mode == DC_PRED) {
2291
      /* No best MV found. */
2292
0
      x->best_sse_inter_mode = best_mode.mbmode.mode;
2293
0
      x->best_sse_mv = best_mode.mbmode.mv;
2294
0
      x->need_to_clamp_best_mvs = best_mode.mbmode.need_to_clamp_mvs;
2295
0
      x->best_reference_frame = best_mode.mbmode.ref_frame;
2296
0
      best_sse = best_rd_sse;
2297
0
    }
2298
0
    vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse,
2299
0
                            recon_yoffset, recon_uvoffset, &cpi->common.lf_info,
2300
0
                            mb_row, mb_col, block_index, 0);
2301
2302
    /* Reevaluate ZEROMV after denoising. */
2303
0
    if (best_mode.mbmode.ref_frame == INTRA_FRAME &&
2304
0
        x->best_zeromv_reference_frame != INTRA_FRAME) {
2305
0
      int this_rd = INT_MAX;
2306
0
      int disable_skip = 0;
2307
0
      int other_cost = 0;
2308
0
      int this_ref_frame = x->best_zeromv_reference_frame;
2309
0
      rd.rate2 =
2310
0
          x->ref_frame_cost[this_ref_frame] + vp8_cost_mv_ref(ZEROMV, mdcounts);
2311
0
      rd.distortion2 = 0;
2312
2313
      /* set up the proper prediction buffers for the frame */
2314
0
      x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame;
2315
0
      x->e_mbd.pre.y_buffer = plane[this_ref_frame][0];
2316
0
      x->e_mbd.pre.u_buffer = plane[this_ref_frame][1];
2317
0
      x->e_mbd.pre.v_buffer = plane[this_ref_frame][2];
2318
2319
0
      x->e_mbd.mode_info_context->mbmi.mode = ZEROMV;
2320
0
      x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
2321
0
      x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
2322
2323
0
      this_rd = evaluate_inter_mode_rd(mdcounts, &rd, &disable_skip, cpi, x);
2324
0
      this_rd =
2325
0
          calculate_final_rd_costs(this_rd, &rd, &other_cost, disable_skip,
2326
0
                                   uv_intra_tteob, intra_rd_penalty, cpi, x);
2327
0
      if (this_rd < best_mode.rd || x->skip) {
2328
0
        *returnrate = rd.rate2;
2329
0
        *returndistortion = rd.distortion2;
2330
0
        update_best_mode(&best_mode, this_rd, &rd, other_cost, x);
2331
0
      }
2332
0
    }
2333
0
  }
2334
736k
#endif
2335
2336
736k
  if (cpi->is_src_frame_alt_ref &&
2337
0
      (best_mode.mbmode.mode != ZEROMV ||
2338
0
       best_mode.mbmode.ref_frame != ALTREF_FRAME)) {
2339
0
    x->e_mbd.mode_info_context->mbmi.mode = ZEROMV;
2340
0
    x->e_mbd.mode_info_context->mbmi.ref_frame = ALTREF_FRAME;
2341
0
    x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
2342
0
    x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
2343
0
    x->e_mbd.mode_info_context->mbmi.mb_skip_coeff =
2344
0
        (cpi->common.mb_no_coeff_skip);
2345
0
    x->e_mbd.mode_info_context->mbmi.partitioning = 0;
2346
0
    return;
2347
0
  }
2348
2349
  /* macroblock modes */
2350
736k
  x->e_mbd.mode_info_context->mbmi = best_mode.mbmode;
2351
2352
736k
  if (best_mode.mbmode.mode == B_PRED) {
2353
3.22M
    for (i = 0; i < 16; ++i) {
2354
3.03M
      xd->mode_info_context->bmi[i].as_mode = best_mode.bmodes[i].as_mode;
2355
3.03M
    }
2356
189k
  }
2357
2358
736k
  if (best_mode.mbmode.mode == SPLITMV) {
2359
2.94M
    for (i = 0; i < 16; ++i) {
2360
2.76M
      xd->mode_info_context->bmi[i].mv.as_int = best_mode.bmodes[i].mv.as_int;
2361
2.76M
    }
2362
2363
173k
    *x->partition_info = best_mode.partition;
2364
2365
173k
    x->e_mbd.mode_info_context->mbmi.mv.as_int =
2366
173k
        x->partition_info->bmi[15].mv.as_int;
2367
173k
  }
2368
2369
736k
  if (sign_bias !=
2370
736k
      cpi->common.ref_frame_sign_bias[xd->mode_info_context->mbmi.ref_frame]) {
2371
0
    best_ref_mv.as_int = best_ref_mv_sb[!sign_bias].as_int;
2372
0
  }
2373
2374
736k
  rd_update_mvcount(x, &best_ref_mv);
2375
736k
}
2376
2377
640k
/* Intra-only mode decision for one macroblock.
 *
 * Runs the chroma search, the 16x16 luma search and the 4x4 luma search (the
 * latter bounded by the 16x16 result), selects B_PRED when the 4x4 result is
 * strictly cheaper, and stores the chroma rate plus the chosen luma rate in
 * *rate. When 16x16 wins the luma mode is not written here; it is presumably
 * left as set by rd_pick_intra16x16mby_mode() -- confirm.
 */
void vp8_rd_pick_intra_mode(MACROBLOCK *x, int *rate) {
  /* Chroma search outputs. */
  int rate_uv, dist_uv;
  int rate_uv_tokens = 0;
  /* 16x16 luma search outputs. */
  int rd_16x16, dist_16x16;
  int rate_16x16 = 0;
  int rate_16x16_tokens = 0;
  /* 4x4 luma search outputs. */
  int rd_4x4, rate_4x4, dist_4x4;
  int rate_4x4_tokens = 0;
  int total_rate;

  x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;

  rd_pick_intra_mbuv_mode(x, &rate_uv, &rate_uv_tokens, &dist_uv);
  total_rate = rate_uv;

  rd_16x16 = rd_pick_intra16x16mby_mode(x, &rate_16x16, &rate_16x16_tokens,
                                        &dist_16x16);

  /* The 16x16 cost is handed to the 4x4 search as its last argument. */
  rd_4x4 = rd_pick_intra4x4mby_modes(x, &rate_4x4, &rate_4x4_tokens, &dist_4x4,
                                     rd_16x16);

  if (rd_4x4 < rd_16x16) {
    x->e_mbd.mode_info_context->mbmi.mode = B_PRED;
    total_rate += rate_4x4;
  } else {
    total_rate += rate_16x16;
  }

  *rate = total_rate;
}