Coverage Report

Created: 2025-08-28 07:12

/src/libvpx/vp8/encoder/rdopt.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5
 *  that can be found in the LICENSE file in the root of the source
6
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
9
 */
10
11
#include <assert.h>
12
#include <stdio.h>
13
#include <math.h>
14
#include <limits.h>
15
#include <assert.h>
16
#include "vpx_config.h"
17
#include "vp8_rtcd.h"
18
#include "./vpx_dsp_rtcd.h"
19
#include "encodeframe.h"
20
#include "tokenize.h"
21
#include "treewriter.h"
22
#include "onyx_int.h"
23
#include "modecosts.h"
24
#include "encodeintra.h"
25
#include "pickinter.h"
26
#include "vp8/common/common.h"
27
#include "vp8/common/entropymode.h"
28
#include "vp8/common/reconinter.h"
29
#include "vp8/common/reconintra.h"
30
#include "vp8/common/reconintra4x4.h"
31
#include "vp8/common/findnearmv.h"
32
#include "vp8/common/quant_common.h"
33
#include "encodemb.h"
34
#include "vp8/encoder/quantize.h"
35
#include "vpx_dsp/variance.h"
36
#include "vpx_ports/system_state.h"
37
#include "mcomp.h"
38
#include "rdopt.h"
39
#include "vpx_mem/vpx_mem.h"
40
#include "vp8/common/systemdependent.h"
41
#if CONFIG_TEMPORAL_DENOISING
42
#include "denoising.h"
43
#endif
44
extern void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x);
45
46
1.52M
/* Evaluates to the larger of a and b.  This is a function-like macro, so
 * whichever argument is selected is evaluated twice; do not pass expressions
 * with side effects. */
#define MAXF(a, b) (((a) > (b)) ? (a) : (b))
47
48
/* Bundle of integer rate and distortion values.
 * NOTE(review): no user of this type is visible in this part of the file;
 * presumably rate2/distortion2 are totals and the _y/_uv fields are the
 * luma/chroma parts -- confirm against the mode-decision code. */
typedef struct rate_distortion_struct {
49
  int rate2;
50
  int rate_y;
51
  int rate_uv;
52
  int distortion2;
53
  int distortion_uv;
54
} RATE_DISTORTION;
55
56
/* Snapshot of a candidate macroblock mode: three rd scores plus the mode
 * info, the 16 per-block mode entries and the partition info.
 * NOTE(review): presumably records the best mode found so far -- confirm
 * against the users of this type, which are not visible here. */
typedef struct best_mode_struct {
57
  int yrd;
58
  int rd;
59
  int intra_rd;
60
  MB_MODE_INFO mbmode;
61
  union b_mode_info bmodes[16];
62
  PARTITION_INFO partition;
63
} BEST_MODE;
64
65
/* Thresholds used by vp8_auto_select_speed(), indexed by cpi->Speed
 * (17 entries, indices 0..16).  The entry is multiplied by
 * cpi->avg_encode_time and compared with 100 times the per-frame time budget
 * to decide whether Speed can be lowered by one. */
static const int auto_speed_thresh[17] = { 1000, 200, 150, 130, 150, 125,
66
                                           120,  115, 115, 115, 115, 115,
67
                                           115,  115, 115, 115, 105 };
68
69
/* Prediction mode for each of the MAX_MODES search slots.
 * NOTE(review): presumably slot i here pairs with vp8_ref_frame_order[i]
 * (both tables have MAX_MODES entries) -- confirm against the mode loop. */
const MB_PREDICTION_MODE vp8_mode_order[MAX_MODES] = {
70
  ZEROMV,    DC_PRED,
71
72
  NEARESTMV, NEARMV,
73
74
  ZEROMV,    NEARESTMV,
75
76
  ZEROMV,    NEARESTMV,
77
78
  NEARMV,    NEARMV,
79
80
  V_PRED,    H_PRED,    TM_PRED,
81
82
  NEWMV,     NEWMV,     NEWMV,
83
84
  SPLITMV,   SPLITMV,   SPLITMV,
85
86
  B_PRED,
87
};
88
89
/* This table determines the search order in reference frame priority order,
90
 * which may not necessarily match INTRA,LAST,GOLDEN,ARF
91
 */
92
/* One reference-frame priority index per search slot (MAX_MODES entries).
 * NOTE(review): 0 appears only in slots whose vp8_mode_order entry is an
 * intra mode, so 0 presumably means "intra" and 1..3 are priority ranks of
 * the inter reference frames -- confirm against the code that maps them. */
const int vp8_ref_frame_order[MAX_MODES] = {
93
  1, 0,
94
95
  1, 1,
96
97
  2, 2,
98
99
  3, 3,
100
101
  2, 3,
102
103
  0, 0, 0,
104
105
  1, 2, 3,
106
107
  1, 2, 3,
108
109
  0,
110
};
111
112
static void fill_token_costs(
113
    int c[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS],
114
    const vp8_prob p[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS]
115
114k
                    [ENTROPY_NODES]) {
116
114k
  int i, j, k;
117
118
570k
  for (i = 0; i < BLOCK_TYPES; ++i) {
119
4.10M
    for (j = 0; j < COEF_BANDS; ++j) {
120
14.6M
      for (k = 0; k < PREV_COEF_CONTEXTS; ++k) {
121
        /* check for pt=0 and band > 1 if block type 0
122
         * and 0 if blocktype 1
123
         */
124
10.9M
        if (k == 0 && j > (i == 0)) {
125
3.07M
          vp8_cost_tokens2(c[i][j][k], p[i][j][k], vp8_coef_tree, 2);
126
7.87M
        } else {
127
7.87M
          vp8_cost_tokens(c[i][j][k], p[i][j][k], vp8_coef_tree);
128
7.87M
        }
129
10.9M
      }
130
3.65M
    }
131
456k
  }
132
114k
}
133
134
/* RDMULT boost factors, in sixteenths, used by vp8_initialize_rd_consts() on
 * non-key frames when cpi->pass == 2.  Indexed by cpi->twopass.next_iiratio;
 * ratios above 31 use entry 31.  Applied as
 * RDMULT += (RDMULT * factor) >> 4. */
static const int rd_iifactor[32] = { 4, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0,
135
                                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
136
                                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
137
138
/* values are now correlated to quantizer */
139
/* Looked up by QIndex in vp8cx_initialize_me_consts() to set
 * cpi->mb.sadperbit16; one entry per quantizer index (QINDEX_RANGE). */
static const int sad_per_bit16lut[QINDEX_RANGE] = {
140
  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  3,  3,  3,
141
  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  4,  4,  4,  4,  4,  4,  4,  4,
142
  4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  6,  6,  6,
143
  6,  6,  6,  6,  6,  6,  6,  6,  6,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
144
  7,  7,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  9,  9,  9,  9,  9,
145
  9,  9,  9,  9,  9,  9,  9,  10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11,
146
  11, 11, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 14, 14
147
};
148
/* Looked up by QIndex in vp8cx_initialize_me_consts() to set
 * cpi->mb.sadperbit4; one entry per quantizer index (QINDEX_RANGE). */
static const int sad_per_bit4lut[QINDEX_RANGE] = {
149
  2,  2,  2,  2,  2,  2,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
150
  3,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  6,  6,
151
  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  7,  7,  7,  7,  7,  7,  7,  7,  7,
152
  7,  7,  7,  7,  8,  8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  10, 10, 10, 10,
153
  10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12,
154
  12, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16,
155
  16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 19, 20, 20, 20,
156
};
157
158
114k
/* Load the SAD-per-bit values for quantizer index QIndex into cpi->mb.
 * QIndex is used directly as an index into both lookup tables and is not
 * range-checked here. */
void vp8cx_initialize_me_consts(VP8_COMP *cpi, int QIndex) {
  const int sad_bits_16 = sad_per_bit16lut[QIndex];
  const int sad_bits_4 = sad_per_bit4lut[QIndex];

  cpi->mb.sadperbit16 = sad_bits_16;
  cpi->mb.sadperbit4 = sad_bits_4;
}
162
163
114k
/* Initialise the rate-distortion constants for the current frame.
 *
 * cpi:    encoder instance.  RDMULT, RDDIV, mb.errorperbit,
 *         rd_baseline_thresh[], mb.token_costs and (through
 *         vp8_set_speed_features / vp8_init_mode_costs) the speed features
 *         and mode costs are updated.
 * x:      macroblock whose mode_test_hit_counts[] are cleared and whose
 *         rd_threshes[] are recomputed.
 * Qvalue: quantizer value the constants are derived from.
 */
void vp8_initialize_rd_consts(VP8_COMP *cpi, MACROBLOCK *x, int Qvalue) {
  int q;
  int i;
  double capped_q = (Qvalue < 160) ? (double)Qvalue : 160.0;
  double rdconst = 2.80;

  vpx_clear_system_state();

  /* Further tests required to see if optimum is different
   * for key frames, golden frames and arf frames.
   */
  cpi->RDMULT = (int)(rdconst * (capped_q * capped_q));

  /* Extend rate multiplier along side quantizer zbin increases */
  if (cpi->mb.zbin_over_quant > 0) {
    double oq_factor;
    double modq;

    /* Experimental code using the same basic equation as used for Q above
     * The units of cpi->mb.zbin_over_quant are 1/128 of Q bin size
     */
    oq_factor = 1.0 + ((double)0.0015625 * cpi->mb.zbin_over_quant);
    modq = (int)((double)capped_q * oq_factor);
    cpi->RDMULT = (int)(rdconst * (modq * modq));
  }

  /* Second-pass inter frames: boost RDMULT by rd_iifactor[] sixteenths,
   * using entry 31 for any ratio above 31. */
  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    if (cpi->twopass.next_iiratio > 31) {
      cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
    } else {
      cpi->RDMULT +=
          (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
    }
  }

  /* errorperbit is RDMULT / 110, but never 0. */
  cpi->mb.errorperbit = (cpi->RDMULT / 110);
  cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);

  vp8_set_speed_features(cpi);

  for (i = 0; i < MAX_MODES; ++i) {
    x->mode_test_hit_counts[i] = 0;
  }

  q = (int)pow(Qvalue, 1.25);

  if (q < 8) q = 8;

  if (cpi->RDMULT > 1000) {
    cpi->RDDIV = 1;
    cpi->RDMULT /= 100;

    for (i = 0; i < MAX_MODES; ++i) {
      if (cpi->sf.thresh_mult[i] < INT_MAX) {
        /* Multiply in 64 bits: thresh_mult * q can exceed INT_MAX even when
         * the value after the division by 100 fits.  Saturate if it still
         * does not fit. */
        const int64_t scaled = (int64_t)cpi->sf.thresh_mult[i] * q / 100;

        x->rd_threshes[i] = (scaled < INT_MAX) ? (int)scaled : INT_MAX;
      } else {
        x->rd_threshes[i] = INT_MAX;
      }

      cpi->rd_baseline_thresh[i] = x->rd_threshes[i];
    }
  } else {
    cpi->RDDIV = 100;

    for (i = 0; i < MAX_MODES; ++i) {
      /* q >= 8 here, so INT_MAX / q is well defined and the product below
       * cannot overflow. */
      if (cpi->sf.thresh_mult[i] < (INT_MAX / q)) {
        x->rd_threshes[i] = cpi->sf.thresh_mult[i] * q;
      } else {
        x->rd_threshes[i] = INT_MAX;
      }

      cpi->rd_baseline_thresh[i] = x->rd_threshes[i];
    }
  }

  {
    /* build token cost array for the type of frame we have now */
    FRAME_CONTEXT *l = &cpi->lfc_n;

    if (cpi->common.refresh_alt_ref_frame) {
      l = &cpi->lfc_a;
    } else if (cpi->common.refresh_golden_frame) {
      l = &cpi->lfc_g;
    }

    fill_token_costs(cpi->mb.token_costs,
                     (const vp8_prob(*)[8][3][11])l->coef_probs);

    /* TODO make these mode costs depend on last,alt or gold too.  (jbb) */
    vp8_init_mode_costs(cpi);
  }
}
260
261
43.7k
/* Adapt cpi->Speed to the measured encode times.
 *
 * A per-frame time budget is derived from 1000000 / cpi->framerate and scaled
 * by (16 - cpi->oxcf.cpu_used) / 16.  When either the mode-picking time or
 * the remaining encode time reaches the budget, Speed is raised by 4 (capped
 * at 16).  Otherwise Speed is reset to 4 if no pick-mode time has been
 * measured, or nudged up by 2 / down by 1 depending on how the average encode
 * time compares with the budget.  Every Speed change except the reset to 4
 * also clears both running averages.
 */
void vp8_auto_select_speed(VP8_COMP *cpi) {
  int budget = (int)(1000000 / cpi->framerate);

  budget = budget * (16 - cpi->oxcf.cpu_used) / 16;

#if 0

    if (0)
    {
        FILE *f;

        f = fopen("speed.stt", "a");
        fprintf(f, " %8ld %10ld %10ld %10ld\n",
                cpi->common.current_video_frame, cpi->Speed, budget, cpi->avg_pick_mode_time);
        fclose(f);
    }

#endif

  if (!(cpi->avg_pick_mode_time < budget &&
        (cpi->avg_encode_time - cpi->avg_pick_mode_time) < budget)) {
    /* Over budget: take a big step towards the fastest setting. */
    cpi->Speed += 4;

    if (cpi->Speed > 16) cpi->Speed = 16;

    cpi->avg_pick_mode_time = 0;
    cpi->avg_encode_time = 0;
    return;
  }

  if (cpi->avg_pick_mode_time == 0) {
    cpi->Speed = 4;
    return;
  }

  /* Budget is below 95% of the average encode time: speed up. */
  if (budget * 100 < cpi->avg_encode_time * 95) {
    cpi->Speed += 2;
    cpi->avg_pick_mode_time = 0;
    cpi->avg_encode_time = 0;

    if (cpi->Speed > 16) {
      cpi->Speed = 16;
    }
  }

  /* Evaluated after the block above, so it sees any Speed / average changes
   * made there. */
  if (budget * 100 > cpi->avg_encode_time * auto_speed_thresh[cpi->Speed]) {
    cpi->Speed -= 1;
    cpi->avg_pick_mode_time = 0;
    cpi->avg_encode_time = 0;

    /* In real-time mode, cpi->speed is in [4, 16]. */
    if (cpi->Speed < 4) {
      cpi->Speed = 4;
    }
  }
}
318
319
0
/* Sum of squared differences between the 16 values at coeff and the 16
 * values at dqcoeff. */
int vp8_block_error_c(short *coeff, short *dqcoeff) {
  const short *src = coeff;
  const short *rec = dqcoeff;
  const short *const src_end = coeff + 16;
  int sse = 0;

  while (src < src_end) {
    const int delta = *src++ - *rec++;
    sse += delta * delta;
  }

  return sse;
}
330
331
0
/* Sum of squared coefficient errors over blocks 0..15 of mb.
 * For each block, coefficients dc..15 of mb->block[i].coeff are compared with
 * mb->e_mbd.block[i].dqcoeff, so dc == 1 skips coefficient 0 of every block.
 */
int vp8_mbblock_error_c(MACROBLOCK *mb, int dc) {
  int total = 0;
  int blk = 0;

  while (blk < 16) {
    BLOCK *const src_blk = &mb->block[blk];
    BLOCKD *const rec_blk = &mb->e_mbd.block[blk];
    int block_sse = 0;
    int k;

    for (k = dc; k < 16; ++k) {
      const int delta = src_blk->coeff[k] - rec_blk->dqcoeff[k];
      block_sse += delta * delta;
    }

    total += block_sse;
    ++blk;
  }

  return total;
}
353
354
0
/* Sum of vp8_block_error_c() over blocks 16..23 of mb, comparing each
 * block's coeff with the matching e_mbd block's dqcoeff. */
int vp8_mbuverror_c(MACROBLOCK *mb) {
  int total = 0;
  int blk = 16;

  while (blk < 24) {
    BLOCK *const src_blk = &mb->block[blk];
    BLOCKD *const rec_blk = &mb->e_mbd.block[blk];

    total += vp8_block_error_c(src_blk->coeff, rec_blk->dqcoeff);
    ++blk;
  }

  return total;
}
370
371
41.2k
/* Return the summed SSE of the two 8x8 blocks addressed through
 * x->block[16] and x->block[20] against the e_mbd.pre u/v buffers displaced
 * by the macroblock motion vector.
 *
 * The vector components are moved one step away from zero and halved; the
 * low three bits of the result select the sub-pixel position and the
 * remaining bits the whole-pixel offset.  Only the SSE outputs of the
 * variance functions are used; their return values are ignored.
 */
int VP8_UVSSE(MACROBLOCK *x) {
  MACROBLOCKD *const xd = &x->e_mbd;
  unsigned char *const u_src = (*(x->block[16].base_src) + x->block[16].src);
  unsigned char *const v_src = (*(x->block[20].base_src) + x->block[20].src);
  const int src_stride = x->block[16].src_stride;
  const int ref_stride = xd->pre.uv_stride;
  int mv_row = xd->mode_info_context->mbmi.mv.as_mv.row;
  int mv_col = xd->mode_info_context->mbmi.mv.as_mv.col;
  unsigned int sse_u = 0;
  unsigned int sse_v = 0;
  unsigned char *u_ref;
  unsigned char *v_ref;
  int ref_offset;

  /* Step away from zero before halving. */
  mv_row += (mv_row < 0) ? -1 : 1;
  mv_col += (mv_col < 0) ? -1 : 1;

  mv_row /= 2;
  mv_col /= 2;

  ref_offset = (mv_row >> 3) * ref_stride + (mv_col >> 3);
  u_ref = xd->pre.u_buffer + ref_offset;
  v_ref = xd->pre.v_buffer + ref_offset;

  if ((mv_row | mv_col) & 7) {
    vpx_sub_pixel_variance8x8(u_ref, ref_stride, mv_col & 7, mv_row & 7,
                              u_src, src_stride, &sse_u);
    vpx_sub_pixel_variance8x8(v_ref, ref_stride, mv_col & 7, mv_row & 7,
                              v_src, src_stride, &sse_v);
  } else {
    vpx_variance8x8(u_ref, ref_stride, u_src, src_stride, &sse_u);
    vpx_variance8x8(v_ref, ref_stride, v_src, src_stride, &sse_v);
  }

  return sse_u + sse_v;
}
416
417
/* Token cost of the quantized coefficients of block b.
 *
 * type selects the token cost table; when type is 0 costing starts at
 * coefficient 1, otherwise at coefficient 0.  *a and *l are combined into the
 * starting context and are both overwritten with 1 if at least one
 * coefficient was costed, 0 otherwise.  An end-of-block token is charged
 * unless all 16 positions are used.
 */
static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, int type, ENTROPY_CONTEXT *a,
                       ENTROPY_CONTEXT *l) {
  const int first = !type; /* start at coef 0, unless Y with Y2 */
  const int eob = (int)(*b->eob);
  const short *const qcoeff = b->qcoeff;
  int ctx; /* surrounding block/prev coef predictor */
  int total = 0;
  int pos;

  VP8_COMBINEENTROPYCONTEXTS(ctx, *a, *l);

  assert(eob <= 16);
  for (pos = first; pos < eob; ++pos) {
    const int value = qcoeff[vp8_default_zig_zag1d[pos]];
    const int token = vp8_dct_value_tokens_ptr[value].Token;

    total += mb->token_costs[type][vp8_coef_bands[pos]][ctx][token];
    total += vp8_dct_value_cost_ptr[value];
    ctx = vp8_prev_token_class[token];
  }

  if (pos < 16) {
    total += mb->token_costs[type][vp8_coef_bands[pos]][ctx][DCT_EOB_TOKEN];
  }

  ctx = (pos != first); /* is eob first coefficient; */
  *a = *l = ctx;

  return total;
}
445
446
8.97M
/* Token cost of blocks 0..15 (costed as PLANE_TYPE_Y_NO_DC) plus block 24
 * (PLANE_TYPE_Y2).  Works on local copies of the above/left entropy
 * contexts, so the contexts stored in mb->e_mbd are not modified. */
static int vp8_rdcost_mby(MACROBLOCK *mb) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  ENTROPY_CONTEXT_PLANES above_copy = *mb->e_mbd.above_context;
  ENTROPY_CONTEXT_PLANES left_copy = *mb->e_mbd.left_context;
  ENTROPY_CONTEXT *const above = (ENTROPY_CONTEXT *)&above_copy;
  ENTROPY_CONTEXT *const left = (ENTROPY_CONTEXT *)&left_copy;
  int total = 0;
  int blk = 0;

  while (blk < 16) {
    total += cost_coeffs(mb, xd->block + blk, PLANE_TYPE_Y_NO_DC,
                         above + vp8_block2above[blk],
                         left + vp8_block2left[blk]);
    ++blk;
  }

  total += cost_coeffs(mb, xd->block + 24, PLANE_TYPE_Y2,
                       above + vp8_block2above[24], left + vp8_block2left[24]);

  return total;
}
470
471
8.97M
/* Compute rate and distortion of blocks 0..15 plus second-order block 24
 * for the predictor currently in mb->e_mbd.predictor.
 *
 * Steps: subtract the predictor, forward-transform blocks 0..15 two at a
 * time, gather each block's coeff[0] into block 24's src_diff, Walsh
 * transform block 24, quantize everything, then measure the error and the
 * token cost.
 *
 * Rate:       receives vp8_rdcost_mby(mb).
 * Distortion: receives (4 * error of blocks 0..15 without coefficient 0
 *             + error of block 24) >> 4.
 */
static void macro_block_yrd(MACROBLOCK *mb, int *Rate, int *Distortion) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  BLOCK *const y2_src = mb->block + 24;
  BLOCKD *const y2_rec = xd->block + 24;
  short *const y2_dc = y2_src->src_diff;
  int blk;
  int err;

  vp8_subtract_mby(mb->src_diff, *(mb->block[0].base_src),
                   mb->block[0].src_stride, mb->e_mbd.predictor, 16);

  /* Fdct and building the 2nd order block */
  for (blk = 0; blk < 16; blk += 2) {
    BLOCK *const pair = mb->block + blk;

    mb->short_fdct8x4(pair->src_diff, pair->coeff, 32);
    y2_dc[blk] = pair->coeff[0];
    y2_dc[blk + 1] = pair->coeff[16];
  }

  /* 2nd order fdct */
  mb->short_walsh4x4(y2_src->src_diff, y2_src->coeff, 8);

  /* Quantization */
  for (blk = 0; blk < 16; ++blk) {
    mb->quantize_b(&mb->block[blk], &mb->e_mbd.block[blk]);
  }

  /* DC predication and Quantization of 2nd Order block */
  mb->quantize_b(y2_src, y2_rec);

  /* Distortion */
  err = vp8_mbblock_error(mb, 1) << 2;
  err += vp8_block_error(y2_src->coeff, y2_rec->dqcoeff);

  *Distortion = (err >> 4);

  /* rate */
  *Rate = vp8_rdcost_mby(mb);
}
510
511
25.7M
/* Copy the left 4x4 block of a 16-byte-stride predictor buffer.
 *
 * dst:       destination buffer, at least 52 bytes (4 rows, stride 16).
 * predictor: source buffer with the same layout.
 *
 * Only the first 4 bytes of each of the 4 rows are written; the other bytes
 * of dst are left untouched.  memcpy is used instead of word loads through a
 * cast pointer so the copy does not depend on the buffers' alignment and does
 * not access unsigned char storage through an unsigned int lvalue.
 */
static void copy_predictor(unsigned char *dst, const unsigned char *predictor) {
  int row;

  for (row = 0; row < 4; ++row) {
    memcpy(dst + row * 16, predictor + row * 16, 4);
  }
}
519
static int rd_pick_intra4x4block(MACROBLOCK *x, BLOCK *be, BLOCKD *b,
520
                                 B_PREDICTION_MODE *best_mode,
521
                                 const int *bmode_costs, ENTROPY_CONTEXT *a,
522
                                 ENTROPY_CONTEXT *l,
523
524
                                 int *bestrate, int *bestratey,
525
13.8M
                                 int *bestdistortion) {
526
13.8M
  B_PREDICTION_MODE mode;
527
13.8M
  int best_rd = INT_MAX;
528
13.8M
  int rate = 0;
529
13.8M
  int distortion;
530
531
13.8M
  ENTROPY_CONTEXT ta = *a, tempa = *a;
532
13.8M
  ENTROPY_CONTEXT tl = *l, templ = *l;
533
  /*
534
   * The predictor buffer is a 2d buffer with a stride of 16.  Create
535
   * a temp buffer that meets the stride requirements, but we are only
536
   * interested in the left 4x4 block
537
   * */
538
13.8M
  DECLARE_ALIGNED(16, unsigned char, best_predictor[16 * 4]);
539
13.8M
  DECLARE_ALIGNED(16, short, best_dqcoeff[16]);
540
13.8M
  int dst_stride = x->e_mbd.dst.y_stride;
541
13.8M
  unsigned char *dst = x->e_mbd.dst.y_buffer + b->offset;
542
543
13.8M
  unsigned char *Above = dst - dst_stride;
544
13.8M
  unsigned char *yleft = dst - 1;
545
13.8M
  unsigned char top_left = Above[-1];
546
547
152M
  for (mode = B_DC_PRED; mode <= B_HU_PRED; ++mode) {
548
138M
    int this_rd;
549
138M
    int ratey;
550
551
138M
    rate = bmode_costs[mode];
552
553
138M
    vp8_intra4x4_predict(Above, yleft, dst_stride, mode, b->predictor, 16,
554
138M
                         top_left);
555
138M
    vp8_subtract_b(be, b, 16);
556
138M
    x->short_fdct4x4(be->src_diff, be->coeff, 32);
557
138M
    x->quantize_b(be, b);
558
559
138M
    tempa = ta;
560
138M
    templ = tl;
561
562
138M
    ratey = cost_coeffs(x, b, PLANE_TYPE_Y_WITH_DC, &tempa, &templ);
563
138M
    rate += ratey;
564
138M
    distortion = vp8_block_error(be->coeff, b->dqcoeff) >> 2;
565
566
138M
    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
567
568
138M
    if (this_rd < best_rd) {
569
25.7M
      *bestrate = rate;
570
25.7M
      *bestratey = ratey;
571
25.7M
      *bestdistortion = distortion;
572
25.7M
      best_rd = this_rd;
573
25.7M
      *best_mode = mode;
574
25.7M
      *a = tempa;
575
25.7M
      *l = templ;
576
25.7M
      copy_predictor(best_predictor, b->predictor);
577
25.7M
      memcpy(best_dqcoeff, b->dqcoeff, 32);
578
25.7M
    }
579
138M
  }
580
13.8M
  b->bmi.as_mode = *best_mode;
581
582
13.8M
  vp8_short_idct4x4llm(best_dqcoeff, best_predictor, 16, dst, dst_stride);
583
584
13.8M
  return best_rd;
585
13.8M
}
586
587
/* Pick a 4x4 intra mode for each of the 16 blocks and cost the macroblock
 * as B_PRED.
 *
 * The search stops as soon as the accumulated per-block RD cost reaches
 * best_rd; INT_MAX is then returned and the output parameters are left
 * untouched.  Otherwise *Rate, *rate_y and *Distortion receive the totals
 * and the RD cost computed from them is returned.  On key frames the mode
 * costs are conditioned on the above and left block modes.
 */
static int rd_pick_intra4x4mby_modes(MACROBLOCK *mb, int *Rate, int *rate_y,
                                     int *Distortion, int best_rd) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  ENTROPY_CONTEXT_PLANES above_copy = *mb->e_mbd.above_context;
  ENTROPY_CONTEXT_PLANES left_copy = *mb->e_mbd.left_context;
  ENTROPY_CONTEXT *const above = (ENTROPY_CONTEXT *)&above_copy;
  ENTROPY_CONTEXT *const left = (ENTROPY_CONTEXT *)&left_copy;
  const int *mode_costs = mb->inter_bmode_costs;
  int rate_sum = mb->mbmode_cost[xd->frame_type][B_PRED];
  int dist_sum = 0;
  int token_rate_sum = 0;
  int64_t rd_sum = 0;
  int blk;

  intra_prediction_down_copy(xd, xd->dst.y_buffer - xd->dst.y_stride + 16);

  for (blk = 0; blk < 16; ++blk) {
    MODE_INFO *const mic = xd->mode_info_context;
    const int mis = xd->mode_info_stride;
    B_PREDICTION_MODE picked = B_MODE_COUNT;
    int blk_rate = 0;
    int blk_token_rate = 0;
    int blk_dist = 0;

    if (mb->e_mbd.frame_type == KEY_FRAME) {
      const B_PREDICTION_MODE A = above_block_mode(mic, blk, mis);
      const B_PREDICTION_MODE L = left_block_mode(mic, blk);

      mode_costs = mb->bmode_costs[A][L];
    }

    rd_sum += rd_pick_intra4x4block(
        mb, mb->block + blk, xd->block + blk, &picked, mode_costs,
        above + vp8_block2above[blk], left + vp8_block2left[blk], &blk_rate,
        &blk_token_rate, &blk_dist);

    rate_sum += blk_rate;
    dist_sum += blk_dist;
    token_rate_sum += blk_token_rate;

    assert(picked != B_MODE_COUNT);
    mic->bmi[blk].as_mode = picked;

    /* Already no better than the best known mode: give up. */
    if (rd_sum >= (int64_t)best_rd) return INT_MAX;
  }

  *Rate = rate_sum;
  *rate_y = token_rate_sum;
  *Distortion = dist_sum;

  return RDCOST(mb->rdmult, mb->rddiv, rate_sum, dist_sum);
}
645
646
/* Try the 16x16 intra modes DC_PRED..TM_PRED and keep the one with the
 * lowest RD cost.
 *
 * On return mbmi.mode holds the winning mode, *Rate its total rate (token
 * rate + mode cost), *rate_y its token rate and *Distortion its distortion.
 * Returns the winning RD cost.
 */
static int rd_pick_intra16x16mby_mode(MACROBLOCK *x, int *Rate, int *rate_y,
                                      int *Distortion) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_PREDICTION_MODE winner = MB_MODE_COUNT;
  MB_PREDICTION_MODE candidate;
  int lowest_rd = INT_MAX;

  /* Y Search for 16x16 intra prediction mode */
  for (candidate = DC_PRED; candidate <= TM_PRED; ++candidate) {
    int token_rate;
    int total_rate;
    int dist;
    int rd;

    xd->mode_info_context->mbmi.mode = candidate;

    vp8_build_intra_predictors_mby_s(xd, xd->dst.y_buffer - xd->dst.y_stride,
                                     xd->dst.y_buffer - 1, xd->dst.y_stride,
                                     xd->predictor, 16);

    macro_block_yrd(x, &token_rate, &dist);
    total_rate =
        token_rate +
        x->mbmode_cost[xd->frame_type][xd->mode_info_context->mbmi.mode];

    rd = RDCOST(x->rdmult, x->rddiv, total_rate, dist);

    if (rd >= lowest_rd) continue;

    winner = candidate;
    lowest_rd = rd;
    *Rate = total_rate;
    *rate_y = token_rate;
    *Distortion = dist;
  }

  assert(winner != MB_MODE_COUNT);
  xd->mode_info_context->mbmi.mode = winner;
  return lowest_rd;
}
683
684
9.80M
/* Token cost of blocks 16..23, costed as PLANE_TYPE_UV.  Works on local
 * copies of the above/left entropy contexts, so the contexts stored in
 * mb->e_mbd are not modified. */
static int rd_cost_mbuv(MACROBLOCK *mb) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  ENTROPY_CONTEXT_PLANES above_copy = *mb->e_mbd.above_context;
  ENTROPY_CONTEXT_PLANES left_copy = *mb->e_mbd.left_context;
  ENTROPY_CONTEXT *const above = (ENTROPY_CONTEXT *)&above_copy;
  ENTROPY_CONTEXT *const left = (ENTROPY_CONTEXT *)&left_copy;
  int total = 0;
  int blk = 16;

  while (blk < 24) {
    total += cost_coeffs(mb, xd->block + blk, PLANE_TYPE_UV,
                         above + vp8_block2above[blk],
                         left + vp8_block2left[blk]);
    ++blk;
  }

  return total;
}
705
706
/* Rate/distortion of the u/v blocks for a 16x16 inter prediction.
 * Builds the predictors, then subtracts, transforms and quantizes, storing
 * the token cost in *rate and vp8_mbuverror(x) / 4 in *distortion.  Returns
 * the RD cost of the two.  cpi and fullpixel are unused. */
static int rd_inter16x16_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate,
                            int *distortion, int fullpixel) {
  MACROBLOCKD *const xd = &x->e_mbd;

  (void)cpi;
  (void)fullpixel;

  vp8_build_inter16x16_predictors_mbuv(xd);
  vp8_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
                    x->src.uv_stride, &xd->predictor[256], &xd->predictor[320],
                    8);

  vp8_transform_mbuv(x);
  vp8_quantize_mbuv(x);

  *rate = rd_cost_mbuv(x);
  *distortion = vp8_mbuverror(x) / 4;

  return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
}
724
725
/* Rate/distortion of the u/v blocks when the predictors are built with
 * vp8_build_inter4x4_predictors_mbuv().  Otherwise identical to
 * rd_inter16x16_uv(): *rate gets the token cost, *distortion gets
 * vp8_mbuverror(x) / 4, and the RD cost of the two is returned.  cpi and
 * fullpixel are unused. */
static int rd_inter4x4_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate,
                          int *distortion, int fullpixel) {
  MACROBLOCKD *const xd = &x->e_mbd;

  (void)cpi;
  (void)fullpixel;

  vp8_build_inter4x4_predictors_mbuv(xd);
  vp8_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
                    x->src.uv_stride, &xd->predictor[256], &xd->predictor[320],
                    8);

  vp8_transform_mbuv(x);
  vp8_quantize_mbuv(x);

  *rate = rd_cost_mbuv(x);
  *distortion = vp8_mbuverror(x) / 4;

  return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
}
743
744
/* Try the intra modes DC_PRED..TM_PRED for the u/v blocks and keep the one
 * with the lowest RD cost.
 *
 * On return mbmi.uv_mode holds the winning mode, *rate its total rate (token
 * rate + mode cost), *rate_tokenonly its token rate and *distortion its
 * distortion.
 */
static void rd_pick_intra_mbuv_mode(MACROBLOCK *x, int *rate,
                                    int *rate_tokenonly, int *distortion) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_PREDICTION_MODE winner = MB_MODE_COUNT;
  MB_PREDICTION_MODE candidate;
  int lowest_rd = INT_MAX;
  int winner_rate = 0;
  int winner_dist = 0;

  for (candidate = DC_PRED; candidate <= TM_PRED; ++candidate) {
    int token_rate;
    int cand_rate;
    int cand_dist;
    int cand_rd;

    xd->mode_info_context->mbmi.uv_mode = candidate;

    vp8_build_intra_predictors_mbuv_s(
        xd, xd->dst.u_buffer - xd->dst.uv_stride,
        xd->dst.v_buffer - xd->dst.uv_stride, xd->dst.u_buffer - 1,
        xd->dst.v_buffer - 1, xd->dst.uv_stride, &xd->predictor[256],
        &xd->predictor[320], 8);

    vp8_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
                      x->src.uv_stride, &xd->predictor[256],
                      &xd->predictor[320], 8);
    vp8_transform_mbuv(x);
    vp8_quantize_mbuv(x);

    token_rate = rd_cost_mbuv(x);
    cand_rate =
        token_rate + x->intra_uv_mode_cost[xd->frame_type]
                                          [xd->mode_info_context->mbmi.uv_mode];

    cand_dist = vp8_mbuverror(x) / 4;

    cand_rd = RDCOST(x->rdmult, x->rddiv, cand_rate, cand_dist);

    if (cand_rd >= lowest_rd) continue;

    lowest_rd = cand_rd;
    winner_dist = cand_dist;
    winner_rate = cand_rate;
    *rate_tokenonly = token_rate;
    winner = candidate;
  }

  *rate = winner_rate;
  *distortion = winner_dist;

  assert(winner != MB_MODE_COUNT);
  xd->mode_info_context->mbmi.uv_mode = winner;
}
796
797
6.56M
/* Cost of signalling inter mode m (NEARESTMV..SPLITMV) under the
 * probabilities that vp8_mv_ref_probs() derives from near_mv_ref_ct. */
int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4]) {
  vp8_prob probs[VP8_MVREFS - 1];

  assert(NEARESTMV <= m && m <= SPLITMV);
  vp8_mv_ref_probs(probs, near_mv_ref_ct);

  return vp8_cost_token(vp8_mv_ref_tree, probs,
                        &vp8_mv_ref_encoding_array[m - NEARESTMV]);
}
804
805
2.93M
/* Store prediction mode mb and motion vector *mv in the current
 * macroblock's mode info (x->e_mbd.mode_info_context->mbmi). */
void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv) {
  MODE_INFO *const mi = x->e_mbd.mode_info_context;

  mi->mbmi.mode = mb;
  mi->mbmi.mv.as_int = mv->as_int;
}
809
810
static int labels2mode(MACROBLOCK *x, int const *labelings, int which_label,
811
                       B_PREDICTION_MODE this_mode, int_mv *this_mv,
812
30.8M
                       int_mv *best_ref_mv, int *mvcost[2]) {
813
30.8M
  MACROBLOCKD *const xd = &x->e_mbd;
814
30.8M
  MODE_INFO *const mic = xd->mode_info_context;
815
30.8M
  const int mis = xd->mode_info_stride;
816
817
30.8M
  int cost = 0;
818
30.8M
  int thismvcost = 0;
819
820
  /* We have to be careful retrieving previously-encoded motion vectors.
821
     Ones from this macroblock have to be pulled from the BLOCKD array
822
     as they have not yet made it to the bmi array in our MB_MODE_INFO. */
823
824
30.8M
  int i = 0;
825
826
493M
  do {
827
493M
    BLOCKD *const d = xd->block + i;
828
493M
    const int row = i >> 2, col = i & 3;
829
830
493M
    B_PREDICTION_MODE m;
831
832
493M
    if (labelings[i] != which_label) continue;
833
834
118M
    if (col && labelings[i] == labelings[i - 1]) {
835
60.9M
      m = LEFT4X4;
836
60.9M
    } else if (row && labelings[i] == labelings[i - 4]) {
837
26.8M
      m = ABOVE4X4;
838
30.8M
    } else {
839
      /* the only time we should do costing for new motion vector
840
       * or mode is when we are on a new label  (jbb May 08, 2007)
841
       */
842
30.8M
      switch (m = this_mode) {
843
8.54M
        case NEW4X4:
844
8.54M
          thismvcost = vp8_mv_bit_cost(this_mv, best_ref_mv, mvcost, 102);
845
8.54M
          break;
846
9.02M
        case LEFT4X4:
847
9.02M
          this_mv->as_int = col ? d[-1].bmi.mv.as_int : left_block_mv(mic, i);
848
9.02M
          break;
849
6.94M
        case ABOVE4X4:
850
6.94M
          this_mv->as_int =
851
6.94M
              row ? d[-4].bmi.mv.as_int : above_block_mv(mic, i, mis);
852
6.94M
          break;
853
6.32M
        case ZERO4X4: this_mv->as_int = 0; break;
854
0
        default: break;
855
30.8M
      }
856
857
30.8M
      if (m == ABOVE4X4) { /* replace above with left if same */
858
6.94M
        int_mv left_mv;
859
860
6.94M
        left_mv.as_int = col ? d[-1].bmi.mv.as_int : left_block_mv(mic, i);
861
862
6.94M
        if (left_mv.as_int == this_mv->as_int) m = LEFT4X4;
863
6.94M
      }
864
865
30.8M
      cost = x->inter_bmode_costs[m];
866
30.8M
    }
867
868
118M
    d->bmi.mv.as_int = this_mv->as_int;
869
870
118M
    x->partition_info->bmi[i].mode = m;
871
118M
    x->partition_info->bmi[i].mv.as_int = this_mv->as_int;
872
873
493M
  } while (++i < 16);
874
875
30.8M
  cost += thismvcost;
876
30.8M
  return cost;
877
30.8M
}
878
879
/* Token cost of the blocks among 0..15 whose label equals which_label,
 * costed as PLANE_TYPE_Y_WITH_DC.  The entropy contexts reached through ta
 * and tl are updated by the costing. */
static int rdcost_mbsegment_y(MACROBLOCK *mb, const int *labels,
                              int which_label, ENTROPY_CONTEXT *ta,
                              ENTROPY_CONTEXT *tl) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  int total = 0;
  int blk;

  for (blk = 0; blk < 16; ++blk) {
    if (labels[blk] != which_label) continue;

    total += cost_coeffs(mb, xd->block + blk, PLANE_TYPE_Y_WITH_DC,
                         ta + vp8_block2above[blk], tl + vp8_block2left[blk]);
  }

  return total;
}
895
/* Predict, subtract, transform and quantize every block among 0..15 whose
 * label equals which_label, and return the summed vp8_block_error() of
 * those blocks. */
static unsigned int vp8_encode_inter_mb_segment(MACROBLOCK *x,
                                                int const *labels,
                                                int which_label) {
  unsigned char *const ref_base = x->e_mbd.pre.y_buffer;
  const int ref_stride = x->e_mbd.pre.y_stride;
  unsigned int total = 0;
  int blk;

  for (blk = 0; blk < 16; ++blk) {
    BLOCKD *rec_blk;
    BLOCK *src_blk;

    if (labels[blk] != which_label) continue;

    rec_blk = &x->e_mbd.block[blk];
    src_blk = &x->block[blk];

    vp8_build_inter_predictors_b(rec_blk, 16, ref_base, ref_stride,
                                 x->e_mbd.subpixel_predict);
    vp8_subtract_b(src_blk, rec_blk, 16);
    x->short_fdct4x4(src_blk->src_diff, src_blk->coeff, 32);
    x->quantize_b(src_blk, rec_blk);

    total += vp8_block_error(src_blk->coeff, rec_blk->dqcoeff);
  }

  return total;
}
920
921
/* Right shift applied to the best motion-search error of a segment before
 * rd_check_segment() compares it with its full-search threshold.  Indexed by
 * the segmentation value.
 */
static const unsigned int segmentation_to_sseshift[4] = { 3, 3, 2, 0 };
921
922
/* State carried across successive rd_check_segment() calls while the best
 * macroblock segmentation is being chosen.
 */
typedef struct {
923
  /* Reference MV handed to the motion searches and to labels2mode(). */
  int_mv *ref_mv;
924
  /* MV predictor; its value shifted right by 3 is the search start point. */
  int_mv mvp;
925
926
  /* Lowest RD cost seen so far; a segmentation must beat it to be stored. */
  int segment_rd;
927
  /* Segmentation value that produced segment_rd. */
  int segment_num;
928
  /* Rate of the best segmentation. */
  int r;
929
  /* Distortion of the best segmentation. */
  int d;
930
  /* Luma coefficient rate of the best segmentation. */
  int segment_yrate;
931
  /* Per-4x4-block modes, MVs and eobs saved from the best segmentation. */
  B_PREDICTION_MODE modes[16];
932
  int_mv mvs[16];
933
  unsigned char eobs[16];
934
935
  /* Divided by the label count to get the RD level below which the NEW4X4
   * motion search is skipped for a label.
   */
  int mvthresh;
936
  /* Passed to vp8_cost_mv_ref() when costing SPLITMV. */
  int *mdcounts;
937
938
  int_mv sv_mvp[4]; /* save 4 mvp from 8x8 */
939
  int sv_istep[2];  /* save 2 initial step_param for 16x8/8x16 */
940
941
} BEST_SEG_INFO;
943
944
/* Evaluate one way of splitting the macroblock (`segmentation`) for SPLITMV.
 *
 * For every label of the split the sub-block modes LEFT4X4 .. NEW4X4 are
 * tried in order and the one with the lowest RD cost is kept.  When the
 * accumulated RD cost ends up below bsi->segment_rd, the rate, distortion,
 * luma rate, per-block modes, MVs and eobs of this segmentation are stored
 * in *bsi.
 */
static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, BEST_SEG_INFO *bsi,
                             unsigned int segmentation) {
  int const *const labels = vp8_mbsplits[segmentation];
  const int label_count = vp8_mbsplit_count[segmentation];
  vp8_variance_fn_ptr_t *const v_fn_ptr = &cpi->fn_ptr[segmentation];
  /* The multiplier is 1, which keeps this threshold roughly equal to the
   * macroblock one.  A multiplier such as 64 would make it so large that
   * mvs on segments are very rarely searched.
   */
  const int label_mv_thresh = 1 * bsi->mvthresh / label_count;

  /* Working entropy contexts, plus the copies belonging to the best mode. */
  ENTROPY_CONTEXT_PLANES t_above = *x->e_mbd.above_context;
  ENTROPY_CONTEXT_PLANES t_left = *x->e_mbd.left_context;
  ENTROPY_CONTEXT_PLANES t_above_b, t_left_b;

  B_PREDICTION_MODE this_mode;
  int seg_rate;
  int seg_dist = 0;
  int seg_yrate = 0;
  int seg_rd;
  int label_rate = 0;
  int label_dist = 0;
  int rate;
  int i;

  vp8_zero(t_above_b);
  vp8_zero(t_left_b);

  /* Segmentation method overheads */
  rate = vp8_cost_token(vp8_mbsplit_tree, vp8_mbsplit_probs,
                        vp8_mbsplit_encodings + segmentation);
  rate += vp8_cost_mv_ref(SPLITMV, bsi->mdcounts);
  seg_rd = RDCOST(x->rdmult, x->rddiv, rate, 0);
  seg_rate = rate;

  for (i = 0; i < label_count; ++i) {
    int_mv mode_mv[B_MODE_COUNT] = { { 0 }, { 0 } };
    int best_label_rd = INT_MAX;
    B_PREDICTION_MODE mode_selected = ZERO4X4;
    int bestlabelyrate = 0;

    /* Look for the best motion vector / mode for this label. */
    for (this_mode = LEFT4X4; this_mode <= NEW4X4; ++this_mode) {
      /* Every candidate starts from the same entropy context snapshot. */
      ENTROPY_CONTEXT_PLANES t_above_s = t_above;
      ENTROPY_CONTEXT_PLANES t_left_s = t_left;
      ENTROPY_CONTEXT *const ta_s = (ENTROPY_CONTEXT *)&t_above_s;
      ENTROPY_CONTEXT *const tl_s = (ENTROPY_CONTEXT *)&t_left_s;
      int fp_row;
      int fp_col;
      int this_rd;
      int distortion;
      int labelyrate;

      if (this_mode == NEW4X4) {
        int sseshift;
        int num00;
        int step_param = 0;
        int further_steps;
        int first_block;
        int step;
        int thissme;
        int bestsme = INT_MAX;
        int_mv temp_mv;
        BLOCK *c;
        BLOCKD *e;

        /* The best mode so far is already good enough that a new motion
         * search cannot be justified.
         */
        if (best_label_rd < label_mv_thresh) break;

        if (cpi->compressor_speed) {
          if (segmentation == BLOCK_8X16 || segmentation == BLOCK_16X8) {
            /* Seed from the saved 8x8 result; the second 16x8 label uses
             * saved entry 2 rather than entry 1.
             */
            const int saved =
                (i == 1 && segmentation == BLOCK_16X8) ? 2 : i;

            bsi->mvp.as_int = bsi->sv_mvp[saved].as_int;
            step_param = bsi->sv_istep[i];
          }

          /* use previous block's result as next block's MV predictor. */
          if (segmentation == BLOCK_4X4 && i > 0) {
            /* Blocks 4, 8 and 12 take block i - 4 instead of block i - 1. */
            const int pred_block =
                (i == 4 || i == 8 || i == 12) ? (i - 4) : (i - 1);

            bsi->mvp.as_int = x->e_mbd.block[pred_block].bmi.mv.as_int;
            step_param = 2;
          }
        }

        further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;

        {
          const int sadpb = x->sadperbit4;
          int_mv mvp_full;

          mvp_full.as_mv.row = bsi->mvp.as_mv.row >> 3;
          mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3;

          /* find first label */
          first_block = vp8_mbsplit_offset[segmentation][i];
          c = &x->block[first_block];
          e = &x->e_mbd.block[first_block];

          bestsme = cpi->diamond_search_sad(
              x, c, e, &mvp_full, &mode_mv[NEW4X4], step_param, sadpb, &num00,
              v_fn_ptr, x->mvcost, bsi->ref_mv);

          /* Run the remaining diamond steps; num00 is how many of them the
           * previous search reported can be skipped.
           */
          step = num00;
          num00 = 0;

          while (step < further_steps) {
            ++step;

            if (num00 != 0) {
              --num00;
              continue;
            }

            thissme = cpi->diamond_search_sad(
                x, c, e, &mvp_full, &temp_mv, step_param + step, sadpb, &num00,
                v_fn_ptr, x->mvcost, bsi->ref_mv);

            if (thissme < bestsme) {
              bestsme = thissme;
              mode_mv[NEW4X4].as_int = temp_mv.as_int;
            }
          }

          sseshift = segmentation_to_sseshift[segmentation];

          /* Should we do a full search (best quality only) */
          if ((cpi->compressor_speed == 0) && (bestsme >> sseshift) > 4000) {
            /* Check if mvp_full is within the range. */
            vp8_clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max, x->mv_row_min,
                         x->mv_row_max);

            thissme = vp8_full_search_sad(x, c, e, &mvp_full, sadpb, 16,
                                          v_fn_ptr, x->mvcost, bsi->ref_mv);

            if (thissme < bestsme) {
              bestsme = thissme;
              mode_mv[NEW4X4].as_int = e->bmi.mv.as_int;
            } else {
              /* The full search result is worse, so put the previous best
               * vector back.
               */
              e->bmi.mv.as_int = mode_mv[NEW4X4].as_int;
            }
          }
        }

        if (bestsme < INT_MAX) {
          int disto;
          unsigned int sse;

          cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4], bsi->ref_mv,
                                       x->errorperbit, v_fn_ptr, x->mvcost,
                                       &disto, &sse);
        }
      } /* NEW4X4 */

      rate = labels2mode(x, labels, i, this_mode, &mode_mv[this_mode],
                         bsi->ref_mv, x->mvcost);

      /* Trap vectors that reach beyond the UMV borders */
      fp_row = mode_mv[this_mode].as_mv.row >> 3;
      fp_col = mode_mv[this_mode].as_mv.col >> 3;
      if (fp_row < x->mv_row_min || fp_row > x->mv_row_max ||
          fp_col < x->mv_col_min || fp_col > x->mv_col_max) {
        continue;
      }

      distortion = vp8_encode_inter_mb_segment(x, labels, i) / 4;

      labelyrate = rdcost_mbsegment_y(x, labels, i, ta_s, tl_s);
      rate += labelyrate;

      this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

      if (this_rd < best_label_rd) {
        label_rate = rate;
        label_dist = distortion;
        bestlabelyrate = labelyrate;
        mode_selected = this_mode;
        best_label_rd = this_rd;

        t_above_b = t_above_s;
        t_left_b = t_left_s;
      }
    } /* for each 4x4 mode */

    /* Carry the winning mode's entropy contexts into the next label. */
    t_above = t_above_b;
    t_left = t_left_b;

    /* Re-apply the selected mode for this label. */
    labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected],
                bsi->ref_mv, x->mvcost);

    seg_rate += label_rate;
    seg_dist += label_dist;
    seg_yrate += bestlabelyrate;
    seg_rd += best_label_rd;

    /* Already no better than the best segmentation so far: stop early. */
    if (seg_rd >= bsi->segment_rd) break;
  } /* for each label */

  if (seg_rd >= bsi->segment_rd) return;

  bsi->r = seg_rate;
  bsi->d = seg_dist;
  bsi->segment_yrate = seg_yrate;
  bsi->segment_rd = seg_rd;
  bsi->segment_num = segmentation;

  /* store everything needed to come back to this!! */
  for (i = 0; i < 16; ++i) {
    bsi->mvs[i].as_mv = x->partition_info->bmi[i].mv.as_mv;
    bsi->modes[i] = x->partition_info->bmi[i].mode;
    bsi->eobs[i] = x->e_mbd.eobs[i];
  }
}
1184
1185
1.52M
static void vp8_cal_step_param(int sr, int *sp) {
1186
1.52M
  int step = 0;
1187
1188
1.52M
  if (sr > MAX_FIRST_STEP) {
1189
56.3k
    sr = MAX_FIRST_STEP;
1190
1.47M
  } else if (sr < 1) {
1191
684k
    sr = 1;
1192
684k
  }
1193
1194
4.76M
  while (sr >>= 1) step++;
1195
1196
1.52M
  *sp = MAX_MVSEARCH_STEPS - 1 - step;
1197
1.52M
}
1198
1199
static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
1200
                                           int_mv *best_ref_mv, int best_rd,
1201
                                           int *mdcounts, int *returntotrate,
1202
                                           int *returnyrate,
1203
                                           int *returndistortion,
1204
726k
                                           int mvthresh) {
1205
726k
  int i;
1206
726k
  BEST_SEG_INFO bsi;
1207
1208
726k
  memset(&bsi, 0, sizeof(bsi));
1209
1210
726k
  bsi.segment_rd = best_rd;
1211
726k
  bsi.ref_mv = best_ref_mv;
1212
726k
  bsi.mvp.as_int = best_ref_mv->as_int;
1213
726k
  bsi.mvthresh = mvthresh;
1214
726k
  bsi.mdcounts = mdcounts;
1215
1216
12.3M
  for (i = 0; i < 16; ++i) {
1217
11.6M
    bsi.modes[i] = ZERO4X4;
1218
11.6M
  }
1219
1220
726k
  if (cpi->compressor_speed == 0) {
1221
    /* for now, we will keep the original segmentation order
1222
       when in best quality mode */
1223
0
    rd_check_segment(cpi, x, &bsi, BLOCK_16X8);
1224
0
    rd_check_segment(cpi, x, &bsi, BLOCK_8X16);
1225
0
    rd_check_segment(cpi, x, &bsi, BLOCK_8X8);
1226
0
    rd_check_segment(cpi, x, &bsi, BLOCK_4X4);
1227
726k
  } else {
1228
726k
    int sr;
1229
1230
726k
    rd_check_segment(cpi, x, &bsi, BLOCK_8X8);
1231
1232
726k
    if (bsi.segment_rd < best_rd) {
1233
382k
      int col_min = ((best_ref_mv->as_mv.col + 7) >> 3) - MAX_FULL_PEL_VAL;
1234
382k
      int row_min = ((best_ref_mv->as_mv.row + 7) >> 3) - MAX_FULL_PEL_VAL;
1235
382k
      int col_max = (best_ref_mv->as_mv.col >> 3) + MAX_FULL_PEL_VAL;
1236
382k
      int row_max = (best_ref_mv->as_mv.row >> 3) + MAX_FULL_PEL_VAL;
1237
1238
382k
      int tmp_col_min = x->mv_col_min;
1239
382k
      int tmp_col_max = x->mv_col_max;
1240
382k
      int tmp_row_min = x->mv_row_min;
1241
382k
      int tmp_row_max = x->mv_row_max;
1242
1243
      /* Get intersection of UMV window and valid MV window to reduce # of
1244
       * checks in diamond search. */
1245
382k
      if (x->mv_col_min < col_min) x->mv_col_min = col_min;
1246
382k
      if (x->mv_col_max > col_max) x->mv_col_max = col_max;
1247
382k
      if (x->mv_row_min < row_min) x->mv_row_min = row_min;
1248
382k
      if (x->mv_row_max > row_max) x->mv_row_max = row_max;
1249
1250
      /* Get 8x8 result */
1251
382k
      bsi.sv_mvp[0].as_int = bsi.mvs[0].as_int;
1252
382k
      bsi.sv_mvp[1].as_int = bsi.mvs[2].as_int;
1253
382k
      bsi.sv_mvp[2].as_int = bsi.mvs[8].as_int;
1254
382k
      bsi.sv_mvp[3].as_int = bsi.mvs[10].as_int;
1255
1256
      /* Use 8x8 result as 16x8/8x16's predictor MV. Adjust search range
1257
       * according to the closeness of 2 MV. */
1258
      /* block 8X16 */
1259
382k
      {
1260
382k
        sr =
1261
382k
            MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[2].as_mv.row)) >> 3,
1262
382k
                 (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[2].as_mv.col)) >> 3);
1263
382k
        vp8_cal_step_param(sr, &bsi.sv_istep[0]);
1264
1265
382k
        sr =
1266
382k
            MAXF((abs(bsi.sv_mvp[1].as_mv.row - bsi.sv_mvp[3].as_mv.row)) >> 3,
1267
382k
                 (abs(bsi.sv_mvp[1].as_mv.col - bsi.sv_mvp[3].as_mv.col)) >> 3);
1268
382k
        vp8_cal_step_param(sr, &bsi.sv_istep[1]);
1269
1270
382k
        rd_check_segment(cpi, x, &bsi, BLOCK_8X16);
1271
382k
      }
1272
1273
      /* block 16X8 */
1274
382k
      {
1275
382k
        sr =
1276
382k
            MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[1].as_mv.row)) >> 3,
1277
382k
                 (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[1].as_mv.col)) >> 3);
1278
382k
        vp8_cal_step_param(sr, &bsi.sv_istep[0]);
1279
1280
382k
        sr =
1281
382k
            MAXF((abs(bsi.sv_mvp[2].as_mv.row - bsi.sv_mvp[3].as_mv.row)) >> 3,
1282
382k
                 (abs(bsi.sv_mvp[2].as_mv.col - bsi.sv_mvp[3].as_mv.col)) >> 3);
1283
382k
        vp8_cal_step_param(sr, &bsi.sv_istep[1]);
1284
1285
382k
        rd_check_segment(cpi, x, &bsi, BLOCK_16X8);
1286
382k
      }
1287
1288
      /* If 8x8 is better than 16x8/8x16, then do 4x4 search */
1289
      /* Not skip 4x4 if speed=0 (good quality) */
1290
382k
      if (cpi->sf.no_skip_block4x4_search || bsi.segment_num == BLOCK_8X8)
1291
      /* || (sv_segment_rd8x8-bsi.segment_rd) < sv_segment_rd8x8>>5) */
1292
150k
      {
1293
150k
        bsi.mvp.as_int = bsi.sv_mvp[0].as_int;
1294
150k
        rd_check_segment(cpi, x, &bsi, BLOCK_4X4);
1295
150k
      }
1296
1297
      /* restore UMV window */
1298
382k
      x->mv_col_min = tmp_col_min;
1299
382k
      x->mv_col_max = tmp_col_max;
1300
382k
      x->mv_row_min = tmp_row_min;
1301
382k
      x->mv_row_max = tmp_row_max;
1302
382k
    }
1303
726k
  }
1304
1305
  /* set it to the best */
1306
12.3M
  for (i = 0; i < 16; ++i) {
1307
11.6M
    BLOCKD *bd = &x->e_mbd.block[i];
1308
1309
11.6M
    bd->bmi.mv.as_int = bsi.mvs[i].as_int;
1310
11.6M
    *bd->eob = bsi.eobs[i];
1311
11.6M
  }
1312
1313
726k
  *returntotrate = bsi.r;
1314
726k
  *returndistortion = bsi.d;
1315
726k
  *returnyrate = bsi.segment_yrate;
1316
1317
  /* save partitions */
1318
726k
  x->e_mbd.mode_info_context->mbmi.partitioning = bsi.segment_num;
1319
726k
  x->partition_info->count = vp8_mbsplit_count[bsi.segment_num];
1320
1321
3.74M
  for (i = 0; i < x->partition_info->count; ++i) {
1322
3.01M
    int j;
1323
1324
3.01M
    j = vp8_mbsplit_offset[bsi.segment_num][i];
1325
1326
3.01M
    x->partition_info->bmi[i].mode = bsi.modes[j];
1327
3.01M
    x->partition_info->bmi[i].mv.as_mv = bsi.mvs[j].as_mv;
1328
3.01M
  }
1329
  /*
1330
   * used to set x->e_mbd.mode_info_context->mbmi.mv.as_int
1331
   */
1332
726k
  x->partition_info->bmi[15].mv.as_int = bsi.mvs[15].as_int;
1333
1334
726k
  return bsi.segment_rd;
1335
726k
}
1336
1337
/* The improved MV prediction */
1338
void vp8_mv_pred(VP8_COMP *cpi, MACROBLOCKD *xd, const MODE_INFO *here,
1339
                 int_mv *mvp, int refframe, int *ref_frame_sign_bias, int *sr,
1340
1.51M
                 int near_sadidx[]) {
1341
1.51M
  const MODE_INFO *above = here - xd->mode_info_stride;
1342
1.51M
  const MODE_INFO *left = here - 1;
1343
1.51M
  const MODE_INFO *aboveleft = above - 1;
1344
1.51M
  int_mv near_mvs[8];
1345
1.51M
  int near_ref[8];
1346
1.51M
  int_mv mv;
1347
1.51M
  int vcnt = 0;
1348
1.51M
  int find = 0;
1349
1.51M
  int mb_offset;
1350
1351
1.51M
  int mvx[8];
1352
1.51M
  int mvy[8];
1353
1.51M
  int i;
1354
1355
1.51M
  mv.as_int = 0;
1356
1357
1.51M
  if (here->mbmi.ref_frame != INTRA_FRAME) {
1358
1.51M
    near_mvs[0].as_int = near_mvs[1].as_int = near_mvs[2].as_int =
1359
1.51M
        near_mvs[3].as_int = near_mvs[4].as_int = near_mvs[5].as_int =
1360
1.51M
            near_mvs[6].as_int = near_mvs[7].as_int = 0;
1361
1.51M
    near_ref[0] = near_ref[1] = near_ref[2] = near_ref[3] = near_ref[4] =
1362
1.51M
        near_ref[5] = near_ref[6] = near_ref[7] = 0;
1363
1364
    /* read in 3 nearby block's MVs from current frame as prediction
1365
     * candidates.
1366
     */
1367
1.51M
    if (above->mbmi.ref_frame != INTRA_FRAME) {
1368
405k
      near_mvs[vcnt].as_int = above->mbmi.mv.as_int;
1369
405k
      mv_bias(ref_frame_sign_bias[above->mbmi.ref_frame], refframe,
1370
405k
              &near_mvs[vcnt], ref_frame_sign_bias);
1371
405k
      near_ref[vcnt] = above->mbmi.ref_frame;
1372
405k
    }
1373
1.51M
    vcnt++;
1374
1.51M
    if (left->mbmi.ref_frame != INTRA_FRAME) {
1375
624k
      near_mvs[vcnt].as_int = left->mbmi.mv.as_int;
1376
624k
      mv_bias(ref_frame_sign_bias[left->mbmi.ref_frame], refframe,
1377
624k
              &near_mvs[vcnt], ref_frame_sign_bias);
1378
624k
      near_ref[vcnt] = left->mbmi.ref_frame;
1379
624k
    }
1380
1.51M
    vcnt++;
1381
1.51M
    if (aboveleft->mbmi.ref_frame != INTRA_FRAME) {
1382
329k
      near_mvs[vcnt].as_int = aboveleft->mbmi.mv.as_int;
1383
329k
      mv_bias(ref_frame_sign_bias[aboveleft->mbmi.ref_frame], refframe,
1384
329k
              &near_mvs[vcnt], ref_frame_sign_bias);
1385
329k
      near_ref[vcnt] = aboveleft->mbmi.ref_frame;
1386
329k
    }
1387
1.51M
    vcnt++;
1388
1389
    /* read in 5 nearby block's MVs from last frame. */
1390
1.51M
    if (cpi->common.last_frame_type != KEY_FRAME) {
1391
903k
      mb_offset = (-xd->mb_to_top_edge / 128 + 1) * (xd->mode_info_stride + 1) +
1392
903k
                  (-xd->mb_to_left_edge / 128 + 1);
1393
1394
      /* current in last frame */
1395
903k
      if (cpi->lf_ref_frame[mb_offset] != INTRA_FRAME) {
1396
516k
        near_mvs[vcnt].as_int = cpi->lfmv[mb_offset].as_int;
1397
516k
        mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset], refframe,
1398
516k
                &near_mvs[vcnt], ref_frame_sign_bias);
1399
516k
        near_ref[vcnt] = cpi->lf_ref_frame[mb_offset];
1400
516k
      }
1401
903k
      vcnt++;
1402
1403
      /* above in last frame */
1404
903k
      if (cpi->lf_ref_frame[mb_offset - xd->mode_info_stride - 1] !=
1405
903k
          INTRA_FRAME) {
1406
274k
        near_mvs[vcnt].as_int =
1407
274k
            cpi->lfmv[mb_offset - xd->mode_info_stride - 1].as_int;
1408
274k
        mv_bias(
1409
274k
            cpi->lf_ref_frame_sign_bias[mb_offset - xd->mode_info_stride - 1],
1410
274k
            refframe, &near_mvs[vcnt], ref_frame_sign_bias);
1411
274k
        near_ref[vcnt] =
1412
274k
            cpi->lf_ref_frame[mb_offset - xd->mode_info_stride - 1];
1413
274k
      }
1414
903k
      vcnt++;
1415
1416
      /* left in last frame */
1417
903k
      if (cpi->lf_ref_frame[mb_offset - 1] != INTRA_FRAME) {
1418
391k
        near_mvs[vcnt].as_int = cpi->lfmv[mb_offset - 1].as_int;
1419
391k
        mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset - 1], refframe,
1420
391k
                &near_mvs[vcnt], ref_frame_sign_bias);
1421
391k
        near_ref[vcnt] = cpi->lf_ref_frame[mb_offset - 1];
1422
391k
      }
1423
903k
      vcnt++;
1424
1425
      /* right in last frame */
1426
903k
      if (cpi->lf_ref_frame[mb_offset + 1] != INTRA_FRAME) {
1427
397k
        near_mvs[vcnt].as_int = cpi->lfmv[mb_offset + 1].as_int;
1428
397k
        mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset + 1], refframe,
1429
397k
                &near_mvs[vcnt], ref_frame_sign_bias);
1430
397k
        near_ref[vcnt] = cpi->lf_ref_frame[mb_offset + 1];
1431
397k
      }
1432
903k
      vcnt++;
1433
1434
      /* below in last frame */
1435
903k
      if (cpi->lf_ref_frame[mb_offset + xd->mode_info_stride + 1] !=
1436
903k
          INTRA_FRAME) {
1437
276k
        near_mvs[vcnt].as_int =
1438
276k
            cpi->lfmv[mb_offset + xd->mode_info_stride + 1].as_int;
1439
276k
        mv_bias(
1440
276k
            cpi->lf_ref_frame_sign_bias[mb_offset + xd->mode_info_stride + 1],
1441
276k
            refframe, &near_mvs[vcnt], ref_frame_sign_bias);
1442
276k
        near_ref[vcnt] =
1443
276k
            cpi->lf_ref_frame[mb_offset + xd->mode_info_stride + 1];
1444
276k
      }
1445
903k
      vcnt++;
1446
903k
    }
1447
1448
6.37M
    for (i = 0; i < vcnt; ++i) {
1449
5.65M
      if (near_ref[near_sadidx[i]] != INTRA_FRAME) {
1450
1.87M
        if (here->mbmi.ref_frame == near_ref[near_sadidx[i]]) {
1451
792k
          mv.as_int = near_mvs[near_sadidx[i]].as_int;
1452
792k
          find = 1;
1453
792k
          if (i < 3) {
1454
711k
            *sr = 3;
1455
711k
          } else {
1456
80.7k
            *sr = 2;
1457
80.7k
          }
1458
792k
          break;
1459
792k
        }
1460
1.87M
      }
1461
5.65M
    }
1462
1463
1.51M
    if (!find) {
1464
4.98M
      for (i = 0; i < vcnt; ++i) {
1465
4.26M
        mvx[i] = near_mvs[i].as_mv.row;
1466
4.26M
        mvy[i] = near_mvs[i].as_mv.col;
1467
4.26M
      }
1468
1469
720k
      insertsortmv(mvx, vcnt);
1470
720k
      insertsortmv(mvy, vcnt);
1471
720k
      mv.as_mv.row = mvx[vcnt / 2];
1472
720k
      mv.as_mv.col = mvy[vcnt / 2];
1473
1474
      /* sr is set to 0 to allow calling function to decide the search
1475
       * range.
1476
       */
1477
720k
      *sr = 0;
1478
720k
    }
1479
1.51M
  }
1480
1481
  /* Set up return values */
1482
1.51M
  mvp->as_int = mv.as_int;
1483
1.51M
  vp8_clamp_mv2(mvp, xd);
1484
1.51M
}
1485
1486
void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x,
                 int recon_yoffset, int near_sadidx[]) {
  /* Compute the 16x16 SAD between the source macroblock and its neighbouring
   * positions, then pass the SADs and near_sadidx[] to insertsortsad().
   *
   * near_sad indexes:
   *   0-cf above, 1-cf left, 2-cf aboveleft,
   *   3-lf current, 4-lf above, 5-lf left, 6-lf right, 7-lf below
   * (cf = current frame, lf = last frame).  Positions that fall outside the
   * frame get INT_MAX.
   */
  const vp8_variance_fn_ptr_t *const fn16 = &cpi->fn_ptr[BLOCK_16X16];
  BLOCK *const b = &x->block[0];
  unsigned char *const src_y_ptr = *(b->base_src);
  const int at_top = (xd->mb_to_top_edge == 0);
  const int at_left = (xd->mb_to_left_edge == 0);
  int near_sad[8] = { 0 };

  /* calculate sad for current frame 3 nearby MBs. */
  if (at_top) {
    /* No above MB. */
    near_sad[0] = INT_MAX;
  } else {
    near_sad[0] =
        fn16->sdf(src_y_ptr, b->src_stride,
                  xd->dst.y_buffer - xd->dst.y_stride * 16, xd->dst.y_stride);
  }

  if (at_left) {
    /* No left MB. */
    near_sad[1] = INT_MAX;
  } else {
    near_sad[1] = fn16->sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,
                            xd->dst.y_stride);
  }

  if (at_top || at_left) {
    /* The above-left MB needs both an above row and a left column. */
    near_sad[2] = INT_MAX;
  } else {
    near_sad[2] = fn16->sdf(src_y_ptr, b->src_stride,
                            xd->dst.y_buffer - xd->dst.y_stride * 16 - 16,
                            xd->dst.y_stride);
  }

  if (cpi->common.last_frame_type == KEY_FRAME) {
    /* Only the three current-frame candidates are ranked. */
    insertsortsad(near_sad, near_sadidx, 3);
  } else {
    /* calculate sad for last frame 5 nearby MBs. */
    unsigned char *const pre_y_buffer =
        cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_buffer + recon_yoffset;
    const int pre_y_stride =
        cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_stride;

    if (at_top) {
      near_sad[4] = INT_MAX;
    } else {
      near_sad[4] = fn16->sdf(src_y_ptr, b->src_stride,
                              pre_y_buffer - pre_y_stride * 16, pre_y_stride);
    }

    if (at_left) {
      near_sad[5] = INT_MAX;
    } else {
      near_sad[5] = fn16->sdf(src_y_ptr, b->src_stride, pre_y_buffer - 16,
                              pre_y_stride);
    }

    /* The co-located MB is always available. */
    near_sad[3] =
        fn16->sdf(src_y_ptr, b->src_stride, pre_y_buffer, pre_y_stride);

    if (xd->mb_to_right_edge == 0) {
      near_sad[6] = INT_MAX;
    } else {
      near_sad[6] = fn16->sdf(src_y_ptr, b->src_stride, pre_y_buffer + 16,
                              pre_y_stride);
    }

    if (xd->mb_to_bottom_edge == 0) {
      near_sad[7] = INT_MAX;
    } else {
      near_sad[7] = fn16->sdf(src_y_ptr, b->src_stride,
                              pre_y_buffer + pre_y_stride * 16, pre_y_stride);
    }

    insertsortsad(near_sad, near_sadidx, 8);
  }
}
1560
1561
797k
/* Add one motion vector to the MV histograms in x->MVcount.
 *
 * The row/col difference from best_ref_mv is shifted right by one and
 * offset by mv_max; both histograms are bumped only when both resulting
 * indices lie in [0, MVvals).
 */
static void count_mv_residual(MACROBLOCK *x, int mv_row, int mv_col,
                              const int_mv *best_ref_mv) {
  const int row_idx = mv_max + ((mv_row - best_ref_mv->as_mv.row) >> 1);
  const int col_idx = mv_max + ((mv_col - best_ref_mv->as_mv.col) >> 1);

  if (row_idx < 0 || row_idx >= MVvals) return;
  if (col_idx < 0 || col_idx >= MVvals) return;

  x->MVcount[0][row_idx]++;
  x->MVcount[1][col_idx]++;
}

/* Update the MV histograms for the mode chosen for this macroblock:
 * every NEW4X4 partition of a SPLITMV macroblock, or the single vector of a
 * NEWMV macroblock.  Other modes leave the histograms untouched.
 */
static void rd_update_mvcount(MACROBLOCK *x, int_mv *best_ref_mv) {
  if (x->e_mbd.mode_info_context->mbmi.mode == SPLITMV) {
    int part;

    for (part = 0; part < x->partition_info->count; ++part) {
      if (x->partition_info->bmi[part].mode != NEW4X4) continue;

      count_mv_residual(x, x->partition_info->bmi[part].mv.as_mv.row,
                        x->partition_info->bmi[part].mv.as_mv.col,
                        best_ref_mv);
    }
  } else if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV) {
    count_mv_residual(x, x->e_mbd.mode_info_context->mbmi.mv.as_mv.row,
                      x->e_mbd.mode_info_context->mbmi.mv.as_mv.col,
                      best_ref_mv);
  }
}
1597
1598
static int evaluate_inter_mode_rd(int mdcounts[4], RATE_DISTORTION *rd,
1599
                                  int *disable_skip, VP8_COMP *cpi,
1600
2.93M
                                  MACROBLOCK *x) {
1601
2.93M
  MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode;
1602
2.93M
  BLOCK *b = &x->block[0];
1603
2.93M
  MACROBLOCKD *xd = &x->e_mbd;
1604
2.93M
  int distortion;
1605
2.93M
  vp8_build_inter16x16_predictors_mby(&x->e_mbd, x->e_mbd.predictor, 16);
1606
1607
2.93M
  if (cpi->active_map_enabled && x->active_ptr[0] == 0) {
1608
0
    x->skip = 1;
1609
2.93M
  } else if (x->encode_breakout) {
1610
0
    unsigned int sse;
1611
0
    unsigned int var;
1612
0
    unsigned int threshold =
1613
0
        (xd->block[0].dequant[1] * xd->block[0].dequant[1] >> 4);
1614
1615
0
    if (threshold < x->encode_breakout) threshold = x->encode_breakout;
1616
1617
0
    var = vpx_variance16x16(*(b->base_src), b->src_stride, x->e_mbd.predictor,
1618
0
                            16, &sse);
1619
1620
0
    if (sse < threshold) {
1621
0
      unsigned int q2dc = xd->block[24].dequant[0];
1622
      /* If theres is no codeable 2nd order dc
1623
         or a very small uniform pixel change change */
1624
0
      if ((sse - var < q2dc * q2dc >> 4) || (sse / 2 > var && sse - var < 64)) {
1625
        /* Check u and v to make sure skip is ok */
1626
0
        unsigned int sse2 = VP8_UVSSE(x);
1627
0
        if (sse2 * 2 < threshold) {
1628
0
          x->skip = 1;
1629
0
          rd->distortion2 = sse + sse2;
1630
0
          rd->rate2 = 500;
1631
1632
          /* for best_yrd calculation */
1633
0
          rd->rate_uv = 0;
1634
0
          rd->distortion_uv = sse2;
1635
1636
0
          *disable_skip = 1;
1637
0
          return RDCOST(x->rdmult, x->rddiv, rd->rate2, rd->distortion2);
1638
0
        }
1639
0
      }
1640
0
    }
1641
0
  }
1642
1643
  /* Add in the Mv/mode cost */
1644
2.93M
  rd->rate2 += vp8_cost_mv_ref(this_mode, mdcounts);
1645
1646
  /* Y cost and distortion */
1647
2.93M
  macro_block_yrd(x, &rd->rate_y, &distortion);
1648
2.93M
  rd->rate2 += rd->rate_y;
1649
2.93M
  rd->distortion2 += distortion;
1650
1651
  /* UV cost and distortion */
1652
2.93M
  rd_inter16x16_uv(cpi, x, &rd->rate_uv, &rd->distortion_uv,
1653
2.93M
                   cpi->common.full_pixel);
1654
2.93M
  rd->rate2 += rd->rate_uv;
1655
2.93M
  rd->distortion2 += rd->distortion_uv;
1656
2.93M
  return INT_MAX;
1657
2.93M
}
1658
1659
static int calculate_final_rd_costs(int this_rd, RATE_DISTORTION *rd,
1660
                                    int *other_cost, int disable_skip,
1661
                                    int uv_intra_tteob, int intra_rd_penalty,
1662
6.94M
                                    VP8_COMP *cpi, MACROBLOCK *x) {
1663
6.94M
  MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode;
1664
1665
  /* Where skip is allowable add in the default per mb cost for the no
1666
   * skip case. where we then decide to skip we have to delete this and
1667
   * replace it with the cost of signalling a skip
1668
   */
1669
6.94M
  if (cpi->common.mb_no_coeff_skip) {
1670
6.94M
    *other_cost += vp8_cost_bit(cpi->prob_skip_false, 0);
1671
6.94M
    rd->rate2 += *other_cost;
1672
6.94M
  }
1673
1674
  /* Estimate the reference frame signaling cost and add it
1675
   * to the rolling cost variable.
1676
   */
1677
6.94M
  rd->rate2 += x->ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
1678
1679
6.94M
  if (!disable_skip) {
1680
    /* Test for the condition where skip block will be activated
1681
     * because there are no non zero coefficients and make any
1682
     * necessary adjustment for rate
1683
     */
1684
6.26M
    if (cpi->common.mb_no_coeff_skip) {
1685
6.26M
      int i;
1686
6.26M
      int tteob;
1687
6.26M
      int has_y2_block = (this_mode != SPLITMV && this_mode != B_PRED);
1688
1689
6.26M
      tteob = 0;
1690
6.26M
      if (has_y2_block) tteob += x->e_mbd.eobs[24];
1691
1692
106M
      for (i = 0; i < 16; ++i) tteob += (x->e_mbd.eobs[i] > has_y2_block);
1693
1694
6.26M
      if (x->e_mbd.mode_info_context->mbmi.ref_frame) {
1695
29.8M
        for (i = 16; i < 24; ++i) tteob += x->e_mbd.eobs[i];
1696
3.31M
      } else {
1697
2.95M
        tteob += uv_intra_tteob;
1698
2.95M
      }
1699
1700
6.26M
      if (tteob == 0) {
1701
450k
        rd->rate2 -= (rd->rate_y + rd->rate_uv);
1702
        /* for best_yrd calculation */
1703
450k
        rd->rate_uv = 0;
1704
1705
        /* Back out no skip flag costing and add in skip flag costing */
1706
450k
        if (cpi->prob_skip_false) {
1707
450k
          int prob_skip_cost;
1708
1709
450k
          prob_skip_cost = vp8_cost_bit(cpi->prob_skip_false, 1);
1710
450k
          prob_skip_cost -= (int)vp8_cost_bit(cpi->prob_skip_false, 0);
1711
450k
          rd->rate2 += prob_skip_cost;
1712
450k
          *other_cost += prob_skip_cost;
1713
450k
        }
1714
450k
      }
1715
6.26M
    }
1716
    /* Calculate the final RD estimate for this mode */
1717
6.26M
    this_rd = RDCOST(x->rdmult, x->rddiv, rd->rate2, rd->distortion2);
1718
6.26M
    if (this_rd < INT_MAX &&
1719
6.26M
        x->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
1720
2.95M
      this_rd += intra_rd_penalty;
1721
2.95M
    }
1722
6.26M
  }
1723
6.94M
  return this_rd;
1724
6.94M
}
1725
1726
static void update_best_mode(BEST_MODE *best_mode, int this_rd,
1727
                             RATE_DISTORTION *rd, int other_cost,
1728
2.52M
                             MACROBLOCK *x) {
1729
2.52M
  MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode;
1730
1731
2.52M
  other_cost += x->ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
1732
1733
  /* Calculate the final y RD estimate for this mode */
1734
2.52M
  best_mode->yrd =
1735
2.52M
      RDCOST(x->rdmult, x->rddiv, (rd->rate2 - rd->rate_uv - other_cost),
1736
2.52M
             (rd->distortion2 - rd->distortion_uv));
1737
1738
2.52M
  best_mode->rd = this_rd;
1739
2.52M
  best_mode->mbmode = x->e_mbd.mode_info_context->mbmi;
1740
2.52M
  best_mode->partition = *x->partition_info;
1741
1742
2.52M
  if ((this_mode == B_PRED) || (this_mode == SPLITMV)) {
1743
527k
    int i;
1744
8.97M
    for (i = 0; i < 16; ++i) {
1745
8.44M
      best_mode->bmodes[i] = x->e_mbd.block[i].bmi;
1746
8.44M
    }
1747
527k
  }
1748
2.52M
}
1749
1750
/*
 * Rate-distortion mode decision for one macroblock.
 *
 * Walks the MAX_MODES candidate (prediction mode, reference frame) pairs,
 * evaluates each one that passes the per-mode RD threshold and test
 * frequency checks, and keeps the cheapest in best_mode. The per-mode
 * thresholds (x->rd_thresh_mult / x->rd_threshes) are adapted as the search
 * proceeds. On exit the winning mode info is written back to
 * x->e_mbd.mode_info_context and rd_update_mvcount() is called.
 *
 * cpi               encoder instance.
 * x                 macroblock being coded.
 * recon_yoffset,
 * recon_uvoffset    forwarded to get_predictor_pointers(), vp8_cal_sad()
 *                   (Y offset only) and the temporal denoiser.
 * returnrate,
 * returndistortion  receive rd.rate2 / rd.distortion2 each time a candidate
 *                   is accepted as the new best; not written if none is.
 * returnintra       set to INT_MAX on entry, then to the distortion of the
 *                   best intra candidate seen.
 * mb_row, mb_col    only used when CONFIG_TEMPORAL_DENOISING is enabled.
 *
 * Note: when cpi->is_src_frame_alt_ref is set and the best mode is not
 * ZEROMV on ALTREF_FRAME, the function forces ZEROMV/ALTREF_FRAME and
 * returns early, without calling rd_update_mvcount().
 */
void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
1751
                            int recon_uvoffset, int *returnrate,
1752
                            int *returndistortion, int *returnintra, int mb_row,
1753
797k
                            int mb_col) {
1754
797k
  BLOCK *b = &x->block[0];
1755
797k
  BLOCKD *d = &x->e_mbd.block[0];
1756
797k
  MACROBLOCKD *xd = &x->e_mbd;
1757
797k
  int_mv best_ref_mv_sb[2];
1758
797k
  int_mv mode_mv_sb[2][MB_MODE_COUNT];
1759
797k
  int_mv best_ref_mv;
1760
797k
  int_mv *mode_mv;
1761
797k
  MB_PREDICTION_MODE this_mode;
1762
797k
  int num00;
1763
797k
  int best_mode_index = 0;
1764
797k
  BEST_MODE best_mode;
1765
1766
797k
  int i;
1767
797k
  int mode_index;
1768
797k
  int mdcounts[4];
1769
797k
  int rate;
1770
797k
  RATE_DISTORTION rd;
1771
797k
  int uv_intra_rate, uv_intra_distortion, uv_intra_rate_tokenonly;
1772
797k
  int uv_intra_tteob = 0;
1773
797k
  int uv_intra_done = 0;
1774
1775
797k
  MB_PREDICTION_MODE uv_intra_mode = 0;
1776
797k
  int_mv mvp;
1777
797k
  int near_sadidx[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
1778
797k
  int saddone = 0;
1779
  /* search range got from mv_pred(). It uses step_param levels. (0-7) */
1780
797k
  int sr = 0;
1781
1782
797k
  unsigned char *plane[4][3] = { { 0, 0 } };
1783
797k
  int ref_frame_map[4];
1784
797k
  int sign_bias = 0;
1785
1786
797k
  int intra_rd_penalty =
1787
797k
      10 * vp8_dc_quant(cpi->common.base_qindex, cpi->common.y1dc_delta_q);
1788
1789
797k
#if CONFIG_TEMPORAL_DENOISING
1790
797k
  unsigned int zero_mv_sse = UINT_MAX, best_sse = UINT_MAX,
1791
797k
               best_rd_sse = UINT_MAX;
1792
797k
#endif
1793
1794
  // _uv variables are not set consistently before calling update_best_mode.
1795
797k
  rd.rate_uv = 0;
1796
797k
  rd.distortion_uv = 0;
1797
1798
797k
  mode_mv = mode_mv_sb[sign_bias];
1799
797k
  best_ref_mv.as_int = 0;
1800
797k
  best_mode.rd = INT_MAX;
1801
797k
  best_mode.yrd = INT_MAX;
1802
797k
  best_mode.intra_rd = INT_MAX;
1803
797k
  memset(mode_mv_sb, 0, sizeof(mode_mv_sb));
1804
797k
  memset(&best_mode.mbmode, 0, sizeof(best_mode.mbmode));
1805
797k
  memset(&best_mode.bmodes, 0, sizeof(best_mode.bmodes));
1806
1807
  /* Setup search priorities */
1808
797k
  get_reference_search_order(cpi, ref_frame_map);
1809
1810
  /* Check to see if there is at least 1 valid reference frame that we need
1811
   * to calculate near_mvs.
1812
   */
1813
797k
  if (ref_frame_map[1] > 0) {
1814
797k
    sign_bias = vp8_find_near_mvs_bias(
1815
797k
        &x->e_mbd, x->e_mbd.mode_info_context, mode_mv_sb, best_ref_mv_sb,
1816
797k
        mdcounts, ref_frame_map[1], cpi->common.ref_frame_sign_bias);
1817
1818
797k
    mode_mv = mode_mv_sb[sign_bias];
1819
797k
    best_ref_mv.as_int = best_ref_mv_sb[sign_bias].as_int;
1820
797k
  }
1821
1822
797k
  get_predictor_pointers(cpi, plane, recon_yoffset, recon_uvoffset);
1823
1824
797k
  *returnintra = INT_MAX;
1825
  /* Count of the number of MBs tested so far this frame */
1826
797k
  x->mbs_tested_so_far++;
1827
1828
797k
  x->skip = 0;
1829
1830
16.7M
  for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
1831
15.9M
    int this_rd = INT_MAX;
1832
15.9M
    int disable_skip = 0;
1833
15.9M
    int other_cost = 0;
1834
15.9M
    int this_ref_frame = ref_frame_map[vp8_ref_frame_order[mode_index]];
1835
1836
    /* Test best rd so far against threshold for trying this mode. */
1837
15.9M
    if (best_mode.rd <= x->rd_threshes[mode_index]) continue;
1838
1839
    /* A negative map entry means this reference slot is not searched. */
14.2M
    if (this_ref_frame < 0) continue;
1840
1841
    /* These variables hold the rolling total cost and distortion for
1842
     * this mode
1843
     */
1844
8.76M
    rd.rate2 = 0;
1845
8.76M
    rd.distortion2 = 0;
1846
1847
8.76M
    this_mode = vp8_mode_order[mode_index];
1848
1849
8.76M
    x->e_mbd.mode_info_context->mbmi.mode = this_mode;
1850
8.76M
    x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame;
1851
1852
    /* Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
1853
     * unless ARNR filtering is enabled in which case we want
1854
     * an unfiltered alternative
1855
     */
1856
8.76M
    if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
1857
0
      if (this_mode != ZEROMV ||
1858
0
          x->e_mbd.mode_info_context->mbmi.ref_frame != ALTREF_FRAME) {
1859
0
        continue;
1860
0
      }
1861
0
    }
1862
1863
    /* everything but intra */
1864
8.76M
    if (x->e_mbd.mode_info_context->mbmi.ref_frame) {
1865
5.47M
      assert(plane[this_ref_frame][0] != NULL &&
1866
5.47M
             plane[this_ref_frame][1] != NULL &&
1867
5.47M
             plane[this_ref_frame][2] != NULL);
1868
5.47M
      x->e_mbd.pre.y_buffer = plane[this_ref_frame][0];
1869
5.47M
      x->e_mbd.pre.u_buffer = plane[this_ref_frame][1];
1870
5.47M
      x->e_mbd.pre.v_buffer = plane[this_ref_frame][2];
1871
1872
      /* Switch to the near-mv candidates computed for this frame's sign
       * bias.
       */
5.47M
      if (sign_bias != cpi->common.ref_frame_sign_bias[this_ref_frame]) {
1873
0
        sign_bias = cpi->common.ref_frame_sign_bias[this_ref_frame];
1874
0
        mode_mv = mode_mv_sb[sign_bias];
1875
0
        best_ref_mv.as_int = best_ref_mv_sb[sign_bias].as_int;
1876
0
      }
1877
5.47M
    }
1878
1879
    /* Check to see if the testing frequency for this mode is at its
1880
     * max. If so then prevent it from being tested and increase the
1881
     * threshold for its testing
1882
     */
1883
8.76M
    if (x->mode_test_hit_counts[mode_index] &&
1884
8.76M
        (cpi->mode_check_freq[mode_index] > 1)) {
1885
219k
      if (x->mbs_tested_so_far <= cpi->mode_check_freq[mode_index] *
1886
219k
                                      x->mode_test_hit_counts[mode_index]) {
1887
        /* Increase the threshold for coding this mode to make it
1888
         * less likely to be chosen
1889
         */
1890
117k
        x->rd_thresh_mult[mode_index] += 4;
1891
1892
117k
        if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT) {
1893
23.6k
          x->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
1894
23.6k
        }
1895
1896
117k
        x->rd_threshes[mode_index] =
1897
117k
            (cpi->rd_baseline_thresh[mode_index] >> 7) *
1898
117k
            x->rd_thresh_mult[mode_index];
1899
1900
117k
        continue;
1901
117k
      }
1902
219k
    }
1903
1904
    /* We have now reached the point where we are going to test the
1905
     * current mode so increment the counter for the number of times
1906
     * it has been tested
1907
     */
1908
8.64M
    x->mode_test_hit_counts[mode_index]++;
1909
1910
    /* Experimental code. Special case for gf and arf zeromv modes.
1911
     * Increase zbin size to suppress noise
1912
     */
1913
8.64M
    if (x->zbin_mode_boost_enabled) {
1914
0
      if (this_ref_frame == INTRA_FRAME) {
1915
0
        x->zbin_mode_boost = 0;
1916
0
      } else {
1917
0
        if (vp8_mode_order[mode_index] == ZEROMV) {
1918
0
          if (this_ref_frame != LAST_FRAME) {
1919
0
            x->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
1920
0
          } else {
1921
0
            x->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
1922
0
          }
1923
0
        } else if (vp8_mode_order[mode_index] == SPLITMV) {
1924
0
          x->zbin_mode_boost = 0;
1925
0
        } else {
1926
0
          x->zbin_mode_boost = MV_ZBIN_BOOST;
1927
0
        }
1928
0
      }
1929
1930
0
      vp8_update_zbin_extra(cpi, x);
1931
0
    }
1932
1933
    /* The intra chroma mode is picked once, the first time an intra
     * candidate is reached, and reused for every later intra candidate.
     */
8.64M
    if (!uv_intra_done && this_ref_frame == INTRA_FRAME) {
1934
797k
      rd_pick_intra_mbuv_mode(x, &uv_intra_rate, &uv_intra_rate_tokenonly,
1935
797k
                              &uv_intra_distortion);
1936
797k
      uv_intra_mode = x->e_mbd.mode_info_context->mbmi.uv_mode;
1937
1938
      /*
1939
       * Total of the eobs is used later to further adjust rate2. Since uv
1940
       * block's intra eobs will be overwritten when we check inter modes,
1941
       * we need to save uv_intra_tteob here.
1942
       */
1943
7.18M
      for (i = 16; i < 24; ++i) uv_intra_tteob += x->e_mbd.eobs[i];
1944
1945
797k
      uv_intra_done = 1;
1946
797k
    }
1947
1948
8.64M
    switch (this_mode) {
1949
545k
      case B_PRED: {
1950
545k
        int tmp_rd;
1951
1952
        /* Note the rate value returned here includes the cost of
1953
         * coding the BPRED mode: x->mbmode_cost[x->e_mbd.frame_type][BPRED]
1954
         */
1955
545k
        int distortion;
1956
545k
        tmp_rd = rd_pick_intra4x4mby_modes(x, &rate, &rd.rate_y, &distortion,
1957
545k
                                           best_mode.yrd);
1958
545k
        rd.rate2 += rate;
1959
545k
        rd.distortion2 += distortion;
1960
1961
        /* Only add the chroma cost when the luma RD beats the best luma RD
         * so far; otherwise reject the mode outright.
         */
545k
        if (tmp_rd < best_mode.yrd) {
1962
211k
          assert(uv_intra_done);
1963
211k
          rd.rate2 += uv_intra_rate;
1964
211k
          rd.rate_uv = uv_intra_rate_tokenonly;
1965
211k
          rd.distortion2 += uv_intra_distortion;
1966
211k
          rd.distortion_uv = uv_intra_distortion;
1967
334k
        } else {
1968
334k
          this_rd = INT_MAX;
1969
334k
          disable_skip = 1;
1970
334k
        }
1971
545k
        break;
1972
0
      }
1973
1974
726k
      case SPLITMV: {
1975
726k
        int tmp_rd;
1976
726k
        int this_rd_thresh;
1977
726k
        int distortion;
1978
1979
        /* Use the NEWMV threshold of the matching reference slot:
         * THR_NEW1 for slot 1, THR_NEW2 for slot 2, THR_NEW3 otherwise.
         */
726k
        this_rd_thresh = (vp8_ref_frame_order[mode_index] == 1)
1980
726k
                             ? x->rd_threshes[THR_NEW1]
1981
726k
                             : x->rd_threshes[THR_NEW3];
1982
726k
        this_rd_thresh = (vp8_ref_frame_order[mode_index] == 2)
1983
726k
                             ? x->rd_threshes[THR_NEW2]
1984
726k
                             : this_rd_thresh;
1985
1986
726k
        tmp_rd = vp8_rd_pick_best_mbsegmentation(
1987
726k
            cpi, x, &best_ref_mv, best_mode.yrd, mdcounts, &rate, &rd.rate_y,
1988
726k
            &distortion, this_rd_thresh);
1989
1990
726k
        rd.rate2 += rate;
1991
726k
        rd.distortion2 += distortion;
1992
1993
        /* If even the 'Y' rd value of split is higher than best so far
1994
         * then don't bother looking at UV
1995
         */
1996
726k
        if (tmp_rd < best_mode.yrd) {
1997
          /* Now work out UV cost and add it in */
1998
382k
          rd_inter4x4_uv(cpi, x, &rd.rate_uv, &rd.distortion_uv,
1999
382k
                         cpi->common.full_pixel);
2000
382k
          rd.rate2 += rd.rate_uv;
2001
382k
          rd.distortion2 += rd.distortion_uv;
2002
382k
        } else {
2003
344k
          this_rd = INT_MAX;
2004
344k
          disable_skip = 1;
2005
344k
        }
2006
726k
        break;
2007
0
      }
2008
797k
      case DC_PRED:
2009
1.44M
      case V_PRED:
2010
2.10M
      case H_PRED:
2011
2.74M
      case TM_PRED: {
2012
2.74M
        int distortion;
2013
2.74M
        x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
2014
2015
2.74M
        vp8_build_intra_predictors_mby_s(
2016
2.74M
            xd, xd->dst.y_buffer - xd->dst.y_stride, xd->dst.y_buffer - 1,
2017
2.74M
            xd->dst.y_stride, xd->predictor, 16);
2018
2.74M
        macro_block_yrd(x, &rd.rate_y, &distortion);
2019
2.74M
        rd.rate2 += rd.rate_y;
2020
2.74M
        rd.distortion2 += distortion;
2021
2.74M
        rd.rate2 += x->mbmode_cost[x->e_mbd.frame_type]
2022
2.74M
                                  [x->e_mbd.mode_info_context->mbmi.mode];
2023
2.74M
        assert(uv_intra_done);
2024
2.74M
        rd.rate2 += uv_intra_rate;
2025
2.74M
        rd.rate_uv = uv_intra_rate_tokenonly;
2026
2.74M
        rd.distortion2 += uv_intra_distortion;
2027
2.74M
        rd.distortion_uv = uv_intra_distortion;
2028
2.74M
        break;
2029
2.10M
      }
2030
2031
985k
      case NEWMV: {
2032
985k
        int thissme;
2033
985k
        int bestsme = INT_MAX;
2034
985k
        int step_param = cpi->sf.first_step;
2035
985k
        int further_steps;
2036
985k
        int n;
2037
        /* If last step (1-away) of n-step search doesn't pick the center point
2038
           as the best match, we will do a final 1-away diamond refining search
2039
        */
2040
985k
        int do_refine = 1;
2041
2042
985k
        int sadpb = x->sadperbit16;
2043
985k
        int_mv mvp_full;
2044
2045
985k
        int col_min = ((best_ref_mv.as_mv.col + 7) >> 3) - MAX_FULL_PEL_VAL;
2046
985k
        int row_min = ((best_ref_mv.as_mv.row + 7) >> 3) - MAX_FULL_PEL_VAL;
2047
985k
        int col_max = (best_ref_mv.as_mv.col >> 3) + MAX_FULL_PEL_VAL;
2048
985k
        int row_max = (best_ref_mv.as_mv.row >> 3) + MAX_FULL_PEL_VAL;
2049
2050
        /* Save the MV limits; they are narrowed for the search below and
         * restored once the full-pel search is done.
         */
985k
        int tmp_col_min = x->mv_col_min;
2051
985k
        int tmp_col_max = x->mv_col_max;
2052
985k
        int tmp_row_min = x->mv_row_min;
2053
985k
        int tmp_row_max = x->mv_row_max;
2054
2055
985k
        if (!saddone) {
2056
672k
          vp8_cal_sad(cpi, xd, x, recon_yoffset, &near_sadidx[0]);
2057
672k
          saddone = 1;
2058
672k
        }
2059
2060
985k
        vp8_mv_pred(cpi, &x->e_mbd, x->e_mbd.mode_info_context, &mvp,
2061
985k
                    x->e_mbd.mode_info_context->mbmi.ref_frame,
2062
985k
                    cpi->common.ref_frame_sign_bias, &sr, &near_sadidx[0]);
2063
2064
985k
        mvp_full.as_mv.col = mvp.as_mv.col >> 3;
2065
985k
        mvp_full.as_mv.row = mvp.as_mv.row >> 3;
2066
2067
        /* Get intersection of UMV window and valid MV window to
2068
         * reduce # of checks in diamond search.
2069
         */
2070
985k
        if (x->mv_col_min < col_min) x->mv_col_min = col_min;
2071
985k
        if (x->mv_col_max > col_max) x->mv_col_max = col_max;
2072
985k
        if (x->mv_row_min < row_min) x->mv_row_min = row_min;
2073
985k
        if (x->mv_row_max > row_max) x->mv_row_max = row_max;
2074
2075
        /* adjust search range according to sr from mv prediction */
2076
985k
        if (sr > step_param) step_param = sr;
2077
2078
        /* Initial step/diamond search */
2079
985k
        {
2080
985k
          bestsme = cpi->diamond_search_sad(
2081
985k
              x, b, d, &mvp_full, &d->bmi.mv, step_param, sadpb, &num00,
2082
985k
              &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv);
2083
985k
          mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
2084
2085
          /* Further step/diamond searches as necessary */
2086
985k
          further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
2087
2088
985k
          n = num00;
2089
985k
          num00 = 0;
2090
2091
          /* If there won't be more n-step search, check to see if refining
2092
           * search is needed. */
2093
985k
          if (n > further_steps) do_refine = 0;
2094
2095
4.30M
          while (n < further_steps) {
2096
3.31M
            n++;
2097
2098
3.31M
            if (num00) {
2099
304k
              num00--;
2100
3.01M
            } else {
2101
3.01M
              thissme = cpi->diamond_search_sad(
2102
3.01M
                  x, b, d, &mvp_full, &d->bmi.mv, step_param + n, sadpb, &num00,
2103
3.01M
                  &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv);
2104
2105
              /* check to see if refining search is needed. */
2106
3.01M
              if (num00 > (further_steps - n)) do_refine = 0;
2107
2108
3.01M
              if (thissme < bestsme) {
2109
452k
                bestsme = thissme;
2110
452k
                mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
2111
2.55M
              } else {
2112
2.55M
                d->bmi.mv.as_int = mode_mv[NEWMV].as_int;
2113
2.55M
              }
2114
3.01M
            }
2115
3.31M
          }
2116
985k
        }
2117
2118
        /* final 1-away diamond refining search */
2119
985k
        if (do_refine == 1) {
2120
628k
          int search_range;
2121
2122
628k
          search_range = 8;
2123
2124
628k
          thissme = cpi->refining_search_sad(
2125
628k
              x, b, d, &d->bmi.mv, sadpb, search_range,
2126
628k
              &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv);
2127
2128
628k
          if (thissme < bestsme) {
2129
27.1k
            bestsme = thissme;
2130
27.1k
            mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
2131
601k
          } else {
2132
601k
            d->bmi.mv.as_int = mode_mv[NEWMV].as_int;
2133
601k
          }
2134
628k
        }
2135
2136
985k
        x->mv_col_min = tmp_col_min;
2137
985k
        x->mv_col_max = tmp_col_max;
2138
985k
        x->mv_row_min = tmp_row_min;
2139
985k
        x->mv_row_max = tmp_row_max;
2140
2141
985k
        if (bestsme < INT_MAX) {
2142
985k
          int dis; /* TODO: use dis in distortion calculation later. */
2143
985k
          unsigned int sse;
2144
985k
          cpi->find_fractional_mv_step(
2145
985k
              x, b, d, &d->bmi.mv, &best_ref_mv, x->errorperbit,
2146
985k
              &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &dis, &sse);
2147
985k
        }
2148
2149
985k
        mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
2150
2151
        /* Add the new motion vector cost to our rolling cost variable */
2152
985k
        rd.rate2 +=
2153
985k
            vp8_mv_bit_cost(&mode_mv[NEWMV], &best_ref_mv, x->mvcost, 96);
2154
985k
      }
2155
        // fall through
2156
2157
2.20M
      case NEARESTMV:
2158
3.41M
      case NEARMV:
2159
        /* Clip "next_nearest" so that it does not extend too far out
2160
         * of image
2161
         */
2162
3.41M
        vp8_clamp_mv2(&mode_mv[this_mode], xd);
2163
2164
        /* Do not bother proceeding if the vector (from newmv, nearest
2165
         * or near) is 0,0 as this should then be coded using the zeromv
2166
         * mode.
2167
         */
2168
3.41M
        if (((this_mode == NEARMV) || (this_mode == NEARESTMV)) &&
2169
3.41M
            (mode_mv[this_mode].as_int == 0)) {
2170
1.70M
          continue;
2171
1.70M
        }
2172
        // fall through
2173
2174
2.93M
      case ZEROMV:
2175
2176
        /* Trap vectors that reach beyond the UMV borders
2177
         * Note that ALL New MV, Nearest MV Near MV and Zero MV code
2178
         * drops through to this point because of the lack of break
2179
         * statements in the previous two cases.
2180
         */
2181
2.93M
        if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) ||
2182
2.93M
            ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) ||
2183
2.93M
            ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) ||
2184
2.93M
            ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max)) {
2185
0
          continue;
2186
0
        }
2187
2188
2.93M
        vp8_set_mbmode_and_mvs(x, this_mode, &mode_mv[this_mode]);
2189
2.93M
        this_rd = evaluate_inter_mode_rd(mdcounts, &rd, &disable_skip, cpi, x);
2190
2.93M
        break;
2191
2192
0
      default: break;
2193
8.64M
    }
2194
2195
6.94M
    this_rd =
2196
6.94M
        calculate_final_rd_costs(this_rd, &rd, &other_cost, disable_skip,
2197
6.94M
                                 uv_intra_tteob, intra_rd_penalty, cpi, x);
2198
2199
    /* Keep record of best intra distortion */
2200
6.94M
    if ((x->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME) &&
2201
6.94M
        (this_rd < best_mode.intra_rd)) {
2202
1.16M
      best_mode.intra_rd = this_rd;
2203
1.16M
      *returnintra = rd.distortion2;
2204
1.16M
    }
2205
6.94M
#if CONFIG_TEMPORAL_DENOISING
2206
6.94M
    if (cpi->oxcf.noise_sensitivity) {
2207
0
      unsigned int sse;
2208
0
      vp8_get_inter_mbpred_error(x, &cpi->fn_ptr[BLOCK_16X16], &sse,
2209
0
                                 mode_mv[this_mode]);
2210
2211
0
      if (sse < best_rd_sse) best_rd_sse = sse;
2212
2213
      /* Store for later use by denoiser. */
2214
0
      if (this_mode == ZEROMV && sse < zero_mv_sse) {
2215
0
        zero_mv_sse = sse;
2216
0
        x->best_zeromv_reference_frame =
2217
0
            x->e_mbd.mode_info_context->mbmi.ref_frame;
2218
0
      }
2219
2220
      /* Store the best NEWMV in x for later use in the denoiser. */
2221
0
      if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV && sse < best_sse) {
2222
0
        best_sse = sse;
2223
0
        vp8_get_inter_mbpred_error(x, &cpi->fn_ptr[BLOCK_16X16], &best_sse,
2224
0
                                   mode_mv[this_mode]);
2225
0
        x->best_sse_inter_mode = NEWMV;
2226
0
        x->best_sse_mv = x->e_mbd.mode_info_context->mbmi.mv;
2227
0
        x->need_to_clamp_best_mvs =
2228
0
            x->e_mbd.mode_info_context->mbmi.need_to_clamp_mvs;
2229
0
        x->best_reference_frame = x->e_mbd.mode_info_context->mbmi.ref_frame;
2230
0
      }
2231
0
    }
2232
6.94M
#endif
2233
2234
    /* Did this mode help, i.e. is it the new best mode? A set x->skip
     * also forces acceptance and ends the search (see the break below).
     */
2235
6.94M
    if (this_rd < best_mode.rd || x->skip) {
2236
      /* Note index of best mode so far */
2237
2.52M
      best_mode_index = mode_index;
2238
2.52M
      *returnrate = rd.rate2;
2239
2.52M
      *returndistortion = rd.distortion2;
2240
2.52M
      if (this_mode <= B_PRED) {
2241
1.02M
        x->e_mbd.mode_info_context->mbmi.uv_mode = uv_intra_mode;
2242
        /* required for left and above block mv */
2243
1.02M
        x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
2244
1.02M
      }
2245
2.52M
      update_best_mode(&best_mode, this_rd, &rd, other_cost, x);
2246
2247
      /* Testing this mode gave rise to an improvement in best error
2248
       * score. Lower threshold a bit for next time
2249
       */
2250
2.52M
      x->rd_thresh_mult[mode_index] =
2251
2.52M
          (x->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2))
2252
2.52M
              ? x->rd_thresh_mult[mode_index] - 2
2253
2.52M
              : MIN_THRESHMULT;
2254
2.52M
    }
2255
2256
    /* If the mode did not help improve the best error case then raise
2257
     * the threshold for testing that mode next time around.
2258
     */
2259
4.41M
    else {
2260
4.41M
      x->rd_thresh_mult[mode_index] += 4;
2261
2262
4.41M
      if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT) {
2263
2.26M
        x->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
2264
2.26M
      }
2265
4.41M
    }
2266
6.94M
    x->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) *
2267
6.94M
                                 x->rd_thresh_mult[mode_index];
2268
2269
6.94M
    if (x->skip) break;
2270
6.94M
  }
2271
2272
  /* Reduce the activation RD thresholds for the best choice mode */
2273
797k
  if ((cpi->rd_baseline_thresh[best_mode_index] > 0) &&
2274
797k
      (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2))) {
2275
489k
    int best_adjustment = (x->rd_thresh_mult[best_mode_index] >> 2);
2276
2277
489k
    x->rd_thresh_mult[best_mode_index] =
2278
489k
        (x->rd_thresh_mult[best_mode_index] >=
2279
489k
         (MIN_THRESHMULT + best_adjustment))
2280
489k
            ? x->rd_thresh_mult[best_mode_index] - best_adjustment
2281
489k
            : MIN_THRESHMULT;
2282
489k
    x->rd_threshes[best_mode_index] =
2283
489k
        (cpi->rd_baseline_thresh[best_mode_index] >> 7) *
2284
489k
        x->rd_thresh_mult[best_mode_index];
2285
489k
  }
2286
2287
797k
#if CONFIG_TEMPORAL_DENOISING
2288
797k
  if (cpi->oxcf.noise_sensitivity) {
2289
0
    int block_index = mb_row * cpi->common.mb_cols + mb_col;
2290
0
    if (x->best_sse_inter_mode == DC_PRED) {
2291
      /* No best MV found. */
2292
0
      x->best_sse_inter_mode = best_mode.mbmode.mode;
2293
0
      x->best_sse_mv = best_mode.mbmode.mv;
2294
0
      x->need_to_clamp_best_mvs = best_mode.mbmode.need_to_clamp_mvs;
2295
0
      x->best_reference_frame = best_mode.mbmode.ref_frame;
2296
0
      best_sse = best_rd_sse;
2297
0
    }
2298
0
    vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse,
2299
0
                            recon_yoffset, recon_uvoffset, &cpi->common.lf_info,
2300
0
                            mb_row, mb_col, block_index, 0);
2301
2302
    /* Reevaluate ZEROMV after denoising. */
2303
0
    if (best_mode.mbmode.ref_frame == INTRA_FRAME &&
2304
0
        x->best_zeromv_reference_frame != INTRA_FRAME) {
2305
0
      int this_rd = INT_MAX;
2306
0
      int disable_skip = 0;
2307
0
      int other_cost = 0;
2308
0
      int this_ref_frame = x->best_zeromv_reference_frame;
2309
0
      rd.rate2 =
2310
0
          x->ref_frame_cost[this_ref_frame] + vp8_cost_mv_ref(ZEROMV, mdcounts);
2311
0
      rd.distortion2 = 0;
2312
2313
      /* set up the proper prediction buffers for the frame */
2314
0
      x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame;
2315
0
      x->e_mbd.pre.y_buffer = plane[this_ref_frame][0];
2316
0
      x->e_mbd.pre.u_buffer = plane[this_ref_frame][1];
2317
0
      x->e_mbd.pre.v_buffer = plane[this_ref_frame][2];
2318
2319
0
      x->e_mbd.mode_info_context->mbmi.mode = ZEROMV;
2320
0
      x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
2321
0
      x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
2322
2323
0
      this_rd = evaluate_inter_mode_rd(mdcounts, &rd, &disable_skip, cpi, x);
2324
0
      this_rd =
2325
0
          calculate_final_rd_costs(this_rd, &rd, &other_cost, disable_skip,
2326
0
                                   uv_intra_tteob, intra_rd_penalty, cpi, x);
2327
0
      if (this_rd < best_mode.rd || x->skip) {
2328
0
        *returnrate = rd.rate2;
2329
0
        *returndistortion = rd.distortion2;
2330
0
        update_best_mode(&best_mode, this_rd, &rd, other_cost, x);
2331
0
      }
2332
0
    }
2333
0
  }
2334
797k
#endif
2335
2336
  /* When the source frame is the alt ref, force ZEROMV on ALTREF_FRAME
   * regardless of what the search selected, and return immediately.
   */
797k
  if (cpi->is_src_frame_alt_ref &&
2337
797k
      (best_mode.mbmode.mode != ZEROMV ||
2338
0
       best_mode.mbmode.ref_frame != ALTREF_FRAME)) {
2339
0
    x->e_mbd.mode_info_context->mbmi.mode = ZEROMV;
2340
0
    x->e_mbd.mode_info_context->mbmi.ref_frame = ALTREF_FRAME;
2341
0
    x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
2342
0
    x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
2343
0
    x->e_mbd.mode_info_context->mbmi.mb_skip_coeff =
2344
0
        (cpi->common.mb_no_coeff_skip);
2345
0
    x->e_mbd.mode_info_context->mbmi.partitioning = 0;
2346
0
    return;
2347
0
  }
2348
2349
  /* macroblock modes */
2350
797k
  x->e_mbd.mode_info_context->mbmi = best_mode.mbmode;
2351
2352
797k
  if (best_mode.mbmode.mode == B_PRED) {
2353
3.50M
    for (i = 0; i < 16; ++i) {
2354
3.29M
      xd->mode_info_context->bmi[i].as_mode = best_mode.bmodes[i].as_mode;
2355
3.29M
    }
2356
205k
  }
2357
2358
797k
  if (best_mode.mbmode.mode == SPLITMV) {
2359
3.08M
    for (i = 0; i < 16; ++i) {
2360
2.90M
      xd->mode_info_context->bmi[i].mv.as_int = best_mode.bmodes[i].mv.as_int;
2361
2.90M
    }
2362
2363
181k
    *x->partition_info = best_mode.partition;
2364
2365
    /* The macroblock-level mv is taken from the last (index 15) block. */
181k
    x->e_mbd.mode_info_context->mbmi.mv.as_int =
2366
181k
        x->partition_info->bmi[15].mv.as_int;
2367
181k
  }
2368
2369
  /* If the chosen reference frame has a different sign bias from the one
   * best_ref_mv currently reflects, use the other candidate for the mv
   * count update.
   */
797k
  if (sign_bias !=
2370
797k
      cpi->common.ref_frame_sign_bias[xd->mode_info_context->mbmi.ref_frame]) {
2371
0
    best_ref_mv.as_int = best_ref_mv_sb[!sign_bias].as_int;
2372
0
  }
2373
2374
797k
  rd_update_mvcount(x, &best_ref_mv);
2375
797k
}
2376
2377
824k
/*
 * Pick the intra coding mode for a macroblock.
 *
 * Marks the macroblock as INTRA_FRAME, runs the chroma mode search, then the
 * 16x16 luma search followed by the 4x4 luma search (the latter is given the
 * 16x16 RD cost as its bound). If the 4x4 result is strictly better the
 * macroblock mode is set to B_PRED; otherwise the mode is left as set by the
 * 16x16 search.
 *
 * x     macroblock being coded.
 * rate  receives the chroma rate plus the rate of the selected luma mode.
 */
void vp8_rd_pick_intra_mode(MACROBLOCK *x, int *rate) {
  int rd_16x16, rd_4x4;
  int uv_rate, uv_tokens = 0, uv_dist;
  int y16_rate = 0, y16_tokens = 0, y16_dist;
  int y4_rate, y4_tokens = 0, y4_dist;
  int total_rate;

  x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;

  /* Chroma first, then both luma alternatives. */
  rd_pick_intra_mbuv_mode(x, &uv_rate, &uv_tokens, &uv_dist);
  rd_16x16 = rd_pick_intra16x16mby_mode(x, &y16_rate, &y16_tokens, &y16_dist);
  rd_4x4 = rd_pick_intra4x4mby_modes(x, &y4_rate, &y4_tokens, &y4_dist,
                                     rd_16x16);

  total_rate = uv_rate;
  if (rd_4x4 >= rd_16x16) {
    total_rate += y16_rate;
  } else {
    total_rate += y4_rate;
    x->e_mbd.mode_info_context->mbmi.mode = B_PRED;
  }

  *rate = total_rate;
}