Coverage Report

Created: 2026-04-01 07:42

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libvpx/vp8/encoder/rdopt.c
Line
Count
Source
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5
 *  that can be found in the LICENSE file in the root of the source
6
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
9
 */
10
11
#include <assert.h>
12
#include <stdio.h>
13
#include <math.h>
14
#include <limits.h>
15
#include <assert.h>
16
#include "vpx_config.h"
17
#include "vp8_rtcd.h"
18
#include "./vpx_dsp_rtcd.h"
19
#include "encodeframe.h"
20
#include "tokenize.h"
21
#include "treewriter.h"
22
#include "onyx_int.h"
23
#include "modecosts.h"
24
#include "encodeintra.h"
25
#include "pickinter.h"
26
#include "vp8/common/common.h"
27
#include "vp8/common/entropymode.h"
28
#include "vp8/common/reconinter.h"
29
#include "vp8/common/reconintra.h"
30
#include "vp8/common/reconintra4x4.h"
31
#include "vp8/common/findnearmv.h"
32
#include "vp8/common/quant_common.h"
33
#include "encodemb.h"
34
#include "vp8/encoder/quantize.h"
35
#include "vpx_dsp/variance.h"
36
#include "vpx_ports/system_state.h"
37
#include "mcomp.h"
38
#include "rdopt.h"
39
#include "vpx_mem/vpx_mem.h"
40
#include "vp8/common/systemdependent.h"
41
#if CONFIG_TEMPORAL_DENOISING
42
#include "denoising.h"
43
#endif
44
extern void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x);
45
46
1.60M
#define MAXF(a, b) (((a) > (b)) ? (a) : (b))
47
48
typedef struct rate_distortion_struct {
49
  int rate2;
50
  int rate_y;
51
  int rate_uv;
52
  int distortion2;
53
  int distortion_uv;
54
} RATE_DISTORTION;
55
56
typedef struct best_mode_struct {
57
  int yrd;
58
  int rd;
59
  int intra_rd;
60
  MB_MODE_INFO mbmode;
61
  union b_mode_info bmodes[16];
62
  PARTITION_INFO partition;
63
} BEST_MODE;
64
65
/* Percentage thresholds used by vp8_auto_select_speed(), indexed by
 * cpi->Speed (17 entries, one for each speed 0..16).  The speed is lowered by
 * one step when (time budget * 100) exceeds
 * (cpi->avg_encode_time * auto_speed_thresh[cpi->Speed]).
 */
static const int auto_speed_thresh[17] = { 1000, 200, 150, 130, 150, 125,
66
                                           120,  115, 115, 115, 115, 115,
67
                                           115,  115, 115, 115, 105 };
68
69
const MB_PREDICTION_MODE vp8_mode_order[MAX_MODES] = {
70
  ZEROMV,    DC_PRED,
71
72
  NEARESTMV, NEARMV,
73
74
  ZEROMV,    NEARESTMV,
75
76
  ZEROMV,    NEARESTMV,
77
78
  NEARMV,    NEARMV,
79
80
  V_PRED,    H_PRED,    TM_PRED,
81
82
  NEWMV,     NEWMV,     NEWMV,
83
84
  SPLITMV,   SPLITMV,   SPLITMV,
85
86
  B_PRED,
87
};
88
89
/* This table determines the search order in reference frame priority order,
90
 * which may not necessarily match INTRA,LAST,GOLDEN,ARF
91
 */
92
const int vp8_ref_frame_order[MAX_MODES] = {
93
  1, 0,
94
95
  1, 1,
96
97
  2, 2,
98
99
  3, 3,
100
101
  2, 3,
102
103
  0, 0, 0,
104
105
  1, 2, 3,
106
107
  1, 2, 3,
108
109
  0,
110
};
111
112
static void fill_token_costs(
113
    int c[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS],
114
    const vp8_prob p[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS]
115
148k
                    [ENTROPY_NODES]) {
116
148k
  int i, j, k;
117
118
743k
  for (i = 0; i < BLOCK_TYPES; ++i) {
119
5.35M
    for (j = 0; j < COEF_BANDS; ++j) {
120
19.0M
      for (k = 0; k < PREV_COEF_CONTEXTS; ++k) {
121
        /* check for pt=0 and band > 1 if block type 0
122
         * and 0 if blocktype 1
123
         */
124
14.2M
        if (k == 0 && j > (i == 0)) {
125
4.01M
          vp8_cost_tokens2(c[i][j][k], p[i][j][k], vp8_coef_tree, 2);
126
10.2M
        } else {
127
10.2M
          vp8_cost_tokens(c[i][j][k], p[i][j][k], vp8_coef_tree);
128
10.2M
        }
129
14.2M
      }
130
4.75M
    }
131
594k
  }
132
148k
}
133
134
/* RDMULT boost factors in 1/16 units, indexed by cpi->twopass.next_iiratio
 * (values above 31 use entry 31).  vp8_initialize_rd_consts() applies them on
 * non-key frames in pass 2 as RDMULT += (RDMULT * rd_iifactor[n]) >> 4.
 */
static const int rd_iifactor[32] = { 4, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0,
135
                                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
136
                                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
137
138
/* values are now correlated to quantizer */
139
static const int sad_per_bit16lut[QINDEX_RANGE] = {
140
  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  3,  3,  3,
141
  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  4,  4,  4,  4,  4,  4,  4,  4,
142
  4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  6,  6,  6,
143
  6,  6,  6,  6,  6,  6,  6,  6,  6,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
144
  7,  7,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  9,  9,  9,  9,  9,
145
  9,  9,  9,  9,  9,  9,  9,  10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11,
146
  11, 11, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 14, 14
147
};
148
static const int sad_per_bit4lut[QINDEX_RANGE] = {
149
  2,  2,  2,  2,  2,  2,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
150
  3,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  6,  6,
151
  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  7,  7,  7,  7,  7,  7,  7,  7,  7,
152
  7,  7,  7,  7,  8,  8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  10, 10, 10, 10,
153
  10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12,
154
  12, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16,
155
  16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 19, 20, 20, 20,
156
};
157
158
148k
/* Set the motion-estimation SAD-per-bit constants of cpi->mb for the given
 * quantizer index.
 *
 * QIndex is used directly as an index into sad_per_bit16lut and
 * sad_per_bit4lut (both QINDEX_RANGE entries); no range check is done here.
 */
void vp8cx_initialize_me_consts(VP8_COMP *cpi, int QIndex) {
  const int per_bit_4 = sad_per_bit4lut[QIndex];
  const int per_bit_16 = sad_per_bit16lut[QIndex];

  cpi->mb.sadperbit4 = per_bit_4;
  cpi->mb.sadperbit16 = per_bit_16;
}
162
163
148k
/* Initialise the rate-distortion constants for the frame about to be coded.
 *
 * From the quantizer value `Qvalue` this sets:
 *   - cpi->RDMULT and cpi->RDDIV,
 *   - cpi->mb.errorperbit (never left at 0),
 *   - the speed features (via vp8_set_speed_features),
 *   - x->mode_test_hit_counts (cleared),
 *   - x->rd_threshes and cpi->rd_baseline_thresh for every mode,
 *   - the token cost table cpi->mb.token_costs and the mode costs.
 */
void vp8_initialize_rd_consts(VP8_COMP *cpi, MACROBLOCK *x, int Qvalue) {
  int q;
  int i;
  double capped_q = (Qvalue < 160) ? (double)Qvalue : 160.0;
  double rdconst = 2.80;

  vpx_clear_system_state();

  /* Further tests required to see if optimum is different
   * for key frames, golden frames and arf frames.
   */
  cpi->RDMULT = (int)(rdconst * (capped_q * capped_q));

  /* Extend rate multiplier along side quantizer zbin increases */
  if (cpi->mb.zbin_over_quant > 0) {
    double oq_factor;
    double modq;

    /* Experimental code using the same basic equation as used for Q above
     * The units of cpi->mb.zbin_over_quant are 1/128 of Q bin size
     */
    oq_factor = 1.0 + ((double)0.0015625 * cpi->mb.zbin_over_quant);
    modq = (int)((double)capped_q * oq_factor);
    cpi->RDMULT = (int)(rdconst * (modq * modq));
  }

  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    if (cpi->twopass.next_iiratio > 31) {
      cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
    } else {
      cpi->RDMULT +=
          (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
    }
  }

  cpi->mb.errorperbit = (cpi->RDMULT / 110);
  cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);

  vp8_set_speed_features(cpi);

  for (i = 0; i < MAX_MODES; ++i) {
    x->mode_test_hit_counts[i] = 0;
  }

  q = (int)pow(Qvalue, 1.25);

  if (q < 8) q = 8;

  if (cpi->RDMULT > 1000) {
    cpi->RDDIV = 1;
    cpi->RDMULT /= 100;

    for (i = 0; i < MAX_MODES; ++i) {
      if (cpi->sf.thresh_mult[i] < INT_MAX) {
        /* Form the product in 64 bits: thresh_mult[i] * q can exceed INT_MAX
         * even when thresh_mult[i] itself does not, and signed int overflow
         * is undefined.  Results that do not fit are saturated to INT_MAX,
         * the same "disabled" value the other branches use.
         */
        const int64_t scaled = (int64_t)cpi->sf.thresh_mult[i] * q / 100;

        x->rd_threshes[i] = (scaled < INT_MAX) ? (int)scaled : INT_MAX;
      } else {
        x->rd_threshes[i] = INT_MAX;
      }

      cpi->rd_baseline_thresh[i] = x->rd_threshes[i];
    }
  } else {
    cpi->RDDIV = 100;

    for (i = 0; i < MAX_MODES; ++i) {
      if (cpi->sf.thresh_mult[i] < (INT_MAX / q)) {
        x->rd_threshes[i] = cpi->sf.thresh_mult[i] * q;
      } else {
        x->rd_threshes[i] = INT_MAX;
      }

      cpi->rd_baseline_thresh[i] = x->rd_threshes[i];
    }
  }

  {
    /* build token cost array for the type of frame we have now:
     * lfc_a when the alt-ref frame is refreshed, lfc_g when the golden frame
     * is refreshed, lfc_n otherwise.
     */
    FRAME_CONTEXT *l = &cpi->lfc_n;

    if (cpi->common.refresh_alt_ref_frame) {
      l = &cpi->lfc_a;
    } else if (cpi->common.refresh_golden_frame) {
      l = &cpi->lfc_g;
    }

    fill_token_costs(cpi->mb.token_costs,
                     (const vp8_prob(*)[8][3][11])l->coef_probs);

    /* TODO make these mode costs depend on last,alt or gold too.  (jbb) */
    vp8_init_mode_costs(cpi);
  }
}
260
261
57.4k
/* Adapt cpi->Speed to the measured encode times.
 *
 * A per-frame time budget is derived as (1000000 / cpi->framerate) scaled by
 * (16 - cpi->oxcf.cpu_used) / 16 and compared against cpi->avg_pick_mode_time
 * and cpi->avg_encode_time:
 *   - over budget: Speed is raised by 4 (capped at 16) and the averages reset;
 *   - no pick-mode measurement yet: Speed is set to 4;
 *   - otherwise Speed may be raised by 2 and/or lowered by 1 depending on how
 *     the budget compares with the average encode time, resetting the
 *     averages whenever it changes.
 */
void vp8_auto_select_speed(VP8_COMP *cpi) {
  int budget = (int)(1000000 / cpi->framerate);
  int within_budget;

  budget = budget * (16 - cpi->oxcf.cpu_used) / 16;

  within_budget =
      cpi->avg_pick_mode_time < budget &&
      (cpi->avg_encode_time - cpi->avg_pick_mode_time) < budget;

  if (!within_budget) {
    cpi->Speed += 4;

    if (cpi->Speed > 16) cpi->Speed = 16;

    cpi->avg_pick_mode_time = 0;
    cpi->avg_encode_time = 0;
    return;
  }

  if (cpi->avg_pick_mode_time == 0) {
    cpi->Speed = 4;
    return;
  }

  /* Encode time is more than ~105% of the budget: speed up. */
  if (budget * 100 < cpi->avg_encode_time * 95) {
    cpi->Speed += 2;
    cpi->avg_pick_mode_time = 0;
    cpi->avg_encode_time = 0;

    if (cpi->Speed > 16) {
      cpi->Speed = 16;
    }
  }

  /* Comfortably under budget for the current speed: slow down one step. */
  if (budget * 100 > cpi->avg_encode_time * auto_speed_thresh[cpi->Speed]) {
    cpi->Speed -= 1;
    cpi->avg_pick_mode_time = 0;
    cpi->avg_encode_time = 0;

    /* In real-time mode, cpi->speed is in [4, 16]. */
    if (cpi->Speed < 4) {
      cpi->Speed = 4;
    }
  }
}
318
319
0
/* Sum of squared differences between the 16 values at `coeff` and the 16
 * values at `dqcoeff`.
 */
int vp8_block_error_c(short *coeff, short *dqcoeff) {
  const short *const end = coeff + 16;
  int sum_sq = 0;

  while (coeff < end) {
    const int delta = *coeff++ - *dqcoeff++;
    sum_sq += delta * delta;
  }

  return sum_sq;
}
330
331
0
/* Sum of squared differences between coeff and dqcoeff over blocks 0..15 of
 * the macroblock.  Within each block only positions dc..15 are compared, so
 * passing dc = 1 leaves position 0 of every block out of the sum.
 */
int vp8_mbblock_error_c(MACROBLOCK *mb, int dc) {
  int total = 0;
  int blk;

  for (blk = 0; blk < 16; ++blk) {
    BLOCK *const src = &mb->block[blk];
    BLOCKD *const rec = &mb->e_mbd.block[blk];
    int block_sum = 0;
    int k = dc;

    while (k < 16) {
      const int delta = src->coeff[k] - rec->dqcoeff[k];
      block_sum += delta * delta;
      ++k;
    }

    total += block_sum;
  }

  return total;
}
353
354
0
/* Sum of vp8_block_error_c() over blocks 16..23 of the macroblock. */
int vp8_mbuverror_c(MACROBLOCK *mb) {
  int total = 0;
  int blk = 16;

  do {
    total += vp8_block_error_c(mb->block[blk].coeff,
                               mb->e_mbd.block[blk].dqcoeff);
  } while (++blk < 24);

  return total;
}
370
371
14.3k
/* Combined SSE of the two 8x8 blocks addressed through blocks 16 and 20 of
 * `x` against the u/v buffers of x->e_mbd.pre.
 *
 * The macroblock motion vector is moved one unit away from zero and halved
 * before use.  Its low three bits select between the sub-pixel and the plain
 * 8x8 variance routine; the remaining bits give the offset into the
 * reference buffers.  Only the SSE outputs of the variance calls are used.
 */
int VP8_UVSSE(MACROBLOCK *x) {
  unsigned char *const src_u = (*(x->block[16].base_src) + x->block[16].src);
  unsigned char *const src_v = (*(x->block[20].base_src) + x->block[20].src);
  const int src_stride = x->block[16].src_stride;
  const int ref_stride = x->e_mbd.pre.uv_stride;
  int row = x->e_mbd.mode_info_context->mbmi.mv.as_mv.row;
  int col = x->e_mbd.mode_info_context->mbmi.mv.as_mv.col;
  unsigned char *ref_u;
  unsigned char *ref_v;
  unsigned int sse_u = 0;
  unsigned int sse_v = 0;
  int offset;

  /* Step away from zero, then halve. */
  row = (row + ((row < 0) ? -1 : 1)) / 2;
  col = (col + ((col < 0) ? -1 : 1)) / 2;

  offset = (row >> 3) * ref_stride + (col >> 3);
  ref_u = x->e_mbd.pre.u_buffer + offset;
  ref_v = x->e_mbd.pre.v_buffer + offset;

  if ((row | col) & 7) {
    vpx_sub_pixel_variance8x8(ref_u, ref_stride, col & 7, row & 7, src_u,
                              src_stride, &sse_u);
    vpx_sub_pixel_variance8x8(ref_v, ref_stride, col & 7, row & 7, src_v,
                              src_stride, &sse_v);
  } else {
    vpx_variance8x8(ref_u, ref_stride, src_u, src_stride, &sse_u);
    vpx_variance8x8(ref_v, ref_stride, src_v, src_stride, &sse_v);
  }

  return sse_u + sse_v;
}
416
417
/* Token cost of the quantized coefficients of block `b`.
 *
 * mb    supplies the token_costs table.
 * type  plane type used to index token_costs; coding starts at coefficient 1
 *       when type is 0 and at coefficient 0 otherwise.
 * a, l  above/left entropy contexts.  They are combined into the starting
 *       context and both are overwritten with 1 if at least one coefficient
 *       was costed, 0 otherwise.
 *
 * Returns the summed cost, including the end-of-block token whenever fewer
 * than 16 positions were consumed.
 */
static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, int type, ENTROPY_CONTEXT *a,
                       ENTROPY_CONTEXT *l) {
  const int first = !type; /* start at coef 0, unless Y with Y2 */
  const int eob = (int)(*b->eob);
  const short *const qcoeff = b->qcoeff;
  int pos = first;
  int ctx; /* surrounding block/prev coef predictor */
  int bits = 0;

  VP8_COMBINEENTROPYCONTEXTS(ctx, *a, *l);

  assert(eob <= 16);
  while (pos < eob) {
    const int value = qcoeff[vp8_default_zig_zag1d[pos]];
    const int token = vp8_dct_value_tokens_ptr[value].Token;

    bits += mb->token_costs[type][vp8_coef_bands[pos]][ctx][token];
    bits += vp8_dct_value_cost_ptr[value];
    ctx = vp8_prev_token_class[token];
    ++pos;
  }

  if (pos < 16) {
    bits += mb->token_costs[type][vp8_coef_bands[pos]][ctx][DCT_EOB_TOKEN];
  }

  ctx = (pos != first); /* is eob first coefficient; */
  *a = *l = ctx;

  return bits;
}
445
446
8.35M
/* Token cost of blocks 0..15 (costed as PLANE_TYPE_Y_NO_DC) plus block 24
 * (costed as PLANE_TYPE_Y2).
 *
 * The costing runs on local copies of the above/left entropy contexts, so the
 * macroblock's own contexts are not modified.
 */
static int vp8_rdcost_mby(MACROBLOCK *mb) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  ENTROPY_CONTEXT_PLANES above_copy = *xd->above_context;
  ENTROPY_CONTEXT_PLANES left_copy = *xd->left_context;
  ENTROPY_CONTEXT *const ta = (ENTROPY_CONTEXT *)&above_copy;
  ENTROPY_CONTEXT *const tl = (ENTROPY_CONTEXT *)&left_copy;
  int total = 0;
  int blk;

  for (blk = 0; blk < 16; ++blk) {
    total += cost_coeffs(mb, &xd->block[blk], PLANE_TYPE_Y_NO_DC,
                         ta + vp8_block2above[blk], tl + vp8_block2left[blk]);
  }

  total += cost_coeffs(mb, &xd->block[24], PLANE_TYPE_Y2,
                       ta + vp8_block2above[24], tl + vp8_block2left[24]);

  return total;
}
470
471
8.35M
/* Rate and distortion of the 16x16 luma residual against the predictor
 * currently held in mb->e_mbd.predictor.
 *
 * Steps: subtract the predictor, forward-transform blocks 0..15 two at a
 * time, gather coeff[0] and coeff[16] of every pair into the src_diff of
 * block 24, Walsh-transform that block, quantize all 17 blocks, then measure.
 *
 * Rate        receives vp8_rdcost_mby(mb).
 * Distortion  receives (4 * error of blocks 0..15 without position 0
 *             + error of block 24) >> 4.
 */
static void macro_block_yrd(MACROBLOCK *mb, int *Rate, int *Distortion) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  BLOCK *const y2_src = &mb->block[24];
  BLOCKD *const y2_rec = &xd->block[24];
  short *const y2_dc = y2_src->src_diff;
  int blk;
  int sse;

  vp8_subtract_mby(mb->src_diff, *(mb->block[0].base_src),
                   mb->block[0].src_stride, xd->predictor, 16);

  /* Fdct and building the 2nd order block */
  for (blk = 0; blk < 16; blk += 2) {
    BLOCK *const pair = &mb->block[blk];

    mb->short_fdct8x4(pair->src_diff, pair->coeff, 32);
    y2_dc[blk] = pair->coeff[0];
    y2_dc[blk + 1] = pair->coeff[16];
  }

  /* 2nd order fdct */
  mb->short_walsh4x4(y2_src->src_diff, y2_src->coeff, 8);

  /* Quantization */
  for (blk = 0; blk < 16; ++blk) {
    mb->quantize_b(&mb->block[blk], &xd->block[blk]);
  }

  /* Quantization of the 2nd order block */
  mb->quantize_b(y2_src, y2_rec);

  /* Distortion */
  sse = vp8_mbblock_error(mb, 1) << 2;
  sse += vp8_block_error(y2_src->coeff, y2_rec->dqcoeff);
  *Distortion = (sse >> 4);

  /* rate */
  *Rate = vp8_rdcost_mby(mb);
}
510
511
26.3M
/* Copy a 4x4 block of bytes between two buffers that both use a row stride of
 * 16 bytes: bytes 0..3 of rows 0..3 are copied, everything else in `dst` is
 * left untouched.
 *
 * memcpy is used instead of accessing the byte buffers through
 * `unsigned int *`: that cast broke the effective-type (strict aliasing)
 * rule, required 4-byte alignment of both pointers, and silently assumed
 * sizeof(unsigned int) == 4.
 */
static void copy_predictor(unsigned char *dst, const unsigned char *predictor) {
  int row;

  for (row = 0; row < 4; ++row) {
    memcpy(dst + row * 16, predictor + row * 16, 4);
  }
}
519
static int rd_pick_intra4x4block(MACROBLOCK *x, BLOCK *be, BLOCKD *b,
520
                                 B_PREDICTION_MODE *best_mode,
521
                                 const int *bmode_costs, ENTROPY_CONTEXT *a,
522
                                 ENTROPY_CONTEXT *l,
523
524
                                 int *bestrate, int *bestratey,
525
14.1M
                                 int *bestdistortion) {
526
14.1M
  B_PREDICTION_MODE mode;
527
14.1M
  int best_rd = INT_MAX;
528
14.1M
  int rate = 0;
529
14.1M
  int distortion;
530
531
14.1M
  ENTROPY_CONTEXT ta = *a, tempa = *a;
532
14.1M
  ENTROPY_CONTEXT tl = *l, templ = *l;
533
  /*
534
   * The predictor buffer is a 2d buffer with a stride of 16.  Create
535
   * a temp buffer that meets the stride requirements, but we are only
536
   * interested in the left 4x4 block
537
   * */
538
14.1M
  DECLARE_ALIGNED(16, unsigned char, best_predictor[16 * 4]);
539
14.1M
  DECLARE_ALIGNED(16, short, best_dqcoeff[16]);
540
14.1M
  int dst_stride = x->e_mbd.dst.y_stride;
541
14.1M
  unsigned char *dst = x->e_mbd.dst.y_buffer + b->offset;
542
543
14.1M
  unsigned char *Above = dst - dst_stride;
544
14.1M
  unsigned char *yleft = dst - 1;
545
14.1M
  unsigned char top_left = Above[-1];
546
547
156M
  for (mode = B_DC_PRED; mode <= B_HU_PRED; ++mode) {
548
141M
    int this_rd;
549
141M
    int ratey;
550
551
141M
    rate = bmode_costs[mode];
552
553
141M
    vp8_intra4x4_predict(Above, yleft, dst_stride, mode, b->predictor, 16,
554
141M
                         top_left);
555
141M
    vp8_subtract_b(be, b, 16);
556
141M
    x->short_fdct4x4(be->src_diff, be->coeff, 32);
557
141M
    x->quantize_b(be, b);
558
559
141M
    tempa = ta;
560
141M
    templ = tl;
561
562
141M
    ratey = cost_coeffs(x, b, PLANE_TYPE_Y_WITH_DC, &tempa, &templ);
563
141M
    rate += ratey;
564
141M
    distortion = vp8_block_error(be->coeff, b->dqcoeff) >> 2;
565
566
141M
    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
567
568
141M
    if (this_rd < best_rd) {
569
26.3M
      *bestrate = rate;
570
26.3M
      *bestratey = ratey;
571
26.3M
      *bestdistortion = distortion;
572
26.3M
      best_rd = this_rd;
573
26.3M
      *best_mode = mode;
574
26.3M
      *a = tempa;
575
26.3M
      *l = templ;
576
26.3M
      copy_predictor(best_predictor, b->predictor);
577
26.3M
      memcpy(best_dqcoeff, b->dqcoeff, 32);
578
26.3M
    }
579
141M
  }
580
14.1M
  b->bmi.as_mode = *best_mode;
581
582
14.1M
  vp8_short_idct4x4llm(best_dqcoeff, best_predictor, 16, dst, dst_stride);
583
584
14.1M
  return best_rd;
585
14.1M
}
586
587
/* Choose a 4x4 intra mode for each of the 16 luma blocks (B_PRED).
 *
 * Blocks are searched in order with rd_pick_intra4x4block() on local copies
 * of the entropy contexts.  Mode costs come from mb->inter_bmode_costs, or on
 * key frames from mb->bmode_costs indexed by the above and left block modes.
 * The search stops as soon as the accumulated per-block RD reaches `best_rd`.
 *
 * Returns INT_MAX (outputs untouched) when the search was cut short,
 * otherwise the RD cost of the total rate and distortion, with *Rate,
 * *rate_y and *Distortion filled in.
 */
static int rd_pick_intra4x4mby_modes(MACROBLOCK *mb, int *Rate, int *rate_y,
                                     int *Distortion, int best_rd) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  int rate_sum = mb->mbmode_cost[xd->frame_type][B_PRED];
  int dist_sum = 0;
  int coef_rate_sum = 0;
  int64_t rd_sum = 0;
  ENTROPY_CONTEXT_PLANES above_copy = *xd->above_context;
  ENTROPY_CONTEXT_PLANES left_copy = *xd->left_context;
  ENTROPY_CONTEXT *const ta = (ENTROPY_CONTEXT *)&above_copy;
  ENTROPY_CONTEXT *const tl = (ENTROPY_CONTEXT *)&left_copy;
  const int *mode_costs = mb->inter_bmode_costs;
  int blk;

  intra_prediction_down_copy(xd, xd->dst.y_buffer - xd->dst.y_stride + 16);

  for (blk = 0; blk < 16; ++blk) {
    MODE_INFO *const mic = xd->mode_info_context;
    const int mis = xd->mode_info_stride;
    B_PREDICTION_MODE best_mode = B_MODE_COUNT;
    int r = 0;
    int ry = 0;
    int d = 0;

    if (xd->frame_type == KEY_FRAME) {
      const B_PREDICTION_MODE A = above_block_mode(mic, blk, mis);
      const B_PREDICTION_MODE L = left_block_mode(mic, blk);

      mode_costs = mb->bmode_costs[A][L];
    }

    rd_sum += rd_pick_intra4x4block(
        mb, &mb->block[blk], &xd->block[blk], &best_mode, mode_costs,
        ta + vp8_block2above[blk], tl + vp8_block2left[blk], &r, &ry, &d);

    rate_sum += r;
    dist_sum += d;
    coef_rate_sum += ry;

    assert(best_mode != B_MODE_COUNT);
    mic->bmi[blk].as_mode = best_mode;

    /* Already no better than the best candidate so far: give up. */
    if (rd_sum >= (int64_t)best_rd) return INT_MAX;
  }

  *Rate = rate_sum;
  *rate_y = coef_rate_sum;
  *Distortion = dist_sum;

  return RDCOST(mb->rdmult, mb->rddiv, rate_sum, dist_sum);
}
645
646
/* Search the 16x16 luma intra modes DC_PRED..TM_PRED and keep the cheapest.
 *
 * For each mode the predictor is built and macro_block_yrd() measures the
 * coefficient rate and distortion; the mode cost is added to the rate.
 * On return mbmi.mode holds the winning mode, *Rate / *rate_y / *Distortion
 * describe it, and its RD cost is returned.
 */
static int rd_pick_intra16x16mby_mode(MACROBLOCK *x, int *Rate, int *rate_y,
                                      int *Distortion) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_PREDICTION_MODE mode_selected = MB_MODE_COUNT;
  MB_PREDICTION_MODE mode;
  int lowest_rd = INT_MAX;

  /* Y Search for 16x16 intra prediction mode */
  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    int coef_rate;
    int total_rate;
    int dist;
    int rd;

    xd->mode_info_context->mbmi.mode = mode;

    vp8_build_intra_predictors_mby_s(xd, xd->dst.y_buffer - xd->dst.y_stride,
                                     xd->dst.y_buffer - 1, xd->dst.y_stride,
                                     xd->predictor, 16);

    macro_block_yrd(x, &coef_rate, &dist);
    total_rate =
        coef_rate +
        x->mbmode_cost[xd->frame_type][xd->mode_info_context->mbmi.mode];

    rd = RDCOST(x->rdmult, x->rddiv, total_rate, dist);

    if (rd >= lowest_rd) continue;

    lowest_rd = rd;
    mode_selected = mode;
    *Rate = total_rate;
    *rate_y = coef_rate;
    *Distortion = dist;
  }

  assert(mode_selected != MB_MODE_COUNT);
  xd->mode_info_context->mbmi.mode = mode_selected;
  return lowest_rd;
}
683
684
9.20M
/* Token cost of blocks 16..23, costed as PLANE_TYPE_UV.
 *
 * Works on local copies of the above/left entropy contexts, so the
 * macroblock's own contexts are not modified.
 */
static int rd_cost_mbuv(MACROBLOCK *mb) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  ENTROPY_CONTEXT_PLANES above_copy = *xd->above_context;
  ENTROPY_CONTEXT_PLANES left_copy = *xd->left_context;
  ENTROPY_CONTEXT *const ta = (ENTROPY_CONTEXT *)&above_copy;
  ENTROPY_CONTEXT *const tl = (ENTROPY_CONTEXT *)&left_copy;
  int total = 0;
  int blk = 16;

  while (blk < 24) {
    total += cost_coeffs(mb, &xd->block[blk], PLANE_TYPE_UV,
                         ta + vp8_block2above[blk], tl + vp8_block2left[blk]);
    ++blk;
  }

  return total;
}
705
706
/* Chroma RD cost for a 16x16 inter prediction.
 *
 * Builds the 16x16 inter chroma predictors, subtracts them from the source,
 * transforms and quantizes the residual, then stores the token cost in *rate
 * and a quarter of the coefficient error in *distortion.  Returns the RD cost
 * of the two.  `cpi` and `fullpixel` are unused.
 */
static int rd_inter16x16_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate,
                            int *distortion, int fullpixel) {
  MACROBLOCKD *const xd = &x->e_mbd;

  (void)cpi;
  (void)fullpixel;

  vp8_build_inter16x16_predictors_mbuv(xd);
  vp8_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
                    x->src.uv_stride, xd->predictor + 256,
                    xd->predictor + 320, 8);

  vp8_transform_mbuv(x);
  vp8_quantize_mbuv(x);

  *rate = rd_cost_mbuv(x);
  *distortion = vp8_mbuverror(x) / 4;

  return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
}
724
725
/* Chroma RD cost for a split (4x4) inter prediction.
 *
 * Same pipeline as the 16x16 variant except that the chroma predictors are
 * built with vp8_build_inter4x4_predictors_mbuv().  Stores the token cost in
 * *rate and a quarter of the coefficient error in *distortion, and returns
 * the RD cost of the two.  `cpi` and `fullpixel` are unused.
 */
static int rd_inter4x4_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate,
                          int *distortion, int fullpixel) {
  MACROBLOCKD *const xd = &x->e_mbd;

  (void)cpi;
  (void)fullpixel;

  vp8_build_inter4x4_predictors_mbuv(xd);
  vp8_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
                    x->src.uv_stride, xd->predictor + 256,
                    xd->predictor + 320, 8);

  vp8_transform_mbuv(x);
  vp8_quantize_mbuv(x);

  *rate = rd_cost_mbuv(x);
  *distortion = vp8_mbuverror(x) / 4;

  return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
}
743
744
/* Search the chroma intra modes DC_PRED..TM_PRED and keep the cheapest.
 *
 * Each mode is predicted, subtracted, transformed, quantized and costed.
 * On return mbmi.uv_mode holds the winner, *rate is its total rate (tokens
 * plus mode cost), *rate_tokenonly its token rate alone, and *distortion a
 * quarter of its coefficient error.
 */
static void rd_pick_intra_mbuv_mode(MACROBLOCK *x, int *rate,
                                    int *rate_tokenonly, int *distortion) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_PREDICTION_MODE mode_selected = MB_MODE_COUNT;
  MB_PREDICTION_MODE mode;
  int lowest_rd = INT_MAX;
  int kept_rate = 0;
  int kept_dist = 0;

  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    int token_rate;
    int total_rate;
    int dist;
    int rd;

    xd->mode_info_context->mbmi.uv_mode = mode;

    vp8_build_intra_predictors_mbuv_s(
        xd, xd->dst.u_buffer - xd->dst.uv_stride,
        xd->dst.v_buffer - xd->dst.uv_stride, xd->dst.u_buffer - 1,
        xd->dst.v_buffer - 1, xd->dst.uv_stride, &xd->predictor[256],
        &xd->predictor[320], 8);

    vp8_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
                      x->src.uv_stride, &xd->predictor[256],
                      &xd->predictor[320], 8);
    vp8_transform_mbuv(x);
    vp8_quantize_mbuv(x);

    token_rate = rd_cost_mbuv(x);
    total_rate =
        token_rate + x->intra_uv_mode_cost[xd->frame_type]
                                          [xd->mode_info_context->mbmi.uv_mode];

    dist = vp8_mbuverror(x) / 4;

    rd = RDCOST(x->rdmult, x->rddiv, total_rate, dist);

    if (rd >= lowest_rd) continue;

    lowest_rd = rd;
    kept_dist = dist;
    kept_rate = total_rate;
    *rate_tokenonly = token_rate;
    mode_selected = mode;
  }

  *rate = kept_rate;
  *distortion = kept_dist;

  assert(mode_selected != MB_MODE_COUNT);
  xd->mode_info_context->mbmi.uv_mode = mode_selected;
}
796
797
6.94M
/* Cost of signalling inter mode `m` (NEARESTMV..SPLITMV) in vp8_mv_ref_tree,
 * using probabilities derived from `near_mv_ref_ct` by vp8_mv_ref_probs().
 */
int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4]) {
  vp8_prob probs[VP8_MVREFS - 1];

  assert(NEARESTMV <= m && m <= SPLITMV);
  vp8_mv_ref_probs(probs, near_mv_ref_ct);

  return vp8_cost_token(vp8_mv_ref_tree, probs,
                        &vp8_mv_ref_encoding_array[m - NEARESTMV]);
}
804
805
2.81M
/* Store prediction mode `mb` and motion vector `*mv` in the mode info of the
 * macroblock currently addressed by x->e_mbd.mode_info_context.
 */
void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv) {
  MODE_INFO *const mi = x->e_mbd.mode_info_context;

  mi->mbmi.mv.as_int = mv->as_int;
  mi->mbmi.mode = mb;
}
809
810
static int labels2mode(MACROBLOCK *x, int const *labelings, int which_label,
811
                       B_PREDICTION_MODE this_mode, int_mv *this_mv,
812
31.1M
                       int_mv *best_ref_mv, int *mvcost[2]) {
813
31.1M
  MACROBLOCKD *const xd = &x->e_mbd;
814
31.1M
  MODE_INFO *const mic = xd->mode_info_context;
815
31.1M
  const int mis = xd->mode_info_stride;
816
817
31.1M
  int cost = 0;
818
31.1M
  int thismvcost = 0;
819
820
  /* We have to be careful retrieving previously-encoded motion vectors.
821
     Ones from this macroblock have to be pulled from the BLOCKD array
822
     as they have not yet made it to the bmi array in our MB_MODE_INFO. */
823
824
31.1M
  int i = 0;
825
826
498M
  do {
827
498M
    BLOCKD *const d = xd->block + i;
828
498M
    const int row = i >> 2, col = i & 3;
829
830
498M
    B_PREDICTION_MODE m;
831
832
498M
    if (labelings[i] != which_label) continue;
833
834
121M
    if (col && labelings[i] == labelings[i - 1]) {
835
62.8M
      m = LEFT4X4;
836
62.8M
    } else if (row && labelings[i] == labelings[i - 4]) {
837
27.6M
      m = ABOVE4X4;
838
31.1M
    } else {
839
      /* the only time we should do costing for new motion vector
840
       * or mode is when we are on a new label  (jbb May 08, 2007)
841
       */
842
31.1M
      switch (m = this_mode) {
843
8.58M
        case NEW4X4:
844
8.58M
          thismvcost = vp8_mv_bit_cost(this_mv, best_ref_mv, mvcost, 102);
845
8.58M
          break;
846
9.08M
        case LEFT4X4:
847
9.08M
          this_mv->as_int = col ? d[-1].bmi.mv.as_int : left_block_mv(mic, i);
848
9.08M
          break;
849
7.08M
        case ABOVE4X4:
850
7.08M
          this_mv->as_int =
851
7.08M
              row ? d[-4].bmi.mv.as_int : above_block_mv(mic, i, mis);
852
7.08M
          break;
853
6.38M
        case ZERO4X4: this_mv->as_int = 0; break;
854
0
        default: break;
855
31.1M
      }
856
857
31.1M
      if (m == ABOVE4X4) { /* replace above with left if same */
858
7.08M
        int_mv left_mv;
859
860
7.08M
        left_mv.as_int = col ? d[-1].bmi.mv.as_int : left_block_mv(mic, i);
861
862
7.08M
        if (left_mv.as_int == this_mv->as_int) m = LEFT4X4;
863
7.08M
      }
864
865
31.1M
      cost = x->inter_bmode_costs[m];
866
31.1M
    }
867
868
121M
    d->bmi.mv.as_int = this_mv->as_int;
869
870
121M
    x->partition_info->bmi[i].mode = m;
871
121M
    x->partition_info->bmi[i].mv.as_int = this_mv->as_int;
872
873
498M
  } while (++i < 16);
874
875
31.1M
  cost += thismvcost;
876
31.1M
  return cost;
877
31.1M
}
878
879
/* Token cost (PLANE_TYPE_Y_WITH_DC) of every luma block among 0..15 whose
 * entry in `labels` equals `which_label`.  The entropy contexts reached
 * through `ta` / `tl` are updated by the costing of each block.
 */
static int rdcost_mbsegment_y(MACROBLOCK *mb, const int *labels,
                              int which_label, ENTROPY_CONTEXT *ta,
                              ENTROPY_CONTEXT *tl) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  int total = 0;
  int blk;

  for (blk = 0; blk < 16; ++blk) {
    if (labels[blk] != which_label) continue;

    total += cost_coeffs(mb, &xd->block[blk], PLANE_TYPE_Y_WITH_DC,
                         ta + vp8_block2above[blk], tl + vp8_block2left[blk]);
  }

  return total;
}
895
/* Inter-predict, subtract, transform and quantize every luma block among
 * 0..15 whose entry in `labels` equals `which_label`.
 *
 * Returns the sum of vp8_block_error() over those blocks.
 */
static unsigned int vp8_encode_inter_mb_segment(MACROBLOCK *x,
                                                int const *labels,
                                                int which_label) {
  const int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *const base_pre = x->e_mbd.pre.y_buffer;
  unsigned int sse = 0;
  int i;

  for (i = 0; i < 16; ++i) {
    BLOCKD *bd;
    BLOCK *be;

    if (labels[i] != which_label) continue;

    bd = &x->e_mbd.block[i];
    be = &x->block[i];

    vp8_build_inter_predictors_b(bd, 16, base_pre, pre_stride,
                                 x->e_mbd.subpixel_predict);
    vp8_subtract_b(be, bd, 16);
    x->short_fdct4x4(be->src_diff, be->coeff, 32);
    x->quantize_b(be, bd);

    sse += vp8_block_error(be->coeff, bd->dqcoeff);
  }

  return sse;
}
920
921
/* Right shift applied to the best motion-search error of a split label before
 * rd_check_segment() compares it with its full-search threshold. Indexed by
 * the segmentation type passed to rd_check_segment().
 */
static const unsigned int segmentation_to_sseshift[4] = { 3, 3, 2, 0 };
922
923
typedef struct {
924
  int_mv *ref_mv;
925
  int_mv mvp;
926
927
  int segment_rd;
928
  int segment_num;
929
  int r;
930
  int d;
931
  int segment_yrate;
932
  B_PREDICTION_MODE modes[16];
933
  int_mv mvs[16];
934
  unsigned char eobs[16];
935
936
  int mvthresh;
937
  int *mdcounts;
938
939
  int_mv sv_mvp[4]; /* save 4 mvp from 8x8 */
940
  int sv_istep[2];  /* save 2 initial step_param for 16x8/8x16 */
941
942
} BEST_SEG_INFO;
943
944
/* Evaluate one way of splitting the macroblock (`segmentation`) and, when its
 * accumulated RD cost beats bsi->segment_rd, record it in *bsi.
 *
 * For every label of the split the modes LEFT4X4..NEW4X4 are tried in order;
 * NEW4X4 first runs a motion search (diamond search, optionally a full search
 * when compressor_speed is 0, then the fractional refinement). A candidate
 * whose full-pel MV lies outside the current MV window of `x` is skipped.
 * The cheapest mode of the label is re-applied with labels2mode() before the
 * next label is processed, and the label loop stops as soon as the running
 * total is no better than bsi->segment_rd.
 *
 * Besides the result fields, bsi->mvp is overwritten while seeding the
 * NEW4X4 searches.
 */
static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, BEST_SEG_INFO *bsi,
                             unsigned int segmentation) {
  int const *const labels = vp8_mbsplits[segmentation];
  const int label_count = vp8_mbsplit_count[segmentation];
  vp8_variance_fn_ptr_t *const v_fn_ptr = &cpi->fn_ptr[segmentation];

  /* 64 makes this threshold really big effectively making it so that we
   * very rarely check mvs on segments.   setting this to 1 would make mv
   * thresh roughly equal to what it is for macroblocks
   */
  const int label_mv_thresh = 1 * bsi->mvthresh / label_count;

  /* Totals for the whole segmentation. */
  int seg_rate;
  int seg_dist = 0;
  int seg_yrate = 0;
  int seg_rd;

  /* Rate / distortion of the best mode of the most recent label that had one;
   * deliberately kept across labels, as in the accumulation below.
   */
  int label_rate = 0;
  int label_dist = 0;

  int rate;
  int i;
  B_PREDICTION_MODE this_mode;

  /* Entropy contexts: state entering the current label, and the state left
   * behind by the best mode of that label.
   */
  ENTROPY_CONTEXT_PLANES ctx_above, ctx_left;
  ENTROPY_CONTEXT_PLANES best_ctx_above, best_ctx_left;

  ctx_above = *x->e_mbd.above_context;
  ctx_left = *x->e_mbd.left_context;

  vp8_zero(best_ctx_above);
  vp8_zero(best_ctx_left);

  /* Segmentation method overheads */
  rate = vp8_cost_token(vp8_mbsplit_tree, vp8_mbsplit_probs,
                        vp8_mbsplit_encodings + segmentation);
  rate += vp8_cost_mv_ref(SPLITMV, bsi->mdcounts);
  seg_rd = RDCOST(x->rdmult, x->rddiv, rate, 0);
  seg_rate = rate;

  for (i = 0; i < label_count; ++i) {
    int_mv mode_mv[B_MODE_COUNT] = { { 0 }, { 0 } };
    B_PREDICTION_MODE mode_selected = ZERO4X4;
    int best_label_rd = INT_MAX;
    int best_label_yrate = 0;

    /* search for the best motion vector on this segment */
    for (this_mode = LEFT4X4; this_mode <= NEW4X4; ++this_mode) {
      ENTROPY_CONTEXT_PLANES trial_above, trial_left;
      ENTROPY_CONTEXT *ta_s;
      ENTROPY_CONTEXT *tl_s;
      int row_fp;
      int col_fp;
      int distortion;
      int labelyrate;
      int this_rd;

      /* Every candidate starts from the contexts entering this label. */
      trial_above = ctx_above;
      trial_left = ctx_left;
      ta_s = (ENTROPY_CONTEXT *)&trial_above;
      tl_s = (ENTROPY_CONTEXT *)&trial_left;

      if (this_mode == NEW4X4) {
        const int sadpb = x->sadperbit4;
        const int sseshift = segmentation_to_sseshift[segmentation];
        int step_param = 0;
        int further_steps;
        int first_blk;
        int steps_done;
        int num00;
        int thissme;
        int bestsme;
        int_mv mvp_full;
        int_mv temp_mv;
        BLOCK *c;
        BLOCKD *e;

        /* Is the best so far sufficiently good that we can't justify
         * doing a new motion search.
         */
        if (best_label_rd < label_mv_thresh) break;

        if (cpi->compressor_speed) {
          if (segmentation == BLOCK_8X16 || segmentation == BLOCK_16X8) {
            bsi->mvp.as_int = bsi->sv_mvp[i].as_int;
            if (i == 1 && segmentation == BLOCK_16X8) {
              bsi->mvp.as_int = bsi->sv_mvp[2].as_int;
            }

            step_param = bsi->sv_istep[i];
          }

          /* use previous block's result as next block's MV
           * predictor.
           */
          if (segmentation == BLOCK_4X4 && i > 0) {
            bsi->mvp.as_int = x->e_mbd.block[i - 1].bmi.mv.as_int;
            if (i == 4 || i == 8 || i == 12) {
              bsi->mvp.as_int = x->e_mbd.block[i - 4].bmi.mv.as_int;
            }
            step_param = 2;
          }
        }

        further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;

        /* Full-pel version of the predictor seeds the search. */
        mvp_full.as_mv.row = bsi->mvp.as_mv.row >> 3;
        mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3;

        /* find first label */
        first_blk = vp8_mbsplit_offset[segmentation][i];
        c = &x->block[first_blk];
        e = &x->e_mbd.block[first_blk];

        bestsme = cpi->diamond_search_sad(
            x, c, e, &mvp_full, &mode_mv[NEW4X4], step_param, sadpb, &num00,
            v_fn_ptr, x->mvcost, bsi->ref_mv);

        /* num00 reported by a search is the number of following steps that
         * are skipped instead of searched.
         */
        steps_done = num00;
        num00 = 0;

        while (steps_done < further_steps) {
          steps_done++;

          if (num00) {
            num00--;
            continue;
          }

          thissme = cpi->diamond_search_sad(
              x, c, e, &mvp_full, &temp_mv, step_param + steps_done, sadpb,
              &num00, v_fn_ptr, x->mvcost, bsi->ref_mv);

          if (thissme < bestsme) {
            bestsme = thissme;
            mode_mv[NEW4X4].as_int = temp_mv.as_int;
          }
        }

        /* Should we do a full search (best quality only) */
        if ((cpi->compressor_speed == 0) && (bestsme >> sseshift) > 4000) {
          /* Check if mvp_full is within the range. */
          vp8_clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max, x->mv_row_min,
                       x->mv_row_max);

          thissme = vp8_full_search_sad(x, c, e, &mvp_full, sadpb, 16,
                                        v_fn_ptr, x->mvcost, bsi->ref_mv);

          if (thissme < bestsme) {
            bestsme = thissme;
            mode_mv[NEW4X4].as_int = e->bmi.mv.as_int;
          } else {
            /* The full search result is actually worse so
             * re-instate the previous best vector
             */
            e->bmi.mv.as_int = mode_mv[NEW4X4].as_int;
          }
        }

        if (bestsme < INT_MAX) {
          /* Only the refined vector is used; these outputs are discarded. */
          int disto;
          unsigned int sse;

          cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4], bsi->ref_mv,
                                       x->errorperbit, v_fn_ptr, x->mvcost,
                                       &disto, &sse);
        }
      } /* NEW4X4 */

      rate = labels2mode(x, labels, i, this_mode, &mode_mv[this_mode],
                         bsi->ref_mv, x->mvcost);

      /* Trap vectors that reach beyond the UMV borders */
      row_fp = mode_mv[this_mode].as_mv.row >> 3;
      col_fp = mode_mv[this_mode].as_mv.col >> 3;
      if (row_fp < x->mv_row_min || row_fp > x->mv_row_max ||
          col_fp < x->mv_col_min || col_fp > x->mv_col_max) {
        continue;
      }

      distortion = vp8_encode_inter_mb_segment(x, labels, i) / 4;

      labelyrate = rdcost_mbsegment_y(x, labels, i, ta_s, tl_s);
      rate += labelyrate;

      this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

      if (this_rd < best_label_rd) {
        best_label_rd = this_rd;
        mode_selected = this_mode;
        best_label_yrate = labelyrate;
        label_rate = rate;
        label_dist = distortion;

        best_ctx_above = trial_above;
        best_ctx_left = trial_left;
      }
    } /*for each 4x4 mode*/

    /* Carry the winner's contexts into the next label. */
    ctx_above = best_ctx_above;
    ctx_left = best_ctx_left;

    /* Re-apply the winning mode; the rate it returns is not needed here. */
    labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected],
                bsi->ref_mv, x->mvcost);

    seg_rate += label_rate;
    seg_dist += label_dist;
    seg_yrate += best_label_yrate;
    seg_rd += best_label_rd;

    if (seg_rd >= bsi->segment_rd) break;

  } /* for each label */

  if (seg_rd < bsi->segment_rd) {
    bsi->r = seg_rate;
    bsi->d = seg_dist;
    bsi->segment_yrate = seg_yrate;
    bsi->segment_rd = seg_rd;
    bsi->segment_num = segmentation;

    /* store everything needed to come back to this!! */
    for (i = 0; i < 16; ++i) {
      bsi->mvs[i].as_mv = x->partition_info->bmi[i].mv.as_mv;
      bsi->modes[i] = x->partition_info->bmi[i].mode;
      bsi->eobs[i] = x->e_mbd.eobs[i];
    }
  }
}
1184
1185
1.60M
static void vp8_cal_step_param(int sr, int *sp) {
1186
1.60M
  int step = 0;
1187
1188
1.60M
  if (sr > MAX_FIRST_STEP) {
1189
56.4k
    sr = MAX_FIRST_STEP;
1190
1.54M
  } else if (sr < 1) {
1191
752k
    sr = 1;
1192
752k
  }
1193
1194
4.69M
  while (sr >>= 1) step++;
1195
1196
1.60M
  *sp = MAX_MVSEARCH_STEPS - 1 - step;
1197
1.60M
}
1198
1199
static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
1200
                                           int_mv *best_ref_mv, int best_rd,
1201
                                           int *mdcounts, int *returntotrate,
1202
                                           int *returnyrate,
1203
                                           int *returndistortion,
1204
715k
                                           int mvthresh) {
1205
715k
  int i;
1206
715k
  BEST_SEG_INFO bsi;
1207
1208
715k
  memset(&bsi, 0, sizeof(bsi));
1209
1210
715k
  bsi.segment_rd = best_rd;
1211
715k
  bsi.ref_mv = best_ref_mv;
1212
715k
  bsi.mvp.as_int = best_ref_mv->as_int;
1213
715k
  bsi.mvthresh = mvthresh;
1214
715k
  bsi.mdcounts = mdcounts;
1215
1216
12.1M
  for (i = 0; i < 16; ++i) {
1217
11.4M
    bsi.modes[i] = ZERO4X4;
1218
11.4M
  }
1219
1220
715k
  if (cpi->compressor_speed == 0) {
1221
    /* for now, we will keep the original segmentation order
1222
       when in best quality mode */
1223
0
    rd_check_segment(cpi, x, &bsi, BLOCK_16X8);
1224
0
    rd_check_segment(cpi, x, &bsi, BLOCK_8X16);
1225
0
    rd_check_segment(cpi, x, &bsi, BLOCK_8X8);
1226
0
    rd_check_segment(cpi, x, &bsi, BLOCK_4X4);
1227
715k
  } else {
1228
715k
    int sr;
1229
1230
715k
    rd_check_segment(cpi, x, &bsi, BLOCK_8X8);
1231
1232
715k
    if (bsi.segment_rd < best_rd) {
1233
400k
      int col_min = ((best_ref_mv->as_mv.col + 7) >> 3) - MAX_FULL_PEL_VAL;
1234
400k
      int row_min = ((best_ref_mv->as_mv.row + 7) >> 3) - MAX_FULL_PEL_VAL;
1235
400k
      int col_max = (best_ref_mv->as_mv.col >> 3) + MAX_FULL_PEL_VAL;
1236
400k
      int row_max = (best_ref_mv->as_mv.row >> 3) + MAX_FULL_PEL_VAL;
1237
1238
400k
      int tmp_col_min = x->mv_col_min;
1239
400k
      int tmp_col_max = x->mv_col_max;
1240
400k
      int tmp_row_min = x->mv_row_min;
1241
400k
      int tmp_row_max = x->mv_row_max;
1242
1243
      /* Get intersection of UMV window and valid MV window to reduce # of
1244
       * checks in diamond search. */
1245
400k
      if (x->mv_col_min < col_min) x->mv_col_min = col_min;
1246
400k
      if (x->mv_col_max > col_max) x->mv_col_max = col_max;
1247
400k
      if (x->mv_row_min < row_min) x->mv_row_min = row_min;
1248
400k
      if (x->mv_row_max > row_max) x->mv_row_max = row_max;
1249
1250
      /* Get 8x8 result */
1251
400k
      bsi.sv_mvp[0].as_int = bsi.mvs[0].as_int;
1252
400k
      bsi.sv_mvp[1].as_int = bsi.mvs[2].as_int;
1253
400k
      bsi.sv_mvp[2].as_int = bsi.mvs[8].as_int;
1254
400k
      bsi.sv_mvp[3].as_int = bsi.mvs[10].as_int;
1255
1256
      /* Use 8x8 result as 16x8/8x16's predictor MV. Adjust search range
1257
       * according to the closeness of 2 MV. */
1258
      /* block 8X16 */
1259
400k
      {
1260
400k
        sr =
1261
400k
            MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[2].as_mv.row)) >> 3,
1262
400k
                 (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[2].as_mv.col)) >> 3);
1263
400k
        vp8_cal_step_param(sr, &bsi.sv_istep[0]);
1264
1265
400k
        sr =
1266
400k
            MAXF((abs(bsi.sv_mvp[1].as_mv.row - bsi.sv_mvp[3].as_mv.row)) >> 3,
1267
400k
                 (abs(bsi.sv_mvp[1].as_mv.col - bsi.sv_mvp[3].as_mv.col)) >> 3);
1268
400k
        vp8_cal_step_param(sr, &bsi.sv_istep[1]);
1269
1270
400k
        rd_check_segment(cpi, x, &bsi, BLOCK_8X16);
1271
400k
      }
1272
1273
      /* block 16X8 */
1274
400k
      {
1275
400k
        sr =
1276
400k
            MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[1].as_mv.row)) >> 3,
1277
400k
                 (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[1].as_mv.col)) >> 3);
1278
400k
        vp8_cal_step_param(sr, &bsi.sv_istep[0]);
1279
1280
400k
        sr =
1281
400k
            MAXF((abs(bsi.sv_mvp[2].as_mv.row - bsi.sv_mvp[3].as_mv.row)) >> 3,
1282
400k
                 (abs(bsi.sv_mvp[2].as_mv.col - bsi.sv_mvp[3].as_mv.col)) >> 3);
1283
400k
        vp8_cal_step_param(sr, &bsi.sv_istep[1]);
1284
1285
400k
        rd_check_segment(cpi, x, &bsi, BLOCK_16X8);
1286
400k
      }
1287
1288
      /* If 8x8 is better than 16x8/8x16, then do 4x4 search */
1289
      /* Not skip 4x4 if speed=0 (good quality) */
1290
400k
      if (cpi->sf.no_skip_block4x4_search || bsi.segment_num == BLOCK_8X8)
1291
      /* || (sv_segment_rd8x8-bsi.segment_rd) < sv_segment_rd8x8>>5) */
1292
149k
      {
1293
149k
        bsi.mvp.as_int = bsi.sv_mvp[0].as_int;
1294
149k
        rd_check_segment(cpi, x, &bsi, BLOCK_4X4);
1295
149k
      }
1296
1297
      /* restore UMV window */
1298
400k
      x->mv_col_min = tmp_col_min;
1299
400k
      x->mv_col_max = tmp_col_max;
1300
400k
      x->mv_row_min = tmp_row_min;
1301
400k
      x->mv_row_max = tmp_row_max;
1302
400k
    }
1303
715k
  }
1304
1305
  /* set it to the best */
1306
12.1M
  for (i = 0; i < 16; ++i) {
1307
11.4M
    BLOCKD *bd = &x->e_mbd.block[i];
1308
1309
11.4M
    bd->bmi.mv.as_int = bsi.mvs[i].as_int;
1310
11.4M
    *bd->eob = bsi.eobs[i];
1311
11.4M
  }
1312
1313
715k
  *returntotrate = bsi.r;
1314
715k
  *returndistortion = bsi.d;
1315
715k
  *returnyrate = bsi.segment_yrate;
1316
1317
  /* save partitions */
1318
715k
  x->e_mbd.mode_info_context->mbmi.partitioning = bsi.segment_num;
1319
715k
  x->partition_info->count = vp8_mbsplit_count[bsi.segment_num];
1320
1321
3.66M
  for (i = 0; i < x->partition_info->count; ++i) {
1322
2.94M
    int j;
1323
1324
2.94M
    j = vp8_mbsplit_offset[bsi.segment_num][i];
1325
1326
2.94M
    x->partition_info->bmi[i].mode = bsi.modes[j];
1327
2.94M
    x->partition_info->bmi[i].mv.as_mv = bsi.mvs[j].as_mv;
1328
2.94M
  }
1329
  /*
1330
   * used to set x->e_mbd.mode_info_context->mbmi.mv.as_int
1331
   */
1332
715k
  x->partition_info->bmi[15].mv.as_int = bsi.mvs[15].as_int;
1333
1334
715k
  return bsi.segment_rd;
1335
715k
}
1336
1337
/* The improved MV prediction */
1338
void vp8_mv_pred(VP8_COMP *cpi, MACROBLOCKD *xd, const MODE_INFO *here,
1339
                 int_mv *mvp, int refframe, int *ref_frame_sign_bias, int *sr,
1340
1.60M
                 int near_sadidx[]) {
1341
1.60M
  const MODE_INFO *above = here - xd->mode_info_stride;
1342
1.60M
  const MODE_INFO *left = here - 1;
1343
1.60M
  const MODE_INFO *aboveleft = above - 1;
1344
1.60M
  int_mv near_mvs[8];
1345
1.60M
  int near_ref[8];
1346
1.60M
  int_mv mv;
1347
1.60M
  int vcnt = 0;
1348
1.60M
  int find = 0;
1349
1.60M
  int mb_offset;
1350
1351
1.60M
  int mvx[8];
1352
1.60M
  int mvy[8];
1353
1.60M
  int i;
1354
1355
1.60M
  mv.as_int = 0;
1356
1357
1.60M
  if (here->mbmi.ref_frame != INTRA_FRAME) {
1358
1.60M
    near_mvs[0].as_int = near_mvs[1].as_int = near_mvs[2].as_int =
1359
1.60M
        near_mvs[3].as_int = near_mvs[4].as_int = near_mvs[5].as_int =
1360
1.60M
            near_mvs[6].as_int = near_mvs[7].as_int = 0;
1361
1.60M
    near_ref[0] = near_ref[1] = near_ref[2] = near_ref[3] = near_ref[4] =
1362
1.60M
        near_ref[5] = near_ref[6] = near_ref[7] = 0;
1363
1364
    /* read in 3 nearby block's MVs from current frame as prediction
1365
     * candidates.
1366
     */
1367
1.60M
    if (above->mbmi.ref_frame != INTRA_FRAME) {
1368
483k
      near_mvs[vcnt].as_int = above->mbmi.mv.as_int;
1369
483k
      mv_bias(ref_frame_sign_bias[above->mbmi.ref_frame], refframe,
1370
483k
              &near_mvs[vcnt], ref_frame_sign_bias);
1371
483k
      near_ref[vcnt] = above->mbmi.ref_frame;
1372
483k
    }
1373
1.60M
    vcnt++;
1374
1.60M
    if (left->mbmi.ref_frame != INTRA_FRAME) {
1375
646k
      near_mvs[vcnt].as_int = left->mbmi.mv.as_int;
1376
646k
      mv_bias(ref_frame_sign_bias[left->mbmi.ref_frame], refframe,
1377
646k
              &near_mvs[vcnt], ref_frame_sign_bias);
1378
646k
      near_ref[vcnt] = left->mbmi.ref_frame;
1379
646k
    }
1380
1.60M
    vcnt++;
1381
1.60M
    if (aboveleft->mbmi.ref_frame != INTRA_FRAME) {
1382
376k
      near_mvs[vcnt].as_int = aboveleft->mbmi.mv.as_int;
1383
376k
      mv_bias(ref_frame_sign_bias[aboveleft->mbmi.ref_frame], refframe,
1384
376k
              &near_mvs[vcnt], ref_frame_sign_bias);
1385
376k
      near_ref[vcnt] = aboveleft->mbmi.ref_frame;
1386
376k
    }
1387
1.60M
    vcnt++;
1388
1389
    /* read in 5 nearby block's MVs from last frame. */
1390
1.60M
    if (cpi->common.last_frame_type != KEY_FRAME) {
1391
983k
      mb_offset = (-xd->mb_to_top_edge / 128 + 1) * (xd->mode_info_stride + 1) +
1392
983k
                  (-xd->mb_to_left_edge / 128 + 1);
1393
1394
      /* current in last frame */
1395
983k
      if (cpi->lf_ref_frame[mb_offset] != INTRA_FRAME) {
1396
605k
        near_mvs[vcnt].as_int = cpi->lfmv[mb_offset].as_int;
1397
605k
        mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset], refframe,
1398
605k
                &near_mvs[vcnt], ref_frame_sign_bias);
1399
605k
        near_ref[vcnt] = cpi->lf_ref_frame[mb_offset];
1400
605k
      }
1401
983k
      vcnt++;
1402
1403
      /* above in last frame */
1404
983k
      if (cpi->lf_ref_frame[mb_offset - xd->mode_info_stride - 1] !=
1405
983k
          INTRA_FRAME) {
1406
354k
        near_mvs[vcnt].as_int =
1407
354k
            cpi->lfmv[mb_offset - xd->mode_info_stride - 1].as_int;
1408
354k
        mv_bias(
1409
354k
            cpi->lf_ref_frame_sign_bias[mb_offset - xd->mode_info_stride - 1],
1410
354k
            refframe, &near_mvs[vcnt], ref_frame_sign_bias);
1411
354k
        near_ref[vcnt] =
1412
354k
            cpi->lf_ref_frame[mb_offset - xd->mode_info_stride - 1];
1413
354k
      }
1414
983k
      vcnt++;
1415
1416
      /* left in last frame */
1417
983k
      if (cpi->lf_ref_frame[mb_offset - 1] != INTRA_FRAME) {
1418
413k
        near_mvs[vcnt].as_int = cpi->lfmv[mb_offset - 1].as_int;
1419
413k
        mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset - 1], refframe,
1420
413k
                &near_mvs[vcnt], ref_frame_sign_bias);
1421
413k
        near_ref[vcnt] = cpi->lf_ref_frame[mb_offset - 1];
1422
413k
      }
1423
983k
      vcnt++;
1424
1425
      /* right in last frame */
1426
983k
      if (cpi->lf_ref_frame[mb_offset + 1] != INTRA_FRAME) {
1427
416k
        near_mvs[vcnt].as_int = cpi->lfmv[mb_offset + 1].as_int;
1428
416k
        mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset + 1], refframe,
1429
416k
                &near_mvs[vcnt], ref_frame_sign_bias);
1430
416k
        near_ref[vcnt] = cpi->lf_ref_frame[mb_offset + 1];
1431
416k
      }
1432
983k
      vcnt++;
1433
1434
      /* below in last frame */
1435
983k
      if (cpi->lf_ref_frame[mb_offset + xd->mode_info_stride + 1] !=
1436
983k
          INTRA_FRAME) {
1437
345k
        near_mvs[vcnt].as_int =
1438
345k
            cpi->lfmv[mb_offset + xd->mode_info_stride + 1].as_int;
1439
345k
        mv_bias(
1440
345k
            cpi->lf_ref_frame_sign_bias[mb_offset + xd->mode_info_stride + 1],
1441
345k
            refframe, &near_mvs[vcnt], ref_frame_sign_bias);
1442
345k
        near_ref[vcnt] =
1443
345k
            cpi->lf_ref_frame[mb_offset + xd->mode_info_stride + 1];
1444
345k
      }
1445
983k
      vcnt++;
1446
983k
    }
1447
1448
6.55M
    for (i = 0; i < vcnt; ++i) {
1449
5.84M
      if (near_ref[near_sadidx[i]] != INTRA_FRAME) {
1450
2.05M
        if (here->mbmi.ref_frame == near_ref[near_sadidx[i]]) {
1451
890k
          mv.as_int = near_mvs[near_sadidx[i]].as_int;
1452
890k
          find = 1;
1453
890k
          if (i < 3) {
1454
808k
            *sr = 3;
1455
808k
          } else {
1456
82.7k
            *sr = 2;
1457
82.7k
          }
1458
890k
          break;
1459
890k
        }
1460
2.05M
      }
1461
5.84M
    }
1462
1463
1.60M
    if (!find) {
1464
5.02M
      for (i = 0; i < vcnt; ++i) {
1465
4.30M
        mvx[i] = near_mvs[i].as_mv.row;
1466
4.30M
        mvy[i] = near_mvs[i].as_mv.col;
1467
4.30M
      }
1468
1469
716k
      insertsortmv(mvx, vcnt);
1470
716k
      insertsortmv(mvy, vcnt);
1471
716k
      mv.as_mv.row = mvx[vcnt / 2];
1472
716k
      mv.as_mv.col = mvy[vcnt / 2];
1473
1474
      /* sr is set to 0 to allow calling function to decide the search
1475
       * range.
1476
       */
1477
716k
      *sr = 0;
1478
716k
    }
1479
1.60M
  }
1480
1481
  /* Set up return values */
1482
1.60M
  mvp->as_int = mv.as_int;
1483
1.60M
  vp8_clamp_mv2(mvp, xd);
1484
1.60M
}
1485
1486
/* Compute the 16x16 SAD between the source macroblock and its neighbouring
 * macroblocks, then order the candidates through insertsortsad().
 *
 * near_sad indexes:
 *   0-cf above, 1-cf left, 2-cf aboveleft,
 *   3-lf current, 4-lf above, 5-lf left, 6-lf right, 7-lf below
 * (cf = current frame reconstruction, lf = last frame buffer).
 *
 * A neighbour that lies outside the frame gets INT_MAX instead of a SAD.
 * The last-frame entries are only evaluated when the last frame was not a
 * key frame; in that case all 8 entries are sorted, otherwise only the
 * first 3.
 */
void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x,
                 int recon_yoffset, int near_sadidx[]) {
  const vp8_variance_fn_ptr_t *const fn = &cpi->fn_ptr[BLOCK_16X16];
  BLOCK *const b = &x->block[0];
  unsigned char *const src_y_ptr = *(b->base_src);
  const int has_above = (xd->mb_to_top_edge != 0);
  const int has_left = (xd->mb_to_left_edge != 0);
  int near_sad[8] = { 0 };
  int sort_count = 3;

  /* calculate sad for current frame 3 nearby MBs. */
  near_sad[0] = has_above
                    ? (int)fn->sdf(src_y_ptr, b->src_stride,
                                   xd->dst.y_buffer - xd->dst.y_stride * 16,
                                   xd->dst.y_stride)
                    : INT_MAX;
  near_sad[1] = has_left ? (int)fn->sdf(src_y_ptr, b->src_stride,
                                        xd->dst.y_buffer - 16,
                                        xd->dst.y_stride)
                         : INT_MAX;
  near_sad[2] = (has_above && has_left)
                    ? (int)fn->sdf(src_y_ptr, b->src_stride,
                                   xd->dst.y_buffer - xd->dst.y_stride * 16 - 16,
                                   xd->dst.y_stride)
                    : INT_MAX;

  if (cpi->common.last_frame_type != KEY_FRAME) {
    /* calculate sad for last frame 5 nearby MBs. */
    unsigned char *const pre_y_buffer =
        cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_buffer + recon_yoffset;
    const int pre_y_stride =
        cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_stride;

    near_sad[4] = has_above
                      ? (int)fn->sdf(src_y_ptr, b->src_stride,
                                     pre_y_buffer - pre_y_stride * 16,
                                     pre_y_stride)
                      : INT_MAX;
    near_sad[5] = has_left ? (int)fn->sdf(src_y_ptr, b->src_stride,
                                          pre_y_buffer - 16, pre_y_stride)
                           : INT_MAX;
    near_sad[3] =
        fn->sdf(src_y_ptr, b->src_stride, pre_y_buffer, pre_y_stride);
    near_sad[6] = (xd->mb_to_right_edge != 0)
                      ? (int)fn->sdf(src_y_ptr, b->src_stride,
                                     pre_y_buffer + 16, pre_y_stride)
                      : INT_MAX;
    near_sad[7] = (xd->mb_to_bottom_edge != 0)
                      ? (int)fn->sdf(src_y_ptr, b->src_stride,
                                     pre_y_buffer + pre_y_stride * 16,
                                     pre_y_stride)
                      : INT_MAX;

    sort_count = 8;
  }

  insertsortsad(near_sad, near_sadidx, sort_count);
}
1560
1561
756k
/* Update the MV component histograms for the macroblock's chosen mode.
 *
 * For SPLITMV every partition coded as NEW4X4 contributes, for NEWMV the
 * macroblock vector contributes; other modes change nothing. Each
 * contribution is the difference to `best_ref_mv`, shifted right by one and
 * offset by mv_max, counted in x->MVcount[0] (row) and x->MVcount[1]
 * (column). A vector is counted only when both indices lie in [0, MVvals).
 */
static void rd_update_mvcount(MACROBLOCK *x, int_mv *best_ref_mv) {
  const int mb_mode = x->e_mbd.mode_info_context->mbmi.mode;

  if (mb_mode == SPLITMV) {
    int part;

    for (part = 0; part < x->partition_info->count; ++part) {
      int row_idx;
      int col_idx;

      if (x->partition_info->bmi[part].mode != NEW4X4) continue;

      row_idx = mv_max + ((x->partition_info->bmi[part].mv.as_mv.row -
                           best_ref_mv->as_mv.row) >>
                          1);
      col_idx = mv_max + ((x->partition_info->bmi[part].mv.as_mv.col -
                           best_ref_mv->as_mv.col) >>
                          1);

      if (row_idx >= 0 && row_idx < MVvals && col_idx >= 0 &&
          col_idx < MVvals) {
        x->MVcount[0][row_idx]++;
        x->MVcount[1][col_idx]++;
      }
    }
  } else if (mb_mode == NEWMV) {
    const int row_idx =
        mv_max + ((x->e_mbd.mode_info_context->mbmi.mv.as_mv.row -
                   best_ref_mv->as_mv.row) >>
                  1);
    const int col_idx =
        mv_max + ((x->e_mbd.mode_info_context->mbmi.mv.as_mv.col -
                   best_ref_mv->as_mv.col) >>
                  1);

    if (row_idx >= 0 && row_idx < MVvals && col_idx >= 0 && col_idx < MVvals) {
      x->MVcount[0][row_idx]++;
      x->MVcount[1][col_idx]++;
    }
  }
}
1597
1598
static int evaluate_inter_mode_rd(int mdcounts[4], RATE_DISTORTION *rd,
1599
                                  int *disable_skip, VP8_COMP *cpi,
1600
2.81M
                                  MACROBLOCK *x) {
1601
2.81M
  MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode;
1602
2.81M
  BLOCK *b = &x->block[0];
1603
2.81M
  MACROBLOCKD *xd = &x->e_mbd;
1604
2.81M
  int distortion;
1605
2.81M
  vp8_build_inter16x16_predictors_mby(&x->e_mbd, x->e_mbd.predictor, 16);
1606
1607
2.81M
  if (cpi->active_map_enabled && x->active_ptr[0] == 0) {
1608
0
    x->skip = 1;
1609
2.81M
  } else if (x->encode_breakout) {
1610
0
    unsigned int sse;
1611
0
    unsigned int var;
1612
0
    unsigned int threshold =
1613
0
        (xd->block[0].dequant[1] * xd->block[0].dequant[1] >> 4);
1614
1615
0
    if (threshold < x->encode_breakout) threshold = x->encode_breakout;
1616
1617
0
    var = vpx_variance16x16(*(b->base_src), b->src_stride, x->e_mbd.predictor,
1618
0
                            16, &sse);
1619
1620
0
    if (sse < threshold) {
1621
0
      unsigned int q2dc = xd->block[24].dequant[0];
1622
      /* If theres is no codeable 2nd order dc
1623
         or a very small uniform pixel change change */
1624
0
      if ((sse - var < q2dc * q2dc >> 4) || (sse / 2 > var && sse - var < 64)) {
1625
        /* Check u and v to make sure skip is ok */
1626
0
        unsigned int sse2 = VP8_UVSSE(x);
1627
0
        if (sse2 * 2 < threshold) {
1628
0
          x->skip = 1;
1629
0
          rd->distortion2 = sse + sse2;
1630
0
          rd->rate2 = 500;
1631
1632
          /* for best_yrd calculation */
1633
0
          rd->rate_uv = 0;
1634
0
          rd->distortion_uv = sse2;
1635
1636
0
          *disable_skip = 1;
1637
0
          return RDCOST(x->rdmult, x->rddiv, rd->rate2, rd->distortion2);
1638
0
        }
1639
0
      }
1640
0
    }
1641
0
  }
1642
1643
  /* Add in the Mv/mode cost */
1644
2.81M
  rd->rate2 += vp8_cost_mv_ref(this_mode, mdcounts);
1645
1646
  /* Y cost and distortion */
1647
2.81M
  macro_block_yrd(x, &rd->rate_y, &distortion);
1648
2.81M
  rd->rate2 += rd->rate_y;
1649
2.81M
  rd->distortion2 += distortion;
1650
1651
  /* UV cost and distortion */
1652
2.81M
  rd_inter16x16_uv(cpi, x, &rd->rate_uv, &rd->distortion_uv,
1653
2.81M
                   cpi->common.full_pixel);
1654
2.81M
  rd->rate2 += rd->rate_uv;
1655
2.81M
  rd->distortion2 += rd->distortion_uv;
1656
2.81M
  return INT_MAX;
1657
2.81M
}
1658
1659
static int calculate_final_rd_costs(int this_rd, RATE_DISTORTION *rd,
1660
                                    int *other_cost, int disable_skip,
1661
                                    int uv_intra_tteob, int intra_rd_penalty,
1662
6.64M
                                    VP8_COMP *cpi, MACROBLOCK *x) {
1663
6.64M
  MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode;
1664
1665
  /* Where skip is allowable add in the default per mb cost for the no
1666
   * skip case. where we then decide to skip we have to delete this and
1667
   * replace it with the cost of signalling a skip
1668
   */
1669
6.64M
  if (cpi->common.mb_no_coeff_skip) {
1670
6.64M
    *other_cost += vp8_cost_bit(cpi->prob_skip_false, 0);
1671
6.64M
    rd->rate2 += *other_cost;
1672
6.64M
  }
1673
1674
  /* Estimate the reference frame signaling cost and add it
1675
   * to the rolling cost variable.
1676
   */
1677
6.64M
  rd->rate2 += x->ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
1678
1679
6.64M
  if (!disable_skip) {
1680
    /* Test for the condition where skip block will be activated
1681
     * because there are no non zero coefficients and make any
1682
     * necessary adjustment for rate
1683
     */
1684
6.02M
    if (cpi->common.mb_no_coeff_skip) {
1685
6.02M
      int i;
1686
6.02M
      int tteob;
1687
6.02M
      int has_y2_block = (this_mode != SPLITMV && this_mode != B_PRED);
1688
1689
6.02M
      tteob = 0;
1690
6.02M
      if (has_y2_block) tteob += x->e_mbd.eobs[24];
1691
1692
102M
      for (i = 0; i < 16; ++i) tteob += (x->e_mbd.eobs[i] > has_y2_block);
1693
1694
6.02M
      if (x->e_mbd.mode_info_context->mbmi.ref_frame) {
1695
28.9M
        for (i = 16; i < 24; ++i) tteob += x->e_mbd.eobs[i];
1696
3.21M
      } else {
1697
2.80M
        tteob += uv_intra_tteob;
1698
2.80M
      }
1699
1700
6.02M
      if (tteob == 0) {
1701
355k
        rd->rate2 -= (rd->rate_y + rd->rate_uv);
1702
        /* for best_yrd calculation */
1703
355k
        rd->rate_uv = 0;
1704
1705
        /* Back out no skip flag costing and add in skip flag costing */
1706
355k
        if (cpi->prob_skip_false) {
1707
355k
          int prob_skip_cost;
1708
1709
355k
          prob_skip_cost = vp8_cost_bit(cpi->prob_skip_false, 1);
1710
355k
          prob_skip_cost -= (int)vp8_cost_bit(cpi->prob_skip_false, 0);
1711
355k
          rd->rate2 += prob_skip_cost;
1712
355k
          *other_cost += prob_skip_cost;
1713
355k
        }
1714
355k
      }
1715
6.02M
    }
1716
    /* Calculate the final RD estimate for this mode */
1717
6.02M
    this_rd = RDCOST(x->rdmult, x->rddiv, rd->rate2, rd->distortion2);
1718
6.02M
    if (this_rd < INT_MAX &&
1719
6.02M
        x->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
1720
2.80M
      this_rd += intra_rd_penalty;
1721
2.80M
    }
1722
6.02M
  }
1723
6.64M
  return this_rd;
1724
6.64M
}
1725
1726
static void update_best_mode(BEST_MODE *best_mode, int this_rd,
1727
                             RATE_DISTORTION *rd, int other_cost,
1728
2.47M
                             MACROBLOCK *x) {
1729
2.47M
  MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode;
1730
1731
2.47M
  other_cost += x->ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
1732
1733
  /* Calculate the final y RD estimate for this mode */
1734
2.47M
  best_mode->yrd =
1735
2.47M
      RDCOST(x->rdmult, x->rddiv, (rd->rate2 - rd->rate_uv - other_cost),
1736
2.47M
             (rd->distortion2 - rd->distortion_uv));
1737
1738
2.47M
  best_mode->rd = this_rd;
1739
2.47M
  best_mode->mbmode = x->e_mbd.mode_info_context->mbmi;
1740
2.47M
  best_mode->partition = *x->partition_info;
1741
1742
2.47M
  if ((this_mode == B_PRED) || (this_mode == SPLITMV)) {
1743
558k
    int i;
1744
9.49M
    for (i = 0; i < 16; ++i) {
1745
8.93M
      best_mode->bmodes[i] = x->e_mbd.block[i].bmi;
1746
8.93M
    }
1747
558k
  }
1748
2.47M
}
1749
1750
/* Choose the prediction mode for the macroblock described by x by trying the
 * entries of vp8_mode_order[] / vp8_ref_frame_order[] in turn and keeping the
 * one with the lowest RD cost. A mode that sets x->skip is accepted at once
 * and ends the search.
 *
 *   cpi               encoder instance (thresholds, speed features, search
 *                     function pointers).
 *   x                 macroblock being coded. On the normal exit path
 *                     x->e_mbd.mode_info_context->mbmi holds the chosen mode;
 *                     x->mode_test_hit_counts[], x->rd_thresh_mult[] and
 *                     x->rd_threshes[] are updated along the way.
 *   recon_yoffset,
 *   recon_uvoffset    forwarded to get_predictor_pointers() and the denoiser;
 *                     recon_yoffset is also passed to vp8_cal_sad().
 *   returnrate,
 *   returndistortion  written with rd.rate2 / rd.distortion2 every time a
 *                     mode becomes the new best; not written otherwise.
 *   returnintra       set to INT_MAX, then to rd.distortion2 of the
 *                     lowest-RD intra mode tested.
 *   mb_row, mb_col    only referenced inside the CONFIG_TEMPORAL_DENOISING
 *                     section.
 */
void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
1751
                            int recon_uvoffset, int *returnrate,
1752
                            int *returndistortion, int *returnintra, int mb_row,
1753
756k
                            int mb_col) {
1754
756k
  BLOCK *b = &x->block[0];
1755
756k
  BLOCKD *d = &x->e_mbd.block[0];
1756
756k
  MACROBLOCKD *xd = &x->e_mbd;
1757
756k
  int_mv best_ref_mv_sb[2];
1758
756k
  int_mv mode_mv_sb[2][MB_MODE_COUNT];
1759
756k
  int_mv best_ref_mv;
1760
756k
  int_mv *mode_mv;
1761
756k
  MB_PREDICTION_MODE this_mode;
1762
756k
  int num00;
1763
756k
  int best_mode_index = 0;
1764
756k
  BEST_MODE best_mode;
1765
1766
756k
  int i;
1767
756k
  int mode_index;
1768
756k
  int mdcounts[4];
1769
756k
  int rate;
1770
756k
  RATE_DISTORTION rd;
1771
756k
  int uv_intra_rate, uv_intra_distortion, uv_intra_rate_tokenonly;
1772
756k
  int uv_intra_tteob = 0;
1773
756k
  int uv_intra_done = 0;
1774
1775
756k
  MB_PREDICTION_MODE uv_intra_mode = 0;
1776
756k
  int_mv mvp;
1777
756k
  int near_sadidx[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
1778
756k
  int saddone = 0;
1779
  /* search range got from mv_pred(). It uses step_param levels. (0-7) */
1780
756k
  int sr = 0;
1781
1782
756k
  unsigned char *plane[4][3] = { { 0, 0 } };
1783
756k
  int ref_frame_map[4];
1784
756k
  int sign_bias = 0;
1785
1786
756k
  int intra_rd_penalty =
1787
756k
      10 * vp8_dc_quant(cpi->common.base_qindex, cpi->common.y1dc_delta_q);
1788
1789
756k
#if CONFIG_TEMPORAL_DENOISING
1790
756k
  unsigned int zero_mv_sse = UINT_MAX, best_sse = UINT_MAX,
1791
756k
               best_rd_sse = UINT_MAX;
1792
756k
#endif
1793
1794
  // _uv variables are not set consistently before calling update_best_mode.
1795
756k
  rd.rate_uv = 0;
1796
756k
  rd.distortion_uv = 0;
1797
1798
756k
  mode_mv = mode_mv_sb[sign_bias];
1799
756k
  best_ref_mv.as_int = 0;
1800
756k
  best_mode.rd = INT_MAX;
1801
756k
  best_mode.yrd = INT_MAX;
1802
756k
  best_mode.intra_rd = INT_MAX;
1803
756k
  memset(mode_mv_sb, 0, sizeof(mode_mv_sb));
1804
756k
  memset(&best_mode.mbmode, 0, sizeof(best_mode.mbmode));
1805
756k
  memset(&best_mode.bmodes, 0, sizeof(best_mode.bmodes));
1806
1807
  /* Setup search priorities */
1808
756k
  get_reference_search_order(cpi, ref_frame_map);
1809
1810
  /* Check to see if there is at least 1 valid reference frame that we need
1811
   * to calculate near_mvs.
1812
   */
1813
756k
  if (ref_frame_map[1] > 0) {
1814
756k
    sign_bias = vp8_find_near_mvs_bias(
1815
756k
        &x->e_mbd, x->e_mbd.mode_info_context, mode_mv_sb, best_ref_mv_sb,
1816
756k
        mdcounts, ref_frame_map[1], cpi->common.ref_frame_sign_bias);
1817
1818
756k
    mode_mv = mode_mv_sb[sign_bias];
1819
756k
    best_ref_mv.as_int = best_ref_mv_sb[sign_bias].as_int;
1820
756k
  }
1821
1822
756k
  get_predictor_pointers(cpi, plane, recon_yoffset, recon_uvoffset);
1823
1824
756k
  *returnintra = INT_MAX;
1825
  /* Count of the number of MBs tested so far this frame */
1826
756k
  x->mbs_tested_so_far++;
1827
1828
756k
  x->skip = 0;
1829
1830
15.8M
  for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
1831
15.1M
    int this_rd = INT_MAX;
1832
15.1M
    int disable_skip = 0;
1833
15.1M
    int other_cost = 0;
1834
15.1M
    int this_ref_frame = ref_frame_map[vp8_ref_frame_order[mode_index]];
1835
1836
    /* Test best rd so far against threshold for trying this mode. */
1837
15.1M
    if (best_mode.rd <= x->rd_threshes[mode_index]) continue;
1838
1839
13.5M
    if (this_ref_frame < 0) continue;
1840
1841
    /* These variables hold the rolling total cost and distortion for
1842
     * this mode
1843
     */
1844
8.39M
    rd.rate2 = 0;
1845
8.39M
    rd.distortion2 = 0;
1846
1847
8.39M
    this_mode = vp8_mode_order[mode_index];
1848
1849
8.39M
    x->e_mbd.mode_info_context->mbmi.mode = this_mode;
1850
8.39M
    x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame;
1851
1852
    /* Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
1853
     * unless ARNR filtering is enabled in which case we want
1854
     * an unfiltered alternative
1855
     */
1856
8.39M
    if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
1857
0
      if (this_mode != ZEROMV ||
1858
0
          x->e_mbd.mode_info_context->mbmi.ref_frame != ALTREF_FRAME) {
1859
0
        continue;
1860
0
      }
1861
0
    }
1862
1863
    /* everything but intra */
1864
8.39M
    if (x->e_mbd.mode_info_context->mbmi.ref_frame) {
1865
5.28M
      assert(plane[this_ref_frame][0] != NULL &&
1866
5.28M
             plane[this_ref_frame][1] != NULL &&
1867
5.28M
             plane[this_ref_frame][2] != NULL);
1868
5.28M
      x->e_mbd.pre.y_buffer = plane[this_ref_frame][0];
1869
5.28M
      x->e_mbd.pre.u_buffer = plane[this_ref_frame][1];
1870
5.28M
      x->e_mbd.pre.v_buffer = plane[this_ref_frame][2];
1871
1872
5.28M
      if (sign_bias != cpi->common.ref_frame_sign_bias[this_ref_frame]) {
1873
0
        sign_bias = cpi->common.ref_frame_sign_bias[this_ref_frame];
1874
0
        mode_mv = mode_mv_sb[sign_bias];
1875
0
        best_ref_mv.as_int = best_ref_mv_sb[sign_bias].as_int;
1876
0
      }
1877
5.28M
    }
1878
1879
    /* Check to see if the testing frequency for this mode is at its
1880
     * max. If so then prevent it from being tested and increase the
1881
     * threshold for its testing
1882
     */
1883
8.39M
    if (x->mode_test_hit_counts[mode_index] &&
1884
7.52M
        (cpi->mode_check_freq[mode_index] > 1)) {
1885
219k
      if (x->mbs_tested_so_far <= cpi->mode_check_freq[mode_index] *
1886
219k
                                      x->mode_test_hit_counts[mode_index]) {
1887
        /* Increase the threshold for coding this mode to make it
1888
         * less likely to be chosen
1889
         */
1890
120k
        x->rd_thresh_mult[mode_index] += 4;
1891
1892
120k
        if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT) {
1893
20.8k
          x->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
1894
20.8k
        }
1895
1896
120k
        x->rd_threshes[mode_index] =
1897
120k
            (cpi->rd_baseline_thresh[mode_index] >> 7) *
1898
120k
            x->rd_thresh_mult[mode_index];
1899
1900
120k
        continue;
1901
120k
      }
1902
219k
    }
1903
1904
    /* We have now reached the point where we are going to test the
1905
     * current mode so increment the counter for the number of times
1906
     * it has been tested
1907
     */
1908
8.27M
    x->mode_test_hit_counts[mode_index]++;
1909
1910
    /* Experimental code. Special case for gf and arf zeromv modes.
1911
     * Increase zbin size to suppress noise
1912
     */
1913
8.27M
    if (x->zbin_mode_boost_enabled) {
1914
0
      if (this_ref_frame == INTRA_FRAME) {
1915
0
        x->zbin_mode_boost = 0;
1916
0
      } else {
1917
0
        if (vp8_mode_order[mode_index] == ZEROMV) {
1918
0
          if (this_ref_frame != LAST_FRAME) {
1919
0
            x->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
1920
0
          } else {
1921
0
            x->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
1922
0
          }
1923
0
        } else if (vp8_mode_order[mode_index] == SPLITMV) {
1924
0
          x->zbin_mode_boost = 0;
1925
0
        } else {
1926
0
          x->zbin_mode_boost = MV_ZBIN_BOOST;
1927
0
        }
1928
0
      }
1929
1930
0
      vp8_update_zbin_extra(cpi, x);
1931
0
    }
1932
1933
8.27M
    if (!uv_intra_done && this_ref_frame == INTRA_FRAME) {
1934
756k
      rd_pick_intra_mbuv_mode(x, &uv_intra_rate, &uv_intra_rate_tokenonly,
1935
756k
                              &uv_intra_distortion);
1936
756k
      uv_intra_mode = x->e_mbd.mode_info_context->mbmi.uv_mode;
1937
1938
      /*
1939
       * Total of the eobs is used later to further adjust rate2. Since uv
1940
       * block's intra eobs will be overwritten when we check inter modes,
1941
       * we need to save uv_intra_tteob here.
1942
       */
1943
6.81M
      for (i = 16; i < 24; ++i) uv_intra_tteob += x->e_mbd.eobs[i];
1944
1945
756k
      uv_intra_done = 1;
1946
756k
    }
1947
1948
8.27M
    switch (this_mode) {
1949
529k
      case B_PRED: {
1950
529k
        int tmp_rd;
1951
1952
        /* Note the rate value returned here includes the cost of
1953
         * coding the BPRED mode: x->mbmode_cost[x->e_mbd.frame_type][BPRED]
1954
         */
1955
529k
        int distortion;
1956
529k
        tmp_rd = rd_pick_intra4x4mby_modes(x, &rate, &rd.rate_y, &distortion,
1957
529k
                                           best_mode.yrd);
1958
529k
        rd.rate2 += rate;
1959
529k
        rd.distortion2 += distortion;
1960
1961
529k
        if (tmp_rd < best_mode.yrd) {
1962
228k
          assert(uv_intra_done);
1963
228k
          rd.rate2 += uv_intra_rate;
1964
228k
          rd.rate_uv = uv_intra_rate_tokenonly;
1965
228k
          rd.distortion2 += uv_intra_distortion;
1966
228k
          rd.distortion_uv = uv_intra_distortion;
1967
301k
        } else {
1968
301k
          this_rd = INT_MAX;
1969
301k
          disable_skip = 1;
1970
301k
        }
1971
529k
        break;
1972
0
      }
1973
1974
715k
      case SPLITMV: {
1975
715k
        int tmp_rd;
1976
715k
        int this_rd_thresh;
1977
715k
        int distortion;
1978
1979
715k
        /* Use the NEWMV threshold that matches this mode's reference slot:
         * THR_NEW1 for slot 1, THR_NEW2 for slot 2, THR_NEW3 otherwise.
         */
        this_rd_thresh = (vp8_ref_frame_order[mode_index] == 1)
1980
715k
                             ? x->rd_threshes[THR_NEW1]
1981
715k
                             : x->rd_threshes[THR_NEW3];
1982
715k
        this_rd_thresh = (vp8_ref_frame_order[mode_index] == 2)
1983
715k
                             ? x->rd_threshes[THR_NEW2]
1984
715k
                             : this_rd_thresh;
1985
1986
715k
        tmp_rd = vp8_rd_pick_best_mbsegmentation(
1987
715k
            cpi, x, &best_ref_mv, best_mode.yrd, mdcounts, &rate, &rd.rate_y,
1988
715k
            &distortion, this_rd_thresh);
1989
1990
715k
        rd.rate2 += rate;
1991
715k
        rd.distortion2 += distortion;
1992
1993
        /* If even the 'Y' rd value of split is higher than best so far
1994
         * then don't bother looking at UV
1995
         */
1996
715k
        if (tmp_rd < best_mode.yrd) {
1997
          /* Now work out UV cost and add it in */
1998
400k
          rd_inter4x4_uv(cpi, x, &rd.rate_uv, &rd.distortion_uv,
1999
400k
                         cpi->common.full_pixel);
2000
400k
          rd.rate2 += rd.rate_uv;
2001
400k
          rd.distortion2 += rd.distortion_uv;
2002
400k
        } else {
2003
315k
          this_rd = INT_MAX;
2004
315k
          disable_skip = 1;
2005
315k
        }
2006
715k
        break;
2007
0
      }
2008
756k
      case DC_PRED:
2009
1.36M
      case V_PRED:
2010
1.97M
      case H_PRED:
2011
2.57M
      case TM_PRED: {
2012
2.57M
        int distortion;
2013
2.57M
        x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
2014
2015
2.57M
        vp8_build_intra_predictors_mby_s(
2016
2.57M
            xd, xd->dst.y_buffer - xd->dst.y_stride, xd->dst.y_buffer - 1,
2017
2.57M
            xd->dst.y_stride, xd->predictor, 16);
2018
2.57M
        macro_block_yrd(x, &rd.rate_y, &distortion);
2019
2.57M
        rd.rate2 += rd.rate_y;
2020
2.57M
        rd.distortion2 += distortion;
2021
2.57M
        rd.rate2 += x->mbmode_cost[x->e_mbd.frame_type]
2022
2.57M
                                  [x->e_mbd.mode_info_context->mbmi.mode];
2023
2.57M
        assert(uv_intra_done);
2024
2.57M
        rd.rate2 += uv_intra_rate;
2025
2.57M
        rd.rate_uv = uv_intra_rate_tokenonly;
2026
2.57M
        rd.distortion2 += uv_intra_distortion;
2027
2.57M
        rd.distortion_uv = uv_intra_distortion;
2028
2.57M
        break;
2029
1.97M
      }
2030
2031
957k
      case NEWMV: {
2032
957k
        int thissme;
2033
957k
        int bestsme = INT_MAX;
2034
957k
        int step_param = cpi->sf.first_step;
2035
957k
        int further_steps;
2036
957k
        int n;
2037
        /* If last step (1-away) of n-step search doesn't pick the center point
2038
           as the best match, we will do a final 1-away diamond refining search
2039
        */
2040
957k
        int do_refine = 1;
2041
2042
957k
        int sadpb = x->sadperbit16;
2043
957k
        int_mv mvp_full;
2044
2045
957k
        int col_min = ((best_ref_mv.as_mv.col + 7) >> 3) - MAX_FULL_PEL_VAL;
2046
957k
        int row_min = ((best_ref_mv.as_mv.row + 7) >> 3) - MAX_FULL_PEL_VAL;
2047
957k
        int col_max = (best_ref_mv.as_mv.col >> 3) + MAX_FULL_PEL_VAL;
2048
957k
        int row_max = (best_ref_mv.as_mv.row >> 3) + MAX_FULL_PEL_VAL;
2049
2050
957k
        /* Remember the current MV limits: they are narrowed for the search
         * below and put back once it has finished.
         */
        int tmp_col_min = x->mv_col_min;
2051
957k
        int tmp_col_max = x->mv_col_max;
2052
957k
        int tmp_row_min = x->mv_row_min;
2053
957k
        int tmp_row_max = x->mv_row_max;
2054
2055
957k
        if (!saddone) {
2056
638k
          vp8_cal_sad(cpi, xd, x, recon_yoffset, &near_sadidx[0]);
2057
638k
          saddone = 1;
2058
638k
        }
2059
2060
957k
        vp8_mv_pred(cpi, &x->e_mbd, x->e_mbd.mode_info_context, &mvp,
2061
957k
                    x->e_mbd.mode_info_context->mbmi.ref_frame,
2062
957k
                    cpi->common.ref_frame_sign_bias, &sr, &near_sadidx[0]);
2063
2064
957k
        mvp_full.as_mv.col = mvp.as_mv.col >> 3;
2065
957k
        mvp_full.as_mv.row = mvp.as_mv.row >> 3;
2066
2067
        /* Get intersection of UMV window and valid MV window to
2068
         * reduce # of checks in diamond search.
2069
         */
2070
957k
        if (x->mv_col_min < col_min) x->mv_col_min = col_min;
2071
957k
        if (x->mv_col_max > col_max) x->mv_col_max = col_max;
2072
957k
        if (x->mv_row_min < row_min) x->mv_row_min = row_min;
2073
957k
        if (x->mv_row_max > row_max) x->mv_row_max = row_max;
2074
2075
        /* adjust search range according to sr from mv prediction */
2076
957k
        if (sr > step_param) step_param = sr;
2077
2078
        /* Initial step/diamond search */
2079
957k
        {
2080
957k
          bestsme = cpi->diamond_search_sad(
2081
957k
              x, b, d, &mvp_full, &d->bmi.mv, step_param, sadpb, &num00,
2082
957k
              &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv);
2083
957k
          mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
2084
2085
          /* Further step/diamond searches as necessary */
2086
957k
          further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
2087
2088
957k
          n = num00;
2089
957k
          num00 = 0;
2090
2091
          /* If there won't be more n-step search, check to see if refining
2092
           * search is needed. */
2093
957k
          if (n > further_steps) do_refine = 0;
2094
2095
4.20M
          while (n < further_steps) {
2096
3.24M
            n++;
2097
2098
3.24M
            if (num00) {
2099
299k
              num00--;
2100
2.94M
            } else {
2101
2.94M
              thissme = cpi->diamond_search_sad(
2102
2.94M
                  x, b, d, &mvp_full, &d->bmi.mv, step_param + n, sadpb, &num00,
2103
2.94M
                  &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv);
2104
2105
              /* check to see if refining search is needed. */
2106
2.94M
              if (num00 > (further_steps - n)) do_refine = 0;
2107
2108
2.94M
              if (thissme < bestsme) {
2109
456k
                bestsme = thissme;
2110
456k
                mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
2111
2.49M
              } else {
2112
2.49M
                d->bmi.mv.as_int = mode_mv[NEWMV].as_int;
2113
2.49M
              }
2114
2.94M
            }
2115
3.24M
          }
2116
957k
        }
2117
2118
        /* final 1-away diamond refining search */
2119
957k
        if (do_refine == 1) {
2120
629k
          int search_range;
2121
2122
629k
          search_range = 8;
2123
2124
629k
          thissme = cpi->refining_search_sad(
2125
629k
              x, b, d, &d->bmi.mv, sadpb, search_range,
2126
629k
              &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv);
2127
2128
629k
          if (thissme < bestsme) {
2129
30.2k
            bestsme = thissme;
2130
30.2k
            mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
2131
599k
          } else {
2132
599k
            d->bmi.mv.as_int = mode_mv[NEWMV].as_int;
2133
599k
          }
2134
629k
        }
2135
2136
957k
        x->mv_col_min = tmp_col_min;
2137
957k
        x->mv_col_max = tmp_col_max;
2138
957k
        x->mv_row_min = tmp_row_min;
2139
957k
        x->mv_row_max = tmp_row_max;
2140
2141
957k
        if (bestsme < INT_MAX) {
2142
957k
          int dis; /* TODO: use dis in distortion calculation later. */
2143
957k
          unsigned int sse;
2144
957k
          cpi->find_fractional_mv_step(
2145
957k
              x, b, d, &d->bmi.mv, &best_ref_mv, x->errorperbit,
2146
957k
              &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &dis, &sse);
2147
957k
        }
2148
2149
957k
        mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
2150
2151
        /* Add the new motion vector cost to our rolling cost variable */
2152
957k
        rd.rate2 +=
2153
957k
            vp8_mv_bit_cost(&mode_mv[NEWMV], &best_ref_mv, x->mvcost, 96);
2154
957k
      }
2155
        // fall through
2156
2157
2.12M
      case NEARESTMV:
2158
3.28M
      case NEARMV:
2159
        /* Clip "next_nearest" so that it does not extend too far out
2160
         * of image
2161
         */
2162
3.28M
        vp8_clamp_mv2(&mode_mv[this_mode], xd);
2163
2164
        /* Do not bother proceeding if the vector (from newmv, nearest
2165
         * or near) is 0,0 as this should then be coded using the zeromv
2166
         * mode.
2167
         */
2168
3.28M
        if (((this_mode == NEARMV) || (this_mode == NEARESTMV)) &&
2169
2.32M
            (mode_mv[this_mode].as_int == 0)) {
2170
1.63M
          continue;
2171
1.63M
        }
2172
        // fall through
2173
2174
2.81M
      case ZEROMV:
2175
2176
        /* Trap vectors that reach beyond the UMV borders
2177
         * Note that ALL New MV, Nearest MV Near MV and Zero MV code
2178
         * drops through to this point because of the lack of break
2179
         * statements in the previous two cases.
2180
         */
2181
2.81M
        if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) ||
2182
2.81M
            ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) ||
2183
2.81M
            ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) ||
2184
2.81M
            ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max)) {
2185
0
          continue;
2186
0
        }
2187
2188
2.81M
        vp8_set_mbmode_and_mvs(x, this_mode, &mode_mv[this_mode]);
2189
2.81M
        this_rd = evaluate_inter_mode_rd(mdcounts, &rd, &disable_skip, cpi, x);
2190
2.81M
        break;
2191
2192
0
      default: break;
2193
8.27M
    }
2194
2195
6.64M
    this_rd =
2196
6.64M
        calculate_final_rd_costs(this_rd, &rd, &other_cost, disable_skip,
2197
6.64M
                                 uv_intra_tteob, intra_rd_penalty, cpi, x);
2198
2199
    /* Keep record of best intra distortion */
2200
6.64M
    if ((x->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME) &&
2201
3.10M
        (this_rd < best_mode.intra_rd)) {
2202
1.14M
      best_mode.intra_rd = this_rd;
2203
1.14M
      *returnintra = rd.distortion2;
2204
1.14M
    }
2205
6.64M
#if CONFIG_TEMPORAL_DENOISING
2206
6.64M
    if (cpi->oxcf.noise_sensitivity) {
2207
0
      unsigned int sse;
2208
0
      vp8_get_inter_mbpred_error(x, &cpi->fn_ptr[BLOCK_16X16], &sse,
2209
0
                                 mode_mv[this_mode]);
2210
2211
0
      if (sse < best_rd_sse) best_rd_sse = sse;
2212
2213
      /* Store for later use by denoiser. */
2214
0
      if (this_mode == ZEROMV && sse < zero_mv_sse) {
2215
0
        zero_mv_sse = sse;
2216
0
        x->best_zeromv_reference_frame =
2217
0
            x->e_mbd.mode_info_context->mbmi.ref_frame;
2218
0
      }
2219
2220
      /* Store the best NEWMV in x for later use in the denoiser. */
2221
0
      if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV && sse < best_sse) {
2222
0
        best_sse = sse;
2223
0
        vp8_get_inter_mbpred_error(x, &cpi->fn_ptr[BLOCK_16X16], &best_sse,
2224
0
                                   mode_mv[this_mode]);
2225
0
        x->best_sse_inter_mode = NEWMV;
2226
0
        x->best_sse_mv = x->e_mbd.mode_info_context->mbmi.mv;
2227
0
        x->need_to_clamp_best_mvs =
2228
0
            x->e_mbd.mode_info_context->mbmi.need_to_clamp_mvs;
2229
0
        x->best_reference_frame = x->e_mbd.mode_info_context->mbmi.ref_frame;
2230
0
      }
2231
0
    }
2232
6.64M
#endif
2233
2234
    /* Did this mode help.. i.e. is it the new best mode */
2235
6.64M
    if (this_rd < best_mode.rd || x->skip) {
2236
      /* Note index of best mode so far */
2237
2.47M
      best_mode_index = mode_index;
2238
2.47M
      *returnrate = rd.rate2;
2239
2.47M
      *returndistortion = rd.distortion2;
2240
2.47M
      if (this_mode <= B_PRED) {
2241
1.00M
        x->e_mbd.mode_info_context->mbmi.uv_mode = uv_intra_mode;
2242
        /* required for left and above block mv */
2243
1.00M
        x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
2244
1.00M
      }
2245
2.47M
      update_best_mode(&best_mode, this_rd, &rd, other_cost, x);
2246
2247
      /* Testing this mode gave rise to an improvement in best error
2248
       * score. Lower threshold a bit for next time
2249
       */
2250
2.47M
      x->rd_thresh_mult[mode_index] =
2251
2.47M
          (x->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2))
2252
2.47M
              ? x->rd_thresh_mult[mode_index] - 2
2253
2.47M
              : MIN_THRESHMULT;
2254
2.47M
    }
2255
2256
    /* If the mode did not help improve the best error case then raise
2257
     * the threshold for testing that mode next time around.
2258
     */
2259
4.16M
    else {
2260
4.16M
      x->rd_thresh_mult[mode_index] += 4;
2261
2262
4.16M
      if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT) {
2263
2.06M
        x->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
2264
2.06M
      }
2265
4.16M
    }
2266
6.64M
    x->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) *
2267
6.64M
                                 x->rd_thresh_mult[mode_index];
2268
2269
6.64M
    /* A mode that has set x->skip ends the search. */
    if (x->skip) break;
2270
6.64M
  }
2271
2272
  /* Reduce the activation RD thresholds for the best choice mode */
2273
756k
  if ((cpi->rd_baseline_thresh[best_mode_index] > 0) &&
2274
509k
      (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2))) {
2275
509k
    int best_adjustment = (x->rd_thresh_mult[best_mode_index] >> 2);
2276
2277
509k
    x->rd_thresh_mult[best_mode_index] =
2278
509k
        (x->rd_thresh_mult[best_mode_index] >=
2279
509k
         (MIN_THRESHMULT + best_adjustment))
2280
509k
            ? x->rd_thresh_mult[best_mode_index] - best_adjustment
2281
509k
            : MIN_THRESHMULT;
2282
509k
    x->rd_threshes[best_mode_index] =
2283
509k
        (cpi->rd_baseline_thresh[best_mode_index] >> 7) *
2284
509k
        x->rd_thresh_mult[best_mode_index];
2285
509k
  }
2286
2287
756k
#if CONFIG_TEMPORAL_DENOISING
2288
756k
  if (cpi->oxcf.noise_sensitivity) {
2289
0
    int block_index = mb_row * cpi->common.mb_cols + mb_col;
2290
0
    if (x->best_sse_inter_mode == DC_PRED) {
2291
      /* No best MV found. */
2292
0
      x->best_sse_inter_mode = best_mode.mbmode.mode;
2293
0
      x->best_sse_mv = best_mode.mbmode.mv;
2294
0
      x->need_to_clamp_best_mvs = best_mode.mbmode.need_to_clamp_mvs;
2295
0
      x->best_reference_frame = best_mode.mbmode.ref_frame;
2296
0
      best_sse = best_rd_sse;
2297
0
    }
2298
0
    vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse,
2299
0
                            recon_yoffset, recon_uvoffset, &cpi->common.lf_info,
2300
0
                            mb_row, mb_col, block_index, 0);
2301
2302
    /* Reevaluate ZEROMV after denoising. */
2303
0
    if (best_mode.mbmode.ref_frame == INTRA_FRAME &&
2304
0
        x->best_zeromv_reference_frame != INTRA_FRAME) {
2305
0
      int this_rd = INT_MAX;
2306
0
      int disable_skip = 0;
2307
0
      int other_cost = 0;
2308
0
      int this_ref_frame = x->best_zeromv_reference_frame;
2309
0
      rd.rate2 =
2310
0
          x->ref_frame_cost[this_ref_frame] + vp8_cost_mv_ref(ZEROMV, mdcounts);
2311
0
      rd.distortion2 = 0;
2312
2313
      /* set up the proper prediction buffers for the frame */
2314
0
      x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame;
2315
0
      x->e_mbd.pre.y_buffer = plane[this_ref_frame][0];
2316
0
      x->e_mbd.pre.u_buffer = plane[this_ref_frame][1];
2317
0
      x->e_mbd.pre.v_buffer = plane[this_ref_frame][2];
2318
2319
0
      x->e_mbd.mode_info_context->mbmi.mode = ZEROMV;
2320
0
      x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
2321
0
      x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
2322
2323
0
      this_rd = evaluate_inter_mode_rd(mdcounts, &rd, &disable_skip, cpi, x);
2324
0
      this_rd =
2325
0
          calculate_final_rd_costs(this_rd, &rd, &other_cost, disable_skip,
2326
0
                                   uv_intra_tteob, intra_rd_penalty, cpi, x);
2327
0
      if (this_rd < best_mode.rd || x->skip) {
2328
0
        *returnrate = rd.rate2;
2329
0
        *returndistortion = rd.distortion2;
2330
0
        update_best_mode(&best_mode, this_rd, &rd, other_cost, x);
2331
0
      }
2332
0
    }
2333
0
  }
2334
756k
#endif
2335
2336
756k
  /* When cpi->is_src_frame_alt_ref is set and the search did not end on
   * ZEROMV / ALTREF_FRAME, force that choice and return straight away:
   * best_mode is not copied back and rd_update_mvcount() is not called on
   * this path.
   */
  if (cpi->is_src_frame_alt_ref &&
2337
0
      (best_mode.mbmode.mode != ZEROMV ||
2338
0
       best_mode.mbmode.ref_frame != ALTREF_FRAME)) {
2339
0
    x->e_mbd.mode_info_context->mbmi.mode = ZEROMV;
2340
0
    x->e_mbd.mode_info_context->mbmi.ref_frame = ALTREF_FRAME;
2341
0
    x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
2342
0
    x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
2343
0
    x->e_mbd.mode_info_context->mbmi.mb_skip_coeff =
2344
0
        (cpi->common.mb_no_coeff_skip);
2345
0
    x->e_mbd.mode_info_context->mbmi.partitioning = 0;
2346
0
    return;
2347
0
  }
2348
2349
  /* macroblock modes */
2350
756k
  x->e_mbd.mode_info_context->mbmi = best_mode.mbmode;
2351
2352
756k
  if (best_mode.mbmode.mode == B_PRED) {
2353
3.77M
    for (i = 0; i < 16; ++i) {
2354
3.55M
      xd->mode_info_context->bmi[i].as_mode = best_mode.bmodes[i].as_mode;
2355
3.55M
    }
2356
222k
  }
2357
2358
756k
  if (best_mode.mbmode.mode == SPLITMV) {
2359
2.95M
    for (i = 0; i < 16; ++i) {
2360
2.78M
      xd->mode_info_context->bmi[i].mv.as_int = best_mode.bmodes[i].mv.as_int;
2361
2.78M
    }
2362
2363
174k
    *x->partition_info = best_mode.partition;
2364
2365
174k
    x->e_mbd.mode_info_context->mbmi.mv.as_int =
2366
174k
        x->partition_info->bmi[15].mv.as_int;
2367
174k
  }
2368
2369
756k
  if (sign_bias !=
2370
756k
      cpi->common.ref_frame_sign_bias[xd->mode_info_context->mbmi.ref_frame]) {
2371
0
    best_ref_mv.as_int = best_ref_mv_sb[!sign_bias].as_int;
2372
0
  }
2373
2374
756k
  rd_update_mvcount(x, &best_ref_mv);
2375
756k
}
2376
2377
741k
/* Choose between the 16x16 and 4x4 (B_PRED) intra luma paths for the
 * macroblock x and report the combined rate.
 *
 *   x     macroblock being coded; its ref_frame is set to INTRA_FRAME and,
 *         when the 4x4 error is the smaller one, its mode is set to B_PRED.
 *         Any other mode-info updates are left to the rd_pick_* helpers.
 *   rate  receives the chroma rate plus the rate of the selected luma path.
 */
void vp8_rd_pick_intra_mode(MACROBLOCK *x, int *rate) {
  MB_MODE_INFO *const mbmi = &x->e_mbd.mode_info_context->mbmi;
  int uv_rate, uv_rate_tokenonly = 0, uv_dist;
  int y16_rate = 0, y16_rate_tokenonly = 0, y16_dist;
  int y4_rate, y4_rate_tokenonly = 0, y4_dist;
  int err16, err4;

  mbmi->ref_frame = INTRA_FRAME;

  /* Chroma first, then 16x16 luma, then 4x4 luma. The 16x16 error is handed
   * to the 4x4 search as its last argument.
   */
  rd_pick_intra_mbuv_mode(x, &uv_rate, &uv_rate_tokenonly, &uv_dist);
  err16 = rd_pick_intra16x16mby_mode(x, &y16_rate, &y16_rate_tokenonly,
                                     &y16_dist);
  err4 = rd_pick_intra4x4mby_modes(x, &y4_rate, &y4_rate_tokenonly, &y4_dist,
                                   err16);

  if (err4 < err16) mbmi->mode = B_PRED;

  *rate = uv_rate + ((err4 < err16) ? y4_rate : y16_rate);
}