Coverage Report

Created: 2026-02-14 06:59

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libvpx/vp8/encoder/rdopt.c
Line
Count
Source
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5
 *  that can be found in the LICENSE file in the root of the source
6
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
9
 */
10
11
#include <assert.h>
12
#include <stdio.h>
13
#include <math.h>
14
#include <limits.h>
15
#include <assert.h>
16
#include "vpx_config.h"
17
#include "vp8_rtcd.h"
18
#include "./vpx_dsp_rtcd.h"
19
#include "encodeframe.h"
20
#include "tokenize.h"
21
#include "treewriter.h"
22
#include "onyx_int.h"
23
#include "modecosts.h"
24
#include "encodeintra.h"
25
#include "pickinter.h"
26
#include "vp8/common/common.h"
27
#include "vp8/common/entropymode.h"
28
#include "vp8/common/reconinter.h"
29
#include "vp8/common/reconintra.h"
30
#include "vp8/common/reconintra4x4.h"
31
#include "vp8/common/findnearmv.h"
32
#include "vp8/common/quant_common.h"
33
#include "encodemb.h"
34
#include "vp8/encoder/quantize.h"
35
#include "vpx_dsp/variance.h"
36
#include "vpx_ports/system_state.h"
37
#include "mcomp.h"
38
#include "rdopt.h"
39
#include "vpx_mem/vpx_mem.h"
40
#include "vp8/common/systemdependent.h"
41
#if CONFIG_TEMPORAL_DENOISING
42
#include "denoising.h"
43
#endif
44
extern void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x);
45
46
1.63M
#define MAXF(a, b) (((a) > (b)) ? (a) : (b))
47
48
typedef struct rate_distortion_struct {
49
  int rate2;
50
  int rate_y;
51
  int rate_uv;
52
  int distortion2;
53
  int distortion_uv;
54
} RATE_DISTORTION;
55
56
typedef struct best_mode_struct {
57
  int yrd;
58
  int rd;
59
  int intra_rd;
60
  MB_MODE_INFO mbmode;
61
  union b_mode_info bmodes[16];
62
  PARTITION_INFO partition;
63
} BEST_MODE;
64
65
static const int auto_speed_thresh[17] = { 1000, 200, 150, 130, 150, 125,
66
                                           120,  115, 115, 115, 115, 115,
67
                                           115,  115, 115, 115, 105 };
68
69
const MB_PREDICTION_MODE vp8_mode_order[MAX_MODES] = {
70
  ZEROMV,    DC_PRED,
71
72
  NEARESTMV, NEARMV,
73
74
  ZEROMV,    NEARESTMV,
75
76
  ZEROMV,    NEARESTMV,
77
78
  NEARMV,    NEARMV,
79
80
  V_PRED,    H_PRED,    TM_PRED,
81
82
  NEWMV,     NEWMV,     NEWMV,
83
84
  SPLITMV,   SPLITMV,   SPLITMV,
85
86
  B_PRED,
87
};
88
89
/* This table determines the search order in reference frame priority order,
90
 * which may not necessarily match INTRA,LAST,GOLDEN,ARF
91
 */
92
const int vp8_ref_frame_order[MAX_MODES] = {
93
  1, 0,
94
95
  1, 1,
96
97
  2, 2,
98
99
  3, 3,
100
101
  2, 3,
102
103
  0, 0, 0,
104
105
  1, 2, 3,
106
107
  1, 2, 3,
108
109
  0,
110
};
111
112
static void fill_token_costs(
113
    int c[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS],
114
    const vp8_prob p[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS]
115
151k
                    [ENTROPY_NODES]) {
116
151k
  int i, j, k;
117
118
758k
  for (i = 0; i < BLOCK_TYPES; ++i) {
119
5.45M
    for (j = 0; j < COEF_BANDS; ++j) {
120
19.4M
      for (k = 0; k < PREV_COEF_CONTEXTS; ++k) {
121
        /* check for pt=0 and band > 1 if block type 0
122
         * and 0 if blocktype 1
123
         */
124
14.5M
        if (k == 0 && j > (i == 0)) {
125
4.09M
          vp8_cost_tokens2(c[i][j][k], p[i][j][k], vp8_coef_tree, 2);
126
10.4M
        } else {
127
10.4M
          vp8_cost_tokens(c[i][j][k], p[i][j][k], vp8_coef_tree);
128
10.4M
        }
129
14.5M
      }
130
4.85M
    }
131
606k
  }
132
151k
}
133
134
static const int rd_iifactor[32] = { 4, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0,
135
                                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
136
                                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
137
138
/* values are now correlated to quantizer */
139
static const int sad_per_bit16lut[QINDEX_RANGE] = {
140
  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  3,  3,  3,
141
  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  4,  4,  4,  4,  4,  4,  4,  4,
142
  4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  6,  6,  6,
143
  6,  6,  6,  6,  6,  6,  6,  6,  6,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
144
  7,  7,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  9,  9,  9,  9,  9,
145
  9,  9,  9,  9,  9,  9,  9,  10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11,
146
  11, 11, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 14, 14
147
};
148
static const int sad_per_bit4lut[QINDEX_RANGE] = {
149
  2,  2,  2,  2,  2,  2,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
150
  3,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  6,  6,
151
  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  7,  7,  7,  7,  7,  7,  7,  7,  7,
152
  7,  7,  7,  7,  8,  8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  10, 10, 10, 10,
153
  10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12,
154
  12, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16,
155
  16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 19, 20, 20, 20,
156
};
157
158
151k
/* Load the motion-search SAD-per-bit weights for quantizer index QIndex
 * into cpi->mb. QIndex indexes both lookup tables directly and is not
 * range-checked here.
 */
void vp8cx_initialize_me_consts(VP8_COMP *cpi, int QIndex) {
  const int per_bit_16 = sad_per_bit16lut[QIndex];
  const int per_bit_4 = sad_per_bit4lut[QIndex];

  cpi->mb.sadperbit16 = per_bit_16;
  cpi->mb.sadperbit4 = per_bit_4;
}
162
163
151k
/* Set up the per-frame rate-distortion constants for quantizer Qvalue.
 *
 * Writes cpi->RDMULT and cpi->RDDIV, the motion-search error-per-bit
 * weight, the per-mode RD thresholds (x->rd_threshes and
 * cpi->rd_baseline_thresh), resets x->mode_test_hit_counts, rebuilds the
 * token cost table in cpi->mb and refreshes the mode costs.
 */
void vp8_initialize_rd_consts(VP8_COMP *cpi, MACROBLOCK *x, int Qvalue) {
  const double rdconst = 2.80;
  double capped_q = 160.0;
  FRAME_CONTEXT *probs_src;
  int scale_down;
  int q;
  int i;

  if (Qvalue < 160) capped_q = (double)Qvalue;

  vpx_clear_system_state();

  /* Further tests required to see if optimum is different
   * for key frames, golden frames and arf frames.
   */
  cpi->RDMULT = (int)(rdconst * (capped_q * capped_q));

  /* Extend rate multiplier along side quantizer zbin increases */
  if (cpi->mb.zbin_over_quant > 0) {
    /* Experimental code using the same basic equation as used for Q above
     * The units of cpi->mb.zbin_over_quant are 1/128 of Q bin size
     */
    const double oq_factor =
        1.0 + ((double)0.0015625 * cpi->mb.zbin_over_quant);
    const double modq = (int)((double)capped_q * oq_factor);

    cpi->RDMULT = (int)(rdconst * (modq * modq));
  }

  /* Second pass, non-key frames: scale RDMULT up by rd_iifactor/16, with
   * the table index clamped to its last entry.
   */
  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    cpi->RDMULT +=
        (cpi->RDMULT * rd_iifactor[(cpi->twopass.next_iiratio > 31)
                                       ? 31
                                       : cpi->twopass.next_iiratio]) >>
        4;
  }

  /* errorperbit must never be zero. */
  cpi->mb.errorperbit = (cpi->RDMULT / 110);
  if (cpi->mb.errorperbit == 0) cpi->mb.errorperbit = 1;

  vp8_set_speed_features(cpi);

  for (i = 0; i < MAX_MODES; ++i) {
    x->mode_test_hit_counts[i] = 0;
  }

  q = (int)pow(Qvalue, 1.25);
  if (q < 8) q = 8;

  /* A large multiplier is divided by 100 and paired with RDDIV = 1;
   * otherwise RDDIV is 100. The thresholds are scaled to match.
   */
  scale_down = (cpi->RDMULT > 1000);
  if (scale_down) {
    cpi->RDDIV = 1;
    cpi->RDMULT /= 100;
  } else {
    cpi->RDDIV = 100;
  }

  for (i = 0; i < MAX_MODES; ++i) {
    x->rd_threshes[i] = INT_MAX;

    if (scale_down) {
      if (cpi->sf.thresh_mult[i] < INT_MAX) {
        x->rd_threshes[i] = cpi->sf.thresh_mult[i] * q / 100;
      }
    } else if (cpi->sf.thresh_mult[i] < (INT_MAX / q)) {
      x->rd_threshes[i] = cpi->sf.thresh_mult[i] * q;
    }

    cpi->rd_baseline_thresh[i] = x->rd_threshes[i];
  }

  /* build token cost array for the type of frame we have now */
  if (cpi->common.refresh_alt_ref_frame) {
    probs_src = &cpi->lfc_a;
  } else if (cpi->common.refresh_golden_frame) {
    probs_src = &cpi->lfc_g;
  } else {
    probs_src = &cpi->lfc_n;
  }

  fill_token_costs(cpi->mb.token_costs,
                   (const vp8_prob(*)[8][3][11])probs_src->coef_probs);

  /* TODO make these mode costs depend on last,alt or gold too.  (jbb) */
  vp8_init_mode_costs(cpi);
}
260
261
59.1k
/* Adapt cpi->Speed to the measured average encode and mode-pick times.
 *
 * The per-frame budget is 1000000 / framerate, scaled by
 * (16 - cpu_used) / 16. Speed is kept within [4, 16] by the clamps below,
 * and the running averages are reset whenever Speed is changed by one of
 * the adjustment steps.
 *
 * NOTE(review): the original local was called milliseconds_for_compress,
 * but 1000000 / framerate looks like microseconds per frame - confirm
 * against the units of avg_encode_time / avg_pick_mode_time.
 */
void vp8_auto_select_speed(VP8_COMP *cpi) {
  int time_budget = (int)(1000000 / cpi->framerate);

  time_budget = time_budget * (16 - cpi->oxcf.cpu_used) / 16;

  /* Over budget: jump four speed steps at once. */
  if (!(cpi->avg_pick_mode_time < time_budget &&
        (cpi->avg_encode_time - cpi->avg_pick_mode_time) < time_budget)) {
    cpi->Speed += 4;

    if (cpi->Speed > 16) cpi->Speed = 16;

    cpi->avg_pick_mode_time = 0;
    cpi->avg_encode_time = 0;
    return;
  }

  /* No mode-pick timing recorded (average is 0): use the base speed. */
  if (cpi->avg_pick_mode_time == 0) {
    cpi->Speed = 4;
    return;
  }

  if (time_budget * 100 < cpi->avg_encode_time * 95) {
    cpi->Speed += 2;
    cpi->avg_pick_mode_time = 0;
    cpi->avg_encode_time = 0;

    if (cpi->Speed > 16) {
      cpi->Speed = 16;
    }
  }

  if (time_budget * 100 >
      cpi->avg_encode_time * auto_speed_thresh[cpi->Speed]) {
    cpi->Speed -= 1;
    cpi->avg_pick_mode_time = 0;
    cpi->avg_encode_time = 0;

    /* In real-time mode, cpi->Speed is in [4, 16]. */
    if (cpi->Speed < 4) {
      cpi->Speed = 4;
    }
  }
}
318
319
0
/* C reference implementation: sum of squared differences between the 16
 * coefficients in `coeff` and the 16 values in `dqcoeff`.
 */
int vp8_block_error_c(short *coeff, short *dqcoeff) {
  int sse = 0;
  int k = 0;

  while (k < 16) {
    const int delta = coeff[k] - dqcoeff[k];

    sse += delta * delta;
    ++k;
  }

  return sse;
}
330
331
0
/* C reference implementation: squared coefficient error summed over
 * blocks 0..15 of the macroblock. Within each block the sum starts at
 * coefficient index `dc`, so dc == 1 leaves out coefficient 0.
 */
int vp8_mbblock_error_c(MACROBLOCK *mb, int dc) {
  int total = 0;
  int blk;

  for (blk = 0; blk < 16; ++blk) {
    BLOCK *const src = &mb->block[blk];
    BLOCKD *const rec = &mb->e_mbd.block[blk];
    int blk_err = 0;
    int k;

    for (k = dc; k < 16; ++k) {
      const int delta = src->coeff[k] - rec->dqcoeff[k];

      blk_err += delta * delta;
    }

    total += blk_err;
  }

  return total;
}
353
354
0
/* C reference implementation: squared coefficient error summed over
 * blocks 16..23 of the macroblock, one vp8_block_error_c() call per block.
 */
int vp8_mbuverror_c(MACROBLOCK *mb) {
  int total = 0;
  int blk = 16;

  do {
    total += vp8_block_error_c(mb->block[blk].coeff,
                               mb->e_mbd.block[blk].dqcoeff);
  } while (++blk < 24);

  return total;
}
370
371
13.5k
/* Sum of the SSE values reported by the 8x8 variance functions for the
 * two chroma planes of the current macroblock.
 *
 * The macroblock motion vector is pushed one unit away from zero and then
 * halved. The result is split into a buffer offset (>> 3) and a low
 * three-bit remainder (& 7); when either remainder is non-zero the
 * sub-pixel variance function is used, otherwise the plain one.
 */
int VP8_UVSSE(MACROBLOCK *x) {
  MACROBLOCKD *const xd = &x->e_mbd;
  unsigned char *const u_src = (*(x->block[16].base_src) + x->block[16].src);
  unsigned char *const v_src = (*(x->block[20].base_src) + x->block[20].src);
  const int src_stride = x->block[16].src_stride;
  const int ref_stride = xd->pre.uv_stride;
  int row = xd->mode_info_context->mbmi.mv.as_mv.row;
  int col = xd->mode_info_context->mbmi.mv.as_mv.col;
  unsigned char *u_ref;
  unsigned char *v_ref;
  unsigned int sse_u = 0;
  unsigned int sse_v = 0;
  int ref_offset;

  /* Step away from zero before halving. */
  row += (row < 0) ? -1 : 1;
  col += (col < 0) ? -1 : 1;

  row /= 2;
  col /= 2;

  ref_offset = (row >> 3) * ref_stride + (col >> 3);
  u_ref = xd->pre.u_buffer + ref_offset;
  v_ref = xd->pre.v_buffer + ref_offset;

  if ((row | col) & 7) {
    vpx_sub_pixel_variance8x8(u_ref, ref_stride, col & 7, row & 7, u_src,
                              src_stride, &sse_u);
    vpx_sub_pixel_variance8x8(v_ref, ref_stride, col & 7, row & 7, v_src,
                              src_stride, &sse_v);
  } else {
    vpx_variance8x8(u_ref, ref_stride, u_src, src_stride, &sse_u);
    vpx_variance8x8(v_ref, ref_stride, v_src, src_stride, &sse_v);
  }

  return sse_u + sse_v;
}
416
417
/* Token cost of one block's quantized coefficients.
 *
 * Coefficients are walked in vp8_default_zig_zag1d order from the first
 * coded position up to the block's eob, adding the token cost and the
 * value cost of each one; an end-of-block token cost is added when the
 * walk stops short of position 16. On return *a and *l are both set to 1
 * if the walk moved past the first coded position, else 0.
 */
static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, int type, ENTROPY_CONTEXT *a,
                       ENTROPY_CONTEXT *l) {
  const int first = !type; /* start at coef 0, unless Y with Y2 */
  const int eob = (int)(*b->eob);
  const short *const qcoeff = b->qcoeff;
  int ctx; /* surrounding block/prev coef predictor */
  int bits = 0;
  int pos = first;

  VP8_COMBINEENTROPYCONTEXTS(ctx, *a, *l);

  assert(eob <= 16);
  while (pos < eob) {
    const int val = qcoeff[vp8_default_zig_zag1d[pos]];
    const int tok = vp8_dct_value_tokens_ptr[val].Token;

    bits += mb->token_costs[type][vp8_coef_bands[pos]][ctx][tok];
    bits += vp8_dct_value_cost_ptr[val];
    ctx = vp8_prev_token_class[tok];
    ++pos;
  }

  if (pos < 16) {
    bits += mb->token_costs[type][vp8_coef_bands[pos]][ctx][DCT_EOB_TOKEN];
  }

  ctx = (pos != first); /* did the walk get past the first position? */
  *a = *l = ctx;

  return bits;
}
445
446
8.65M
/* Token cost of blocks 0..15 (costed as PLANE_TYPE_Y_NO_DC) plus block 24
 * (costed as PLANE_TYPE_Y2). The entropy contexts are copied to locals
 * first, so the macroblock's own above/left contexts are not modified.
 */
static int vp8_rdcost_mby(MACROBLOCK *mb) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  ENTROPY_CONTEXT_PLANES above_copy = *xd->above_context;
  ENTROPY_CONTEXT_PLANES left_copy = *xd->left_context;
  ENTROPY_CONTEXT *const ta = (ENTROPY_CONTEXT *)&above_copy;
  ENTROPY_CONTEXT *const tl = (ENTROPY_CONTEXT *)&left_copy;
  int total = 0;
  int blk;

  for (blk = 0; blk < 16; ++blk) {
    total += cost_coeffs(mb, &xd->block[blk], PLANE_TYPE_Y_NO_DC,
                         &ta[vp8_block2above[blk]], &tl[vp8_block2left[blk]]);
  }

  total += cost_coeffs(mb, &xd->block[24], PLANE_TYPE_Y2,
                       &ta[vp8_block2above[24]], &tl[vp8_block2left[24]]);

  return total;
}
470
471
8.65M
/* Rate and distortion of the macroblock's blocks 0..15 plus the second
 * order block 24, for the predictor currently held in mb->e_mbd.predictor.
 *
 * Subtracts the predictor, runs the forward transforms and quantization,
 * then stores the token cost in *Rate and the scaled coefficient error in
 * *Distortion.
 */
static void macro_block_yrd(MACROBLOCK *mb, int *Rate, int *Distortion) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  BLOCK *const y2_src = &mb->block[24];
  BLOCKD *const y2_rec = &xd->block[24];
  short *const y2_dc = y2_src->src_diff;
  int sse;
  int blk;

  vp8_subtract_mby(mb->src_diff, *(mb->block[0].base_src),
                   mb->block[0].src_stride, xd->predictor, 16);

  /* Fdct and building the 2nd order block: each 8x4 transform covers two
   * blocks, whose first coefficients land at coeff[0] and coeff[16].
   */
  for (blk = 0; blk < 16; blk += 2) {
    BLOCK *const pair = &mb->block[blk];

    mb->short_fdct8x4(pair->src_diff, pair->coeff, 32);
    y2_dc[blk] = pair->coeff[0];
    y2_dc[blk + 1] = pair->coeff[16];
  }

  /* 2nd order fdct */
  mb->short_walsh4x4(y2_src->src_diff, y2_src->coeff, 8);

  /* Quantization */
  for (blk = 0; blk < 16; ++blk) {
    mb->quantize_b(&mb->block[blk], &xd->block[blk]);
  }

  /* DC prediction and quantization of the 2nd order block */
  mb->quantize_b(y2_src, y2_rec);

  /* Distortion */
  sse = vp8_mbblock_error(mb, 1) << 2;
  sse += vp8_block_error(y2_src->coeff, y2_rec->dqcoeff);
  *Distortion = (sse >> 4);

  /* rate */
  *Rate = vp8_rdcost_mby(mb);
}
510
511
27.4M
/* Copy the left 4x4 block of a predictor buffer laid out with a row stride
 * of 16 bytes: four rows of four bytes each, at byte offsets 0, 16, 32
 * and 48 in both buffers.
 *
 * The bytes are moved with memcpy() instead of being read and written
 * through unsigned int pointers, so neither buffer has to be int-aligned
 * and no unsigned char storage is accessed through an incompatible type.
 * The previous code produced the same byte layout on targets where
 * unsigned int is four bytes wide.
 */
static void copy_predictor(unsigned char *dst, const unsigned char *predictor) {
  int row;

  for (row = 0; row < 4; ++row) {
    memcpy(dst + 16 * row, predictor + 16 * row, 4);
  }
}
519
static int rd_pick_intra4x4block(MACROBLOCK *x, BLOCK *be, BLOCKD *b,
520
                                 B_PREDICTION_MODE *best_mode,
521
                                 const int *bmode_costs, ENTROPY_CONTEXT *a,
522
                                 ENTROPY_CONTEXT *l,
523
524
                                 int *bestrate, int *bestratey,
525
14.7M
                                 int *bestdistortion) {
526
14.7M
  B_PREDICTION_MODE mode;
527
14.7M
  int best_rd = INT_MAX;
528
14.7M
  int rate = 0;
529
14.7M
  int distortion;
530
531
14.7M
  ENTROPY_CONTEXT ta = *a, tempa = *a;
532
14.7M
  ENTROPY_CONTEXT tl = *l, templ = *l;
533
  /*
534
   * The predictor buffer is a 2d buffer with a stride of 16.  Create
535
   * a temp buffer that meets the stride requirements, but we are only
536
   * interested in the left 4x4 block
537
   * */
538
14.7M
  DECLARE_ALIGNED(16, unsigned char, best_predictor[16 * 4]);
539
14.7M
  DECLARE_ALIGNED(16, short, best_dqcoeff[16]);
540
14.7M
  int dst_stride = x->e_mbd.dst.y_stride;
541
14.7M
  unsigned char *dst = x->e_mbd.dst.y_buffer + b->offset;
542
543
14.7M
  unsigned char *Above = dst - dst_stride;
544
14.7M
  unsigned char *yleft = dst - 1;
545
14.7M
  unsigned char top_left = Above[-1];
546
547
162M
  for (mode = B_DC_PRED; mode <= B_HU_PRED; ++mode) {
548
147M
    int this_rd;
549
147M
    int ratey;
550
551
147M
    rate = bmode_costs[mode];
552
553
147M
    vp8_intra4x4_predict(Above, yleft, dst_stride, mode, b->predictor, 16,
554
147M
                         top_left);
555
147M
    vp8_subtract_b(be, b, 16);
556
147M
    x->short_fdct4x4(be->src_diff, be->coeff, 32);
557
147M
    x->quantize_b(be, b);
558
559
147M
    tempa = ta;
560
147M
    templ = tl;
561
562
147M
    ratey = cost_coeffs(x, b, PLANE_TYPE_Y_WITH_DC, &tempa, &templ);
563
147M
    rate += ratey;
564
147M
    distortion = vp8_block_error(be->coeff, b->dqcoeff) >> 2;
565
566
147M
    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
567
568
147M
    if (this_rd < best_rd) {
569
27.4M
      *bestrate = rate;
570
27.4M
      *bestratey = ratey;
571
27.4M
      *bestdistortion = distortion;
572
27.4M
      best_rd = this_rd;
573
27.4M
      *best_mode = mode;
574
27.4M
      *a = tempa;
575
27.4M
      *l = templ;
576
27.4M
      copy_predictor(best_predictor, b->predictor);
577
27.4M
      memcpy(best_dqcoeff, b->dqcoeff, 32);
578
27.4M
    }
579
147M
  }
580
14.7M
  b->bmi.as_mode = *best_mode;
581
582
14.7M
  vp8_short_idct4x4llm(best_dqcoeff, best_predictor, 16, dst, dst_stride);
583
584
14.7M
  return best_rd;
585
14.7M
}
586
587
/* Choose a 4x4 intra mode for each of the 16 blocks of the macroblock.
 *
 * Accumulates the per-block RD costs and gives up, returning INT_MAX, as
 * soon as the running total reaches best_rd. Otherwise *Rate, *rate_y and
 * *Distortion receive the totals (rate includes the B_PRED mode cost) and
 * the RDCOST of those totals is returned. The chosen modes are stored in
 * the mode info's bmi[] entries as the loop goes, including on the early
 * exit path.
 */
static int rd_pick_intra4x4mby_modes(MACROBLOCK *mb, int *Rate, int *rate_y,
                                     int *Distortion, int best_rd) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  MODE_INFO *const mic = xd->mode_info_context;
  const int mis = xd->mode_info_stride;
  const int64_t rd_limit = (int64_t)best_rd;
  /* Local context copies: the real above/left contexts stay untouched. */
  ENTROPY_CONTEXT_PLANES above_copy = *xd->above_context;
  ENTROPY_CONTEXT_PLANES left_copy = *xd->left_context;
  ENTROPY_CONTEXT *const ta = (ENTROPY_CONTEXT *)&above_copy;
  ENTROPY_CONTEXT *const tl = (ENTROPY_CONTEXT *)&left_copy;
  const int *bmode_costs = mb->inter_bmode_costs;
  int rate_sum = mb->mbmode_cost[xd->frame_type][B_PRED];
  int coef_rate_sum = 0;
  int dist_sum = 0;
  int64_t rd_sum = 0;
  int blk;

  intra_prediction_down_copy(xd, xd->dst.y_buffer - xd->dst.y_stride + 16);

  for (blk = 0; blk < 16; ++blk) {
    B_PREDICTION_MODE picked = B_MODE_COUNT;
    int r = 0, ry = 0, d = 0;

    /* Key frames use mode costs conditioned on the above/left modes. */
    if (xd->frame_type == KEY_FRAME) {
      const B_PREDICTION_MODE A = above_block_mode(mic, blk, mis);
      const B_PREDICTION_MODE L = left_block_mode(mic, blk);

      bmode_costs = mb->bmode_costs[A][L];
    }

    rd_sum += rd_pick_intra4x4block(
        mb, &mb->block[blk], &xd->block[blk], &picked, bmode_costs,
        &ta[vp8_block2above[blk]], &tl[vp8_block2left[blk]], &r, &ry, &d);

    rate_sum += r;
    dist_sum += d;
    coef_rate_sum += ry;

    assert(picked != B_MODE_COUNT);
    mic->bmi[blk].as_mode = picked;

    /* Already no better than the caller's best: stop searching. */
    if (rd_sum >= rd_limit) return INT_MAX;
  }

  *Rate = rate_sum;
  *rate_y = coef_rate_sum;
  *Distortion = dist_sum;

  return RDCOST(mb->rdmult, mb->rddiv, rate_sum, dist_sum);
}
645
646
/* Evaluate the 16x16 intra modes DC_PRED..TM_PRED and keep the one with
 * the lowest RDCOST.
 *
 * On return mbmi.mode holds the winner, *Rate / *rate_y / *Distortion hold
 * its total rate, coefficient rate and distortion, and the winning RD
 * cost is returned.
 */
static int rd_pick_intra16x16mby_mode(MACROBLOCK *x, int *Rate, int *rate_y,
                                      int *Distortion) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_PREDICTION_MODE winner = MB_MODE_COUNT;
  MB_PREDICTION_MODE mode;
  int best_rd = INT_MAX;

  /* Y Search for 16x16 intra prediction mode */
  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    int coef_rate;
    int total_rate;
    int dist;
    int rd;

    xd->mode_info_context->mbmi.mode = mode;

    vp8_build_intra_predictors_mby_s(xd, xd->dst.y_buffer - xd->dst.y_stride,
                                     xd->dst.y_buffer - 1, xd->dst.y_stride,
                                     xd->predictor, 16);

    macro_block_yrd(x, &coef_rate, &dist);
    total_rate =
        coef_rate +
        x->mbmode_cost[xd->frame_type][xd->mode_info_context->mbmi.mode];

    rd = RDCOST(x->rdmult, x->rddiv, total_rate, dist);
    if (rd >= best_rd) continue;

    winner = mode;
    best_rd = rd;
    *Rate = total_rate;
    *rate_y = coef_rate;
    *Distortion = dist;
  }

  assert(winner != MB_MODE_COUNT);
  xd->mode_info_context->mbmi.mode = winner;
  return best_rd;
}
683
684
9.48M
/* Token cost of blocks 16..23, costed as PLANE_TYPE_UV. The entropy
 * contexts are copied to locals first, so the macroblock's own above/left
 * contexts are not modified.
 */
static int rd_cost_mbuv(MACROBLOCK *mb) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  ENTROPY_CONTEXT_PLANES above_copy = *xd->above_context;
  ENTROPY_CONTEXT_PLANES left_copy = *xd->left_context;
  ENTROPY_CONTEXT *const ta = (ENTROPY_CONTEXT *)&above_copy;
  ENTROPY_CONTEXT *const tl = (ENTROPY_CONTEXT *)&left_copy;
  int total = 0;
  int blk;

  for (blk = 16; blk < 24; ++blk) {
    total += cost_coeffs(mb, &xd->block[blk], PLANE_TYPE_UV,
                         &ta[vp8_block2above[blk]], &tl[vp8_block2left[blk]]);
  }

  return total;
}
705
706
/* Chroma rate/distortion for a 16x16 inter prediction.
 *
 * Builds the 16x16 inter chroma predictors, then subtracts, transforms
 * and quantizes the chroma blocks. *rate gets their token cost,
 * *distortion a quarter of their coefficient error, and the RDCOST of the
 * two is returned. `cpi` and `fullpixel` are unused.
 */
static int rd_inter16x16_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate,
                            int *distortion, int fullpixel) {
  MACROBLOCKD *const xd = &x->e_mbd;
  int uv_rate;
  int uv_dist;

  (void)cpi;
  (void)fullpixel;

  vp8_build_inter16x16_predictors_mbuv(xd);
  vp8_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
                    x->src.uv_stride, &xd->predictor[256],
                    &xd->predictor[320], 8);

  vp8_transform_mbuv(x);
  vp8_quantize_mbuv(x);

  uv_rate = rd_cost_mbuv(x);
  uv_dist = vp8_mbuverror(x) / 4;

  *rate = uv_rate;
  *distortion = uv_dist;

  return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
}
724
725
/* Chroma rate/distortion for a split (4x4) inter prediction.
 *
 * Builds the 4x4 inter chroma predictors, then subtracts, transforms and
 * quantizes the chroma blocks. *rate gets their token cost, *distortion a
 * quarter of their coefficient error, and the RDCOST of the two is
 * returned. `cpi` and `fullpixel` are unused.
 */
static int rd_inter4x4_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate,
                          int *distortion, int fullpixel) {
  MACROBLOCKD *const xd = &x->e_mbd;
  int uv_rate;
  int uv_dist;

  (void)cpi;
  (void)fullpixel;

  vp8_build_inter4x4_predictors_mbuv(xd);
  vp8_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
                    x->src.uv_stride, &xd->predictor[256],
                    &xd->predictor[320], 8);

  vp8_transform_mbuv(x);
  vp8_quantize_mbuv(x);

  uv_rate = rd_cost_mbuv(x);
  uv_dist = vp8_mbuverror(x) / 4;

  *rate = uv_rate;
  *distortion = uv_dist;

  return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
}
743
744
/* Evaluate the chroma intra modes DC_PRED..TM_PRED and keep the one with
 * the lowest RDCOST.
 *
 * On return mbmi.uv_mode holds the winner, *rate its total rate (token
 * cost plus mode cost), *rate_tokenonly its token cost alone and
 * *distortion its distortion.
 */
static void rd_pick_intra_mbuv_mode(MACROBLOCK *x, int *rate,
                                    int *rate_tokenonly, int *distortion) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_PREDICTION_MODE winner = MB_MODE_COUNT;
  MB_PREDICTION_MODE mode;
  int best_rd = INT_MAX;
  int best_rate = 0;
  int best_dist = 0;

  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    int token_rate;
    int total_rate;
    int dist;
    int rd;

    xd->mode_info_context->mbmi.uv_mode = mode;

    vp8_build_intra_predictors_mbuv_s(
        xd, xd->dst.u_buffer - xd->dst.uv_stride,
        xd->dst.v_buffer - xd->dst.uv_stride, xd->dst.u_buffer - 1,
        xd->dst.v_buffer - 1, xd->dst.uv_stride, &xd->predictor[256],
        &xd->predictor[320], 8);

    vp8_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
                      x->src.uv_stride, &xd->predictor[256],
                      &xd->predictor[320], 8);
    vp8_transform_mbuv(x);
    vp8_quantize_mbuv(x);

    token_rate = rd_cost_mbuv(x);
    total_rate =
        token_rate + x->intra_uv_mode_cost[xd->frame_type]
                                          [xd->mode_info_context->mbmi.uv_mode];

    dist = vp8_mbuverror(x) / 4;

    rd = RDCOST(x->rdmult, x->rddiv, total_rate, dist);
    if (rd >= best_rd) continue;

    best_rd = rd;
    best_dist = dist;
    best_rate = total_rate;
    *rate_tokenonly = token_rate;
    winner = mode;
  }

  *rate = best_rate;
  *distortion = best_dist;

  assert(winner != MB_MODE_COUNT);
  xd->mode_info_context->mbmi.uv_mode = winner;
}
796
797
6.85M
/* Cost of signalling inter mode `m` (NEARESTMV..SPLITMV) with
 * vp8_mv_ref_tree, using probabilities that vp8_mv_ref_probs() derives
 * from near_mv_ref_ct.
 */
int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4]) {
  vp8_prob probs[VP8_MVREFS - 1];

  assert(m >= NEARESTMV && m <= SPLITMV);
  vp8_mv_ref_probs(probs, near_mv_ref_ct);

  return vp8_cost_token(vp8_mv_ref_tree, probs,
                        &vp8_mv_ref_encoding_array[m - NEARESTMV]);
}
804
805
2.91M
/* Store prediction mode `mb` and motion vector `*mv` in the mode info of
 * the current macroblock.
 */
void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv) {
  MODE_INFO *const mic = x->e_mbd.mode_info_context;

  mic->mbmi.mode = mb;
  mic->mbmi.mv.as_int = mv->as_int;
}
809
810
static int labels2mode(MACROBLOCK *x, int const *labelings, int which_label,
811
                       B_PREDICTION_MODE this_mode, int_mv *this_mv,
812
32.3M
                       int_mv *best_ref_mv, int *mvcost[2]) {
813
32.3M
  MACROBLOCKD *const xd = &x->e_mbd;
814
32.3M
  MODE_INFO *const mic = xd->mode_info_context;
815
32.3M
  const int mis = xd->mode_info_stride;
816
817
32.3M
  int cost = 0;
818
32.3M
  int thismvcost = 0;
819
820
  /* We have to be careful retrieving previously-encoded motion vectors.
821
     Ones from this macroblock have to be pulled from the BLOCKD array
822
     as they have not yet made it to the bmi array in our MB_MODE_INFO. */
823
824
32.3M
  int i = 0;
825
826
518M
  do {
827
518M
    BLOCKD *const d = xd->block + i;
828
518M
    const int row = i >> 2, col = i & 3;
829
830
518M
    B_PREDICTION_MODE m;
831
832
518M
    if (labelings[i] != which_label) continue;
833
834
125M
    if (col && labelings[i] == labelings[i - 1]) {
835
64.7M
      m = LEFT4X4;
836
64.7M
    } else if (row && labelings[i] == labelings[i - 4]) {
837
28.4M
      m = ABOVE4X4;
838
32.3M
    } else {
839
      /* the only time we should do costing for new motion vector
840
       * or mode is when we are on a new label  (jbb May 08, 2007)
841
       */
842
32.3M
      switch (m = this_mode) {
843
8.98M
        case NEW4X4:
844
8.98M
          thismvcost = vp8_mv_bit_cost(this_mv, best_ref_mv, mvcost, 102);
845
8.98M
          break;
846
9.42M
        case LEFT4X4:
847
9.42M
          this_mv->as_int = col ? d[-1].bmi.mv.as_int : left_block_mv(mic, i);
848
9.42M
          break;
849
7.33M
        case ABOVE4X4:
850
7.33M
          this_mv->as_int =
851
7.33M
              row ? d[-4].bmi.mv.as_int : above_block_mv(mic, i, mis);
852
7.33M
          break;
853
6.63M
        case ZERO4X4: this_mv->as_int = 0; break;
854
0
        default: break;
855
32.3M
      }
856
857
32.3M
      if (m == ABOVE4X4) { /* replace above with left if same */
858
7.33M
        int_mv left_mv;
859
860
7.33M
        left_mv.as_int = col ? d[-1].bmi.mv.as_int : left_block_mv(mic, i);
861
862
7.33M
        if (left_mv.as_int == this_mv->as_int) m = LEFT4X4;
863
7.33M
      }
864
865
32.3M
      cost = x->inter_bmode_costs[m];
866
32.3M
    }
867
868
125M
    d->bmi.mv.as_int = this_mv->as_int;
869
870
125M
    x->partition_info->bmi[i].mode = m;
871
125M
    x->partition_info->bmi[i].mv.as_int = this_mv->as_int;
872
873
518M
  } while (++i < 16);
874
875
32.3M
  cost += thismvcost;
876
32.3M
  return cost;
877
32.3M
}
878
879
/* Token cost of the blocks among 0..15 whose entry in `labels` equals
 * `which_label`, costed as PLANE_TYPE_Y_WITH_DC. The entries of ta / tl
 * that belong to those blocks are updated by the costing.
 */
static int rdcost_mbsegment_y(MACROBLOCK *mb, const int *labels,
                              int which_label, ENTROPY_CONTEXT *ta,
                              ENTROPY_CONTEXT *tl) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  int total = 0;
  int blk;

  for (blk = 0; blk < 16; ++blk) {
    if (labels[blk] != which_label) continue;

    total += cost_coeffs(mb, &xd->block[blk], PLANE_TYPE_Y_WITH_DC,
                         &ta[vp8_block2above[blk]], &tl[vp8_block2left[blk]]);
  }

  return total;
}
895
/* Predict, transform and quantize the blocks among 0..15 whose entry in
 * `labels` equals `which_label`, and return the sum of their coefficient
 * errors.
 */
static unsigned int vp8_encode_inter_mb_segment(MACROBLOCK *x,
                                                int const *labels,
                                                int which_label) {
  MACROBLOCKD *const xd = &x->e_mbd;
  unsigned char *const ref_base = xd->pre.y_buffer;
  const int ref_stride = xd->pre.y_stride;
  unsigned int sse = 0;
  int blk;

  for (blk = 0; blk < 16; ++blk) {
    BLOCK *src;
    BLOCKD *rec;

    if (labels[blk] != which_label) continue;

    rec = &xd->block[blk];
    src = &x->block[blk];

    vp8_build_inter_predictors_b(rec, 16, ref_base, ref_stride,
                                 xd->subpixel_predict);
    vp8_subtract_b(src, rec, 16);
    x->short_fdct4x4(src->src_diff, src->coeff, 32);
    x->quantize_b(src, rec);

    sse += vp8_block_error(src->coeff, rec->dqcoeff);
  }

  return sse;
}
920
921
static const unsigned int segmentation_to_sseshift[4] = { 3, 3, 2, 0 };
922
923
typedef struct {
924
  int_mv *ref_mv;
925
  int_mv mvp;
926
927
  int segment_rd;
928
  int segment_num;
929
  int r;
930
  int d;
931
  int segment_yrate;
932
  B_PREDICTION_MODE modes[16];
933
  int_mv mvs[16];
934
  unsigned char eobs[16];
935
936
  int mvthresh;
937
  int *mdcounts;
938
939
  int_mv sv_mvp[4]; /* save 4 mvp from 8x8 */
940
  int sv_istep[2];  /* save 2 initial step_param for 16x8/8x16 */
941
942
} BEST_SEG_INFO;
943
944
/* Evaluate one macroblock split layout and keep it if it is the best so far.
 *
 * `segmentation` indexes vp8_mbsplits[] / vp8_mbsplit_count[] (and
 * cpi->fn_ptr[] for the SAD/variance functions). For every label of the
 * layout the modes LEFT4X4..NEW4X4 are tried in order; NEW4X4 runs a diamond
 * search (plus an optional full search when cpi->compressor_speed == 0) and a
 * fractional refinement. The cheapest mode per label is committed through
 * labels2mode() and its rate / distortion accumulated.
 *
 * When the accumulated RD cost is below bsi->segment_rd, *bsi is updated with
 * the totals and a snapshot of the per-block MVs, modes and eobs. The loop
 * over labels stops early as soon as the running cost reaches
 * bsi->segment_rd.
 */
static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, BEST_SEG_INFO *bsi,
                             unsigned int segmentation) {
  int const *const labels = vp8_mbsplits[segmentation];
  const int label_count = vp8_mbsplit_count[segmentation];
  vp8_variance_fn_ptr_t *const v_fn_ptr = &cpi->fn_ptr[segmentation];

  /* 64 makes this threshold really big effectively making it so that we
   * very rarely check mvs on segments.   setting this to 1 would make mv
   * thresh roughly equal to what it is for macroblocks
   */
  const int label_mv_thresh = 1 * bsi->mvthresh / label_count;

  /* Entropy contexts as they stand after the labels committed so far. */
  ENTROPY_CONTEXT_PLANES t_above = *x->e_mbd.above_context;
  ENTROPY_CONTEXT_PLANES t_left = *x->e_mbd.left_context;
  /* Contexts produced by the best mode of the current label. They live at
   * function scope, so a label that selects nothing inherits the previous
   * label's values (zero before the first selection).
   */
  ENTROPY_CONTEXT_PLANES t_above_b, t_left_b;

  B_PREDICTION_MODE this_mode;
  int seg_rate = 0;
  int seg_dist = 0;
  int seg_yrate = 0;
  int seg_rd = 0;
  /* Rate / distortion of the most recently selected label mode; deliberately
   * NOT reset per label, matching the contexts above.
   */
  int label_rate = 0;
  int label_dist = 0;
  int rate;
  int i;

  vp8_zero(t_above_b);
  vp8_zero(t_left_b);

  /* Segmentation method overheads */
  rate = vp8_cost_token(vp8_mbsplit_tree, vp8_mbsplit_probs,
                        vp8_mbsplit_encodings + segmentation);
  rate += vp8_cost_mv_ref(SPLITMV, bsi->mdcounts);
  seg_rd += RDCOST(x->rdmult, x->rddiv, rate, 0);
  seg_rate += rate;

  for (i = 0; i < label_count; ++i) {
    int_mv mode_mv[B_MODE_COUNT] = { { 0 }, { 0 } };
    B_PREDICTION_MODE mode_selected = ZERO4X4;
    int best_label_rd = INT_MAX;
    int best_label_yrate = 0;

    /* search for the best motion vector on this segment */
    for (this_mode = LEFT4X4; this_mode <= NEW4X4; ++this_mode) {
      /* Scratch copies so a rejected mode leaves t_above / t_left intact. */
      ENTROPY_CONTEXT_PLANES t_above_s = t_above;
      ENTROPY_CONTEXT_PLANES t_left_s = t_left;
      ENTROPY_CONTEXT *const ta_s = (ENTROPY_CONTEXT *)&t_above_s;
      ENTROPY_CONTEXT *const tl_s = (ENTROPY_CONTEXT *)&t_left_s;
      int mv_row_fp;
      int mv_col_fp;
      int distortion;
      int labelyrate;
      int this_rd;

      if (this_mode == NEW4X4) {
        BLOCK *c;
        BLOCKD *e;
        int_mv mvp_full;
        int_mv temp_mv;
        int step_param = 0;
        int further_steps;
        int sadpb;
        int sseshift;
        int num00;
        int n;
        int thissme;
        int bestsme = INT_MAX;

        /* Is the best so far sufficiently good that we can't justify
         * doing a new motion search.
         */
        if (best_label_rd < label_mv_thresh) break;

        if (cpi->compressor_speed) {
          if (segmentation == BLOCK_8X16 || segmentation == BLOCK_16X8) {
            /* Label 1 of a 16x8 split starts from saved predictor 2; every
             * other label starts from the predictor with its own index.
             */
            const int src = (i == 1 && segmentation == BLOCK_16X8) ? 2 : i;

            bsi->mvp.as_int = bsi->sv_mvp[src].as_int;
            step_param = bsi->sv_istep[i];
          }

          /* use previous block's result as next block's MV
           * predictor.
           */
          if (segmentation == BLOCK_4X4 && i > 0) {
            /* Blocks 4, 8 and 12 take block i - 4 instead of block i - 1. */
            const int src = (i == 4 || i == 8 || i == 12) ? i - 4 : i - 1;

            bsi->mvp.as_int = x->e_mbd.block[src].bmi.mv.as_int;
            step_param = 2;
          }
        }

        further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
        sadpb = x->sadperbit4;

        mvp_full.as_mv.row = bsi->mvp.as_mv.row >> 3;
        mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3;

        /* find first label */
        n = vp8_mbsplit_offset[segmentation][i];
        c = &x->block[n];
        e = &x->e_mbd.block[n];

        bestsme = cpi->diamond_search_sad(x, c, e, &mvp_full, &mode_mv[NEW4X4],
                                          step_param, sadpb, &num00, v_fn_ptr,
                                          x->mvcost, bsi->ref_mv);

        /* Repeat the diamond search with larger step parameters. num00,
         * reported by the previous search, is the number of following steps
         * that are skipped without searching.
         */
        n = num00;
        num00 = 0;
        while (n < further_steps) {
          ++n;

          if (num00) {
            --num00;
            continue;
          }

          thissme = cpi->diamond_search_sad(x, c, e, &mvp_full, &temp_mv,
                                            step_param + n, sadpb, &num00,
                                            v_fn_ptr, x->mvcost, bsi->ref_mv);
          if (thissme < bestsme) {
            bestsme = thissme;
            mode_mv[NEW4X4].as_int = temp_mv.as_int;
          }
        }

        sseshift = segmentation_to_sseshift[segmentation];

        /* Should we do a full search (best quality only) */
        if ((cpi->compressor_speed == 0) && (bestsme >> sseshift) > 4000) {
          /* Check if mvp_full is within the range. */
          vp8_clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max, x->mv_row_min,
                       x->mv_row_max);

          thissme = vp8_full_search_sad(x, c, e, &mvp_full, sadpb, 16,
                                        v_fn_ptr, x->mvcost, bsi->ref_mv);

          if (thissme < bestsme) {
            bestsme = thissme;
            mode_mv[NEW4X4].as_int = e->bmi.mv.as_int;
          } else {
            /* The full search result is actually worse so
             * re-instate the previous best vector
             */
            e->bmi.mv.as_int = mode_mv[NEW4X4].as_int;
          }
        }

        if (bestsme < INT_MAX) {
          /* Outputs required by the callee's signature; not used here. */
          int disto;
          unsigned int sse;

          cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4], bsi->ref_mv,
                                       x->errorperbit, v_fn_ptr, x->mvcost,
                                       &disto, &sse);
        }
      } /* NEW4X4 */

      rate = labels2mode(x, labels, i, this_mode, &mode_mv[this_mode],
                         bsi->ref_mv, x->mvcost);

      /* Trap vectors that reach beyond the UMV borders */
      mv_row_fp = mode_mv[this_mode].as_mv.row >> 3;
      mv_col_fp = mode_mv[this_mode].as_mv.col >> 3;
      if (mv_row_fp < x->mv_row_min || mv_row_fp > x->mv_row_max ||
          mv_col_fp < x->mv_col_min || mv_col_fp > x->mv_col_max) {
        continue;
      }

      distortion = vp8_encode_inter_mb_segment(x, labels, i) / 4;

      labelyrate = rdcost_mbsegment_y(x, labels, i, ta_s, tl_s);
      rate += labelyrate;

      this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

      if (this_rd < best_label_rd) {
        label_rate = rate;
        label_dist = distortion;
        best_label_yrate = labelyrate;
        mode_selected = this_mode;
        best_label_rd = this_rd;

        t_above_b = t_above_s;
        t_left_b = t_left_s;
      }
    } /*for each 4x4 mode*/

    t_above = t_above_b;
    t_left = t_left_b;

    /* Re-apply the winning mode so block / partition state reflects it. */
    labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected],
                bsi->ref_mv, x->mvcost);

    seg_rate += label_rate;
    seg_dist += label_dist;
    seg_yrate += best_label_yrate;
    /* NOTE(review): best_label_rd is still INT_MAX if every mode above was
     * rejected, in which case this addition can overflow - confirm whether
     * that state is reachable.
     */
    seg_rd += best_label_rd;

    if (seg_rd >= bsi->segment_rd) break;

  } /* for each label */

  if (seg_rd < bsi->segment_rd) {
    bsi->r = seg_rate;
    bsi->d = seg_dist;
    bsi->segment_yrate = seg_yrate;
    bsi->segment_rd = seg_rd;
    bsi->segment_num = segmentation;

    /* store everything needed to come back to this!! */
    for (i = 0; i < 16; ++i) {
      bsi->mvs[i].as_mv = x->partition_info->bmi[i].mv.as_mv;
      bsi->modes[i] = x->partition_info->bmi[i].mode;
      bsi->eobs[i] = x->e_mbd.eobs[i];
    }
  }
}
1184
1185
1.63M
static void vp8_cal_step_param(int sr, int *sp) {
1186
1.63M
  int step = 0;
1187
1188
1.63M
  if (sr > MAX_FIRST_STEP) {
1189
57.2k
    sr = MAX_FIRST_STEP;
1190
1.57M
  } else if (sr < 1) {
1191
752k
    sr = 1;
1192
752k
  }
1193
1194
4.87M
  while (sr >>= 1) step++;
1195
1196
1.63M
  *sp = MAX_MVSEARCH_STEPS - 1 - step;
1197
1.63M
}
1198
1199
static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
1200
                                           int_mv *best_ref_mv, int best_rd,
1201
                                           int *mdcounts, int *returntotrate,
1202
                                           int *returnyrate,
1203
                                           int *returndistortion,
1204
743k
                                           int mvthresh) {
1205
743k
  int i;
1206
743k
  BEST_SEG_INFO bsi;
1207
1208
743k
  memset(&bsi, 0, sizeof(bsi));
1209
1210
743k
  bsi.segment_rd = best_rd;
1211
743k
  bsi.ref_mv = best_ref_mv;
1212
743k
  bsi.mvp.as_int = best_ref_mv->as_int;
1213
743k
  bsi.mvthresh = mvthresh;
1214
743k
  bsi.mdcounts = mdcounts;
1215
1216
12.6M
  for (i = 0; i < 16; ++i) {
1217
11.8M
    bsi.modes[i] = ZERO4X4;
1218
11.8M
  }
1219
1220
743k
  if (cpi->compressor_speed == 0) {
1221
    /* for now, we will keep the original segmentation order
1222
       when in best quality mode */
1223
0
    rd_check_segment(cpi, x, &bsi, BLOCK_16X8);
1224
0
    rd_check_segment(cpi, x, &bsi, BLOCK_8X16);
1225
0
    rd_check_segment(cpi, x, &bsi, BLOCK_8X8);
1226
0
    rd_check_segment(cpi, x, &bsi, BLOCK_4X4);
1227
743k
  } else {
1228
743k
    int sr;
1229
1230
743k
    rd_check_segment(cpi, x, &bsi, BLOCK_8X8);
1231
1232
743k
    if (bsi.segment_rd < best_rd) {
1233
407k
      int col_min = ((best_ref_mv->as_mv.col + 7) >> 3) - MAX_FULL_PEL_VAL;
1234
407k
      int row_min = ((best_ref_mv->as_mv.row + 7) >> 3) - MAX_FULL_PEL_VAL;
1235
407k
      int col_max = (best_ref_mv->as_mv.col >> 3) + MAX_FULL_PEL_VAL;
1236
407k
      int row_max = (best_ref_mv->as_mv.row >> 3) + MAX_FULL_PEL_VAL;
1237
1238
407k
      int tmp_col_min = x->mv_col_min;
1239
407k
      int tmp_col_max = x->mv_col_max;
1240
407k
      int tmp_row_min = x->mv_row_min;
1241
407k
      int tmp_row_max = x->mv_row_max;
1242
1243
      /* Get intersection of UMV window and valid MV window to reduce # of
1244
       * checks in diamond search. */
1245
407k
      if (x->mv_col_min < col_min) x->mv_col_min = col_min;
1246
407k
      if (x->mv_col_max > col_max) x->mv_col_max = col_max;
1247
407k
      if (x->mv_row_min < row_min) x->mv_row_min = row_min;
1248
407k
      if (x->mv_row_max > row_max) x->mv_row_max = row_max;
1249
1250
      /* Get 8x8 result */
1251
407k
      bsi.sv_mvp[0].as_int = bsi.mvs[0].as_int;
1252
407k
      bsi.sv_mvp[1].as_int = bsi.mvs[2].as_int;
1253
407k
      bsi.sv_mvp[2].as_int = bsi.mvs[8].as_int;
1254
407k
      bsi.sv_mvp[3].as_int = bsi.mvs[10].as_int;
1255
1256
      /* Use 8x8 result as 16x8/8x16's predictor MV. Adjust search range
1257
       * according to the closeness of 2 MV. */
1258
      /* block 8X16 */
1259
407k
      {
1260
407k
        sr =
1261
407k
            MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[2].as_mv.row)) >> 3,
1262
407k
                 (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[2].as_mv.col)) >> 3);
1263
407k
        vp8_cal_step_param(sr, &bsi.sv_istep[0]);
1264
1265
407k
        sr =
1266
407k
            MAXF((abs(bsi.sv_mvp[1].as_mv.row - bsi.sv_mvp[3].as_mv.row)) >> 3,
1267
407k
                 (abs(bsi.sv_mvp[1].as_mv.col - bsi.sv_mvp[3].as_mv.col)) >> 3);
1268
407k
        vp8_cal_step_param(sr, &bsi.sv_istep[1]);
1269
1270
407k
        rd_check_segment(cpi, x, &bsi, BLOCK_8X16);
1271
407k
      }
1272
1273
      /* block 16X8 */
1274
407k
      {
1275
407k
        sr =
1276
407k
            MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[1].as_mv.row)) >> 3,
1277
407k
                 (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[1].as_mv.col)) >> 3);
1278
407k
        vp8_cal_step_param(sr, &bsi.sv_istep[0]);
1279
1280
407k
        sr =
1281
407k
            MAXF((abs(bsi.sv_mvp[2].as_mv.row - bsi.sv_mvp[3].as_mv.row)) >> 3,
1282
407k
                 (abs(bsi.sv_mvp[2].as_mv.col - bsi.sv_mvp[3].as_mv.col)) >> 3);
1283
407k
        vp8_cal_step_param(sr, &bsi.sv_istep[1]);
1284
1285
407k
        rd_check_segment(cpi, x, &bsi, BLOCK_16X8);
1286
407k
      }
1287
1288
      /* If 8x8 is better than 16x8/8x16, then do 4x4 search */
1289
      /* Not skip 4x4 if speed=0 (good quality) */
1290
407k
      if (cpi->sf.no_skip_block4x4_search || bsi.segment_num == BLOCK_8X8)
1291
      /* || (sv_segment_rd8x8-bsi.segment_rd) < sv_segment_rd8x8>>5) */
1292
155k
      {
1293
155k
        bsi.mvp.as_int = bsi.sv_mvp[0].as_int;
1294
155k
        rd_check_segment(cpi, x, &bsi, BLOCK_4X4);
1295
155k
      }
1296
1297
      /* restore UMV window */
1298
407k
      x->mv_col_min = tmp_col_min;
1299
407k
      x->mv_col_max = tmp_col_max;
1300
407k
      x->mv_row_min = tmp_row_min;
1301
407k
      x->mv_row_max = tmp_row_max;
1302
407k
    }
1303
743k
  }
1304
1305
  /* set it to the best */
1306
12.6M
  for (i = 0; i < 16; ++i) {
1307
11.8M
    BLOCKD *bd = &x->e_mbd.block[i];
1308
1309
11.8M
    bd->bmi.mv.as_int = bsi.mvs[i].as_int;
1310
11.8M
    *bd->eob = bsi.eobs[i];
1311
11.8M
  }
1312
1313
743k
  *returntotrate = bsi.r;
1314
743k
  *returndistortion = bsi.d;
1315
743k
  *returnyrate = bsi.segment_yrate;
1316
1317
  /* save partitions */
1318
743k
  x->e_mbd.mode_info_context->mbmi.partitioning = bsi.segment_num;
1319
743k
  x->partition_info->count = vp8_mbsplit_count[bsi.segment_num];
1320
1321
3.84M
  for (i = 0; i < x->partition_info->count; ++i) {
1322
3.10M
    int j;
1323
1324
3.10M
    j = vp8_mbsplit_offset[bsi.segment_num][i];
1325
1326
3.10M
    x->partition_info->bmi[i].mode = bsi.modes[j];
1327
3.10M
    x->partition_info->bmi[i].mv.as_mv = bsi.mvs[j].as_mv;
1328
3.10M
  }
1329
  /*
1330
   * used to set x->e_mbd.mode_info_context->mbmi.mv.as_int
1331
   */
1332
743k
  x->partition_info->bmi[15].mv.as_int = bsi.mvs[15].as_int;
1333
1334
743k
  return bsi.segment_rd;
1335
743k
}
1336
1337
/* The improved MV prediction */
1338
void vp8_mv_pred(VP8_COMP *cpi, MACROBLOCKD *xd, const MODE_INFO *here,
1339
                 int_mv *mvp, int refframe, int *ref_frame_sign_bias, int *sr,
1340
1.57M
                 int near_sadidx[]) {
1341
1.57M
  const MODE_INFO *above = here - xd->mode_info_stride;
1342
1.57M
  const MODE_INFO *left = here - 1;
1343
1.57M
  const MODE_INFO *aboveleft = above - 1;
1344
1.57M
  int_mv near_mvs[8];
1345
1.57M
  int near_ref[8];
1346
1.57M
  int_mv mv;
1347
1.57M
  int vcnt = 0;
1348
1.57M
  int find = 0;
1349
1.57M
  int mb_offset;
1350
1351
1.57M
  int mvx[8];
1352
1.57M
  int mvy[8];
1353
1.57M
  int i;
1354
1355
1.57M
  mv.as_int = 0;
1356
1357
1.57M
  if (here->mbmi.ref_frame != INTRA_FRAME) {
1358
1.57M
    near_mvs[0].as_int = near_mvs[1].as_int = near_mvs[2].as_int =
1359
1.57M
        near_mvs[3].as_int = near_mvs[4].as_int = near_mvs[5].as_int =
1360
1.57M
            near_mvs[6].as_int = near_mvs[7].as_int = 0;
1361
1.57M
    near_ref[0] = near_ref[1] = near_ref[2] = near_ref[3] = near_ref[4] =
1362
1.57M
        near_ref[5] = near_ref[6] = near_ref[7] = 0;
1363
1364
    /* read in 3 nearby block's MVs from current frame as prediction
1365
     * candidates.
1366
     */
1367
1.57M
    if (above->mbmi.ref_frame != INTRA_FRAME) {
1368
489k
      near_mvs[vcnt].as_int = above->mbmi.mv.as_int;
1369
489k
      mv_bias(ref_frame_sign_bias[above->mbmi.ref_frame], refframe,
1370
489k
              &near_mvs[vcnt], ref_frame_sign_bias);
1371
489k
      near_ref[vcnt] = above->mbmi.ref_frame;
1372
489k
    }
1373
1.57M
    vcnt++;
1374
1.57M
    if (left->mbmi.ref_frame != INTRA_FRAME) {
1375
623k
      near_mvs[vcnt].as_int = left->mbmi.mv.as_int;
1376
623k
      mv_bias(ref_frame_sign_bias[left->mbmi.ref_frame], refframe,
1377
623k
              &near_mvs[vcnt], ref_frame_sign_bias);
1378
623k
      near_ref[vcnt] = left->mbmi.ref_frame;
1379
623k
    }
1380
1.57M
    vcnt++;
1381
1.57M
    if (aboveleft->mbmi.ref_frame != INTRA_FRAME) {
1382
376k
      near_mvs[vcnt].as_int = aboveleft->mbmi.mv.as_int;
1383
376k
      mv_bias(ref_frame_sign_bias[aboveleft->mbmi.ref_frame], refframe,
1384
376k
              &near_mvs[vcnt], ref_frame_sign_bias);
1385
376k
      near_ref[vcnt] = aboveleft->mbmi.ref_frame;
1386
376k
    }
1387
1.57M
    vcnt++;
1388
1389
    /* read in 5 nearby block's MVs from last frame. */
1390
1.57M
    if (cpi->common.last_frame_type != KEY_FRAME) {
1391
988k
      mb_offset = (-xd->mb_to_top_edge / 128 + 1) * (xd->mode_info_stride + 1) +
1392
988k
                  (-xd->mb_to_left_edge / 128 + 1);
1393
1394
      /* current in last frame */
1395
988k
      if (cpi->lf_ref_frame[mb_offset] != INTRA_FRAME) {
1396
603k
        near_mvs[vcnt].as_int = cpi->lfmv[mb_offset].as_int;
1397
603k
        mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset], refframe,
1398
603k
                &near_mvs[vcnt], ref_frame_sign_bias);
1399
603k
        near_ref[vcnt] = cpi->lf_ref_frame[mb_offset];
1400
603k
      }
1401
988k
      vcnt++;
1402
1403
      /* above in last frame */
1404
988k
      if (cpi->lf_ref_frame[mb_offset - xd->mode_info_stride - 1] !=
1405
988k
          INTRA_FRAME) {
1406
354k
        near_mvs[vcnt].as_int =
1407
354k
            cpi->lfmv[mb_offset - xd->mode_info_stride - 1].as_int;
1408
354k
        mv_bias(
1409
354k
            cpi->lf_ref_frame_sign_bias[mb_offset - xd->mode_info_stride - 1],
1410
354k
            refframe, &near_mvs[vcnt], ref_frame_sign_bias);
1411
354k
        near_ref[vcnt] =
1412
354k
            cpi->lf_ref_frame[mb_offset - xd->mode_info_stride - 1];
1413
354k
      }
1414
988k
      vcnt++;
1415
1416
      /* left in last frame */
1417
988k
      if (cpi->lf_ref_frame[mb_offset - 1] != INTRA_FRAME) {
1418
400k
        near_mvs[vcnt].as_int = cpi->lfmv[mb_offset - 1].as_int;
1419
400k
        mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset - 1], refframe,
1420
400k
                &near_mvs[vcnt], ref_frame_sign_bias);
1421
400k
        near_ref[vcnt] = cpi->lf_ref_frame[mb_offset - 1];
1422
400k
      }
1423
988k
      vcnt++;
1424
1425
      /* right in last frame */
1426
988k
      if (cpi->lf_ref_frame[mb_offset + 1] != INTRA_FRAME) {
1427
407k
        near_mvs[vcnt].as_int = cpi->lfmv[mb_offset + 1].as_int;
1428
407k
        mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset + 1], refframe,
1429
407k
                &near_mvs[vcnt], ref_frame_sign_bias);
1430
407k
        near_ref[vcnt] = cpi->lf_ref_frame[mb_offset + 1];
1431
407k
      }
1432
988k
      vcnt++;
1433
1434
      /* below in last frame */
1435
988k
      if (cpi->lf_ref_frame[mb_offset + xd->mode_info_stride + 1] !=
1436
988k
          INTRA_FRAME) {
1437
350k
        near_mvs[vcnt].as_int =
1438
350k
            cpi->lfmv[mb_offset + xd->mode_info_stride + 1].as_int;
1439
350k
        mv_bias(
1440
350k
            cpi->lf_ref_frame_sign_bias[mb_offset + xd->mode_info_stride + 1],
1441
350k
            refframe, &near_mvs[vcnt], ref_frame_sign_bias);
1442
350k
        near_ref[vcnt] =
1443
350k
            cpi->lf_ref_frame[mb_offset + xd->mode_info_stride + 1];
1444
350k
      }
1445
988k
      vcnt++;
1446
988k
    }
1447
1448
6.60M
    for (i = 0; i < vcnt; ++i) {
1449
5.88M
      if (near_ref[near_sadidx[i]] != INTRA_FRAME) {
1450
2.05M
        if (here->mbmi.ref_frame == near_ref[near_sadidx[i]]) {
1451
851k
          mv.as_int = near_mvs[near_sadidx[i]].as_int;
1452
851k
          find = 1;
1453
851k
          if (i < 3) {
1454
768k
            *sr = 3;
1455
768k
          } else {
1456
83.5k
            *sr = 2;
1457
83.5k
          }
1458
851k
          break;
1459
851k
        }
1460
2.05M
      }
1461
5.88M
    }
1462
1463
1.57M
    if (!find) {
1464
5.12M
      for (i = 0; i < vcnt; ++i) {
1465
4.40M
        mvx[i] = near_mvs[i].as_mv.row;
1466
4.40M
        mvy[i] = near_mvs[i].as_mv.col;
1467
4.40M
      }
1468
1469
720k
      insertsortmv(mvx, vcnt);
1470
720k
      insertsortmv(mvy, vcnt);
1471
720k
      mv.as_mv.row = mvx[vcnt / 2];
1472
720k
      mv.as_mv.col = mvy[vcnt / 2];
1473
1474
      /* sr is set to 0 to allow calling function to decide the search
1475
       * range.
1476
       */
1477
720k
      *sr = 0;
1478
720k
    }
1479
1.57M
  }
1480
1481
  /* Set up return values */
1482
1.57M
  mvp->as_int = mv.as_int;
1483
1.57M
  vp8_clamp_mv2(mvp, xd);
1484
1.57M
}
1485
1486
/* Compute 16x16 SADs between the source macroblock and its neighbours, then
 * sort them into near_sadidx[] with insertsortsad().
 *
 * near_sad indexes:
 *   0-cf above, 1-cf left, 2-cf aboveleft,
 *   3-lf current, 4-lf above, 5-lf left, 6-lf right, 7-lf below
 * (cf = current frame, read from xd->dst; lf = last frame, read from the
 * buffer selected by cpi->common.lst_fb_idx at recon_yoffset).
 *
 * A neighbour on the far side of a frame edge (the matching xd->mb_to_*_edge
 * is 0) gets INT_MAX instead of a SAD. When the last frame was a key frame
 * only the three current-frame entries are computed and sorted.
 */
void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x,
                 int recon_yoffset, int near_sadidx[]) {
  int near_sad[8] = { 0 };
  BLOCK *b = &x->block[0];
  unsigned char *src_y_ptr = *(b->base_src);
  vp8_variance_fn_ptr_t *fn = &cpi->fn_ptr[BLOCK_16X16];
  const int has_above = (xd->mb_to_top_edge != 0);
  const int has_left = (xd->mb_to_left_edge != 0);

  /* calculate sad for current frame 3 nearby MBs. */
  near_sad[0] = near_sad[1] = near_sad[2] = INT_MAX;
  if (has_above) {
    near_sad[0] =
        fn->sdf(src_y_ptr, b->src_stride,
                xd->dst.y_buffer - xd->dst.y_stride * 16, xd->dst.y_stride);
  }
  if (has_left) {
    near_sad[1] = fn->sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,
                          xd->dst.y_stride);
  }
  if (has_above && has_left) {
    near_sad[2] = fn->sdf(src_y_ptr, b->src_stride,
                          xd->dst.y_buffer - xd->dst.y_stride * 16 - 16,
                          xd->dst.y_stride);
  }

  if (cpi->common.last_frame_type == KEY_FRAME) {
    /* No usable last-frame data: rank the current-frame entries only. */
    insertsortsad(near_sad, near_sadidx, 3);
  } else {
    /* calculate sad for last frame 5 nearby MBs. */
    unsigned char *pre_y_buffer =
        cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_buffer + recon_yoffset;
    int pre_y_stride = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_stride;

    if (has_above) {
      near_sad[4] =
          fn->sdf(src_y_ptr, b->src_stride, pre_y_buffer - pre_y_stride * 16,
                  pre_y_stride);
    } else {
      near_sad[4] = INT_MAX;
    }

    if (has_left) {
      near_sad[5] = fn->sdf(src_y_ptr, b->src_stride, pre_y_buffer - 16,
                            pre_y_stride);
    } else {
      near_sad[5] = INT_MAX;
    }

    near_sad[3] =
        fn->sdf(src_y_ptr, b->src_stride, pre_y_buffer, pre_y_stride);

    if (xd->mb_to_right_edge != 0) {
      near_sad[6] = fn->sdf(src_y_ptr, b->src_stride, pre_y_buffer + 16,
                            pre_y_stride);
    } else {
      near_sad[6] = INT_MAX;
    }

    if (xd->mb_to_bottom_edge != 0) {
      near_sad[7] =
          fn->sdf(src_y_ptr, b->src_stride, pre_y_buffer + pre_y_stride * 16,
                  pre_y_stride);
    } else {
      near_sad[7] = INT_MAX;
    }

    insertsortsad(near_sad, near_sadidx, 8);
  }
}
1560
1561
769k
/* Add one MV to the x->MVcount histograms. The MV is expressed as its
 * difference from ref, halved, and offset by mv_max; nothing is counted
 * unless both resulting indices lie in [0, MVvals).
 */
static void rd_count_mv_residual(MACROBLOCK *x, const int_mv *mv,
                                 const int_mv *ref) {
  const int row_idx = mv_max + ((mv->as_mv.row - ref->as_mv.row) >> 1);
  const int col_idx = mv_max + ((mv->as_mv.col - ref->as_mv.col) >> 1);

  if (row_idx < 0 || row_idx >= MVvals) return;
  if (col_idx < 0 || col_idx >= MVvals) return;

  x->MVcount[0][row_idx]++;
  x->MVcount[1][col_idx]++;
}

/* Update the MV histograms for the mode chosen for the current macroblock:
 * every NEW4X4 partition of a SPLITMV macroblock, or the single MV of a NEWMV
 * macroblock. Other modes leave the histograms unchanged.
 */
static void rd_update_mvcount(MACROBLOCK *x, int_mv *best_ref_mv) {
  if (x->e_mbd.mode_info_context->mbmi.mode == SPLITMV) {
    int i;

    for (i = 0; i < x->partition_info->count; ++i) {
      if (x->partition_info->bmi[i].mode != NEW4X4) continue;

      rd_count_mv_residual(x, &x->partition_info->bmi[i].mv, best_ref_mv);
    }
    return;
  }

  if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV) {
    rd_count_mv_residual(x, &x->e_mbd.mode_info_context->mbmi.mv, best_ref_mv);
  }
}
1597
1598
static int evaluate_inter_mode_rd(int mdcounts[4], RATE_DISTORTION *rd,
1599
                                  int *disable_skip, VP8_COMP *cpi,
1600
2.91M
                                  MACROBLOCK *x) {
1601
2.91M
  MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode;
1602
2.91M
  BLOCK *b = &x->block[0];
1603
2.91M
  MACROBLOCKD *xd = &x->e_mbd;
1604
2.91M
  int distortion;
1605
2.91M
  vp8_build_inter16x16_predictors_mby(&x->e_mbd, x->e_mbd.predictor, 16);
1606
1607
2.91M
  if (cpi->active_map_enabled && x->active_ptr[0] == 0) {
1608
0
    x->skip = 1;
1609
2.91M
  } else if (x->encode_breakout) {
1610
0
    unsigned int sse;
1611
0
    unsigned int var;
1612
0
    unsigned int threshold =
1613
0
        (xd->block[0].dequant[1] * xd->block[0].dequant[1] >> 4);
1614
1615
0
    if (threshold < x->encode_breakout) threshold = x->encode_breakout;
1616
1617
0
    var = vpx_variance16x16(*(b->base_src), b->src_stride, x->e_mbd.predictor,
1618
0
                            16, &sse);
1619
1620
0
    if (sse < threshold) {
1621
0
      unsigned int q2dc = xd->block[24].dequant[0];
1622
      /* If theres is no codeable 2nd order dc
1623
         or a very small uniform pixel change change */
1624
0
      if ((sse - var < q2dc * q2dc >> 4) || (sse / 2 > var && sse - var < 64)) {
1625
        /* Check u and v to make sure skip is ok */
1626
0
        unsigned int sse2 = VP8_UVSSE(x);
1627
0
        if (sse2 * 2 < threshold) {
1628
0
          x->skip = 1;
1629
0
          rd->distortion2 = sse + sse2;
1630
0
          rd->rate2 = 500;
1631
1632
          /* for best_yrd calculation */
1633
0
          rd->rate_uv = 0;
1634
0
          rd->distortion_uv = sse2;
1635
1636
0
          *disable_skip = 1;
1637
0
          return RDCOST(x->rdmult, x->rddiv, rd->rate2, rd->distortion2);
1638
0
        }
1639
0
      }
1640
0
    }
1641
0
  }
1642
1643
  /* Add in the Mv/mode cost */
1644
2.91M
  rd->rate2 += vp8_cost_mv_ref(this_mode, mdcounts);
1645
1646
  /* Y cost and distortion */
1647
2.91M
  macro_block_yrd(x, &rd->rate_y, &distortion);
1648
2.91M
  rd->rate2 += rd->rate_y;
1649
2.91M
  rd->distortion2 += distortion;
1650
1651
  /* UV cost and distortion */
1652
2.91M
  rd_inter16x16_uv(cpi, x, &rd->rate_uv, &rd->distortion_uv,
1653
2.91M
                   cpi->common.full_pixel);
1654
2.91M
  rd->rate2 += rd->rate_uv;
1655
2.91M
  rd->distortion2 += rd->distortion_uv;
1656
2.91M
  return INT_MAX;
1657
2.91M
}
1658
1659
static int calculate_final_rd_costs(int this_rd, RATE_DISTORTION *rd,
1660
                                    int *other_cost, int disable_skip,
1661
                                    int uv_intra_tteob, int intra_rd_penalty,
1662
6.86M
                                    VP8_COMP *cpi, MACROBLOCK *x) {
1663
6.86M
  MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode;
1664
1665
  /* Where skip is allowable add in the default per mb cost for the no
1666
   * skip case. where we then decide to skip we have to delete this and
1667
   * replace it with the cost of signalling a skip
1668
   */
1669
6.86M
  if (cpi->common.mb_no_coeff_skip) {
1670
6.86M
    *other_cost += vp8_cost_bit(cpi->prob_skip_false, 0);
1671
6.86M
    rd->rate2 += *other_cost;
1672
6.86M
  }
1673
1674
  /* Estimate the reference frame signaling cost and add it
1675
   * to the rolling cost variable.
1676
   */
1677
6.86M
  rd->rate2 += x->ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
1678
1679
6.86M
  if (!disable_skip) {
1680
    /* Test for the condition where skip block will be activated
1681
     * because there are no non zero coefficients and make any
1682
     * necessary adjustment for rate
1683
     */
1684
6.21M
    if (cpi->common.mb_no_coeff_skip) {
1685
6.21M
      int i;
1686
6.21M
      int tteob;
1687
6.21M
      int has_y2_block = (this_mode != SPLITMV && this_mode != B_PRED);
1688
1689
6.21M
      tteob = 0;
1690
6.21M
      if (has_y2_block) tteob += x->e_mbd.eobs[24];
1691
1692
105M
      for (i = 0; i < 16; ++i) tteob += (x->e_mbd.eobs[i] > has_y2_block);
1693
1694
6.21M
      if (x->e_mbd.mode_info_context->mbmi.ref_frame) {
1695
29.9M
        for (i = 16; i < 24; ++i) tteob += x->e_mbd.eobs[i];
1696
3.32M
      } else {
1697
2.88M
        tteob += uv_intra_tteob;
1698
2.88M
      }
1699
1700
6.21M
      if (tteob == 0) {
1701
341k
        rd->rate2 -= (rd->rate_y + rd->rate_uv);
1702
        /* for best_yrd calculation */
1703
341k
        rd->rate_uv = 0;
1704
1705
        /* Back out no skip flag costing and add in skip flag costing */
1706
341k
        if (cpi->prob_skip_false) {
1707
341k
          int prob_skip_cost;
1708
1709
341k
          prob_skip_cost = vp8_cost_bit(cpi->prob_skip_false, 1);
1710
341k
          prob_skip_cost -= (int)vp8_cost_bit(cpi->prob_skip_false, 0);
1711
341k
          rd->rate2 += prob_skip_cost;
1712
341k
          *other_cost += prob_skip_cost;
1713
341k
        }
1714
341k
      }
1715
6.21M
    }
1716
    /* Calculate the final RD estimate for this mode */
1717
6.21M
    this_rd = RDCOST(x->rdmult, x->rddiv, rd->rate2, rd->distortion2);
1718
6.21M
    if (this_rd < INT_MAX &&
1719
6.21M
        x->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
1720
2.88M
      this_rd += intra_rd_penalty;
1721
2.88M
    }
1722
6.21M
  }
1723
6.86M
  return this_rd;
1724
6.86M
}
1725
1726
static void update_best_mode(BEST_MODE *best_mode, int this_rd,
1727
                             RATE_DISTORTION *rd, int other_cost,
1728
2.52M
                             MACROBLOCK *x) {
1729
2.52M
  MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode;
1730
1731
2.52M
  other_cost += x->ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
1732
1733
  /* Calculate the final y RD estimate for this mode */
1734
2.52M
  best_mode->yrd =
1735
2.52M
      RDCOST(x->rdmult, x->rddiv, (rd->rate2 - rd->rate_uv - other_cost),
1736
2.52M
             (rd->distortion2 - rd->distortion_uv));
1737
1738
2.52M
  best_mode->rd = this_rd;
1739
2.52M
  best_mode->mbmode = x->e_mbd.mode_info_context->mbmi;
1740
2.52M
  best_mode->partition = *x->partition_info;
1741
1742
2.52M
  if ((this_mode == B_PRED) || (this_mode == SPLITMV)) {
1743
572k
    int i;
1744
9.72M
    for (i = 0; i < 16; ++i) {
1745
9.15M
      best_mode->bmodes[i] = x->e_mbd.block[i].bmi;
1746
9.15M
    }
1747
572k
  }
1748
2.52M
}
1749
1750
/* Rate-distortion mode decision for one macroblock of an inter frame.
 *
 * Walks the MAX_MODES candidates given by vp8_mode_order[] and
 * vp8_ref_frame_order[] (mapped through ref_frame_map), skipping those whose
 * threshold in x->rd_threshes[] is not beaten by the best score so far,
 * evaluates the rest, and leaves the winning mode in
 * x->e_mbd.mode_info_context->mbmi (plus per-block modes / partition info for
 * B_PRED and SPLITMV). The per-mode thresholds in x->rd_thresh_mult[] and
 * x->rd_threshes[] are adapted as a side effect.
 *
 *   recon_yoffset, recon_uvoffset - passed on to get_predictor_pointers(),
 *                                   vp8_cal_sad() and the denoiser.
 *   returnrate, returndistortion  - receive rate2 / distortion2 of the mode
 *                                   last recorded as best.
 *   returnintra                   - receives distortion2 of the best intra
 *                                   candidate; INT_MAX if none improved on it.
 *   mb_row, mb_col                - only used by the temporal denoiser path.
 */
void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
1751
                            int recon_uvoffset, int *returnrate,
1752
                            int *returndistortion, int *returnintra, int mb_row,
1753
769k
                            int mb_col) {
1754
769k
  BLOCK *b = &x->block[0];
1755
769k
  BLOCKD *d = &x->e_mbd.block[0];
1756
769k
  MACROBLOCKD *xd = &x->e_mbd;
1757
769k
  int_mv best_ref_mv_sb[2];
1758
769k
  int_mv mode_mv_sb[2][MB_MODE_COUNT];
1759
769k
  int_mv best_ref_mv;
1760
769k
  int_mv *mode_mv;
1761
769k
  MB_PREDICTION_MODE this_mode;
1762
769k
  int num00;
1763
769k
  int best_mode_index = 0;
1764
769k
  BEST_MODE best_mode;
1765
1766
769k
  int i;
1767
769k
  int mode_index;
1768
769k
  int mdcounts[4];
1769
769k
  int rate;
1770
769k
  RATE_DISTORTION rd;
1771
769k
  int uv_intra_rate, uv_intra_distortion, uv_intra_rate_tokenonly;
1772
769k
  int uv_intra_tteob = 0;
1773
769k
  int uv_intra_done = 0;
1774
1775
769k
  MB_PREDICTION_MODE uv_intra_mode = 0;
1776
769k
  int_mv mvp;
1777
769k
  int near_sadidx[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
1778
769k
  int saddone = 0;
1779
  /* search range got from mv_pred(). It uses step_param levels. (0-7) */
1780
769k
  int sr = 0;
1781
1782
769k
  unsigned char *plane[4][3] = { { 0, 0 } };
1783
769k
  int ref_frame_map[4];
1784
769k
  int sign_bias = 0;
1785
1786
769k
  int intra_rd_penalty =
1787
769k
      10 * vp8_dc_quant(cpi->common.base_qindex, cpi->common.y1dc_delta_q);
1788
1789
769k
#if CONFIG_TEMPORAL_DENOISING
1790
769k
  unsigned int zero_mv_sse = UINT_MAX, best_sse = UINT_MAX,
1791
769k
               best_rd_sse = UINT_MAX;
1792
769k
#endif
1793
1794
  // _uv variables are not set consistently before calling update_best_mode.
1795
769k
  rd.rate_uv = 0;
1796
769k
  rd.distortion_uv = 0;
1797
1798
769k
  mode_mv = mode_mv_sb[sign_bias];
1799
769k
  best_ref_mv.as_int = 0;
1800
769k
  best_mode.rd = INT_MAX;
1801
769k
  best_mode.yrd = INT_MAX;
1802
769k
  best_mode.intra_rd = INT_MAX;
1803
769k
  memset(mode_mv_sb, 0, sizeof(mode_mv_sb));
1804
769k
  memset(&best_mode.mbmode, 0, sizeof(best_mode.mbmode));
1805
769k
  memset(&best_mode.bmodes, 0, sizeof(best_mode.bmodes));
1806
1807
  /* Setup search priorities */
1808
769k
  get_reference_search_order(cpi, ref_frame_map);
1809
1810
  /* Check to see if there is at least 1 valid reference frame that we need
1811
   * to calculate near_mvs.
1812
   */
1813
769k
  if (ref_frame_map[1] > 0) {
1814
769k
    sign_bias = vp8_find_near_mvs_bias(
1815
769k
        &x->e_mbd, x->e_mbd.mode_info_context, mode_mv_sb, best_ref_mv_sb,
1816
769k
        mdcounts, ref_frame_map[1], cpi->common.ref_frame_sign_bias);
1817
1818
769k
    mode_mv = mode_mv_sb[sign_bias];
1819
769k
    best_ref_mv.as_int = best_ref_mv_sb[sign_bias].as_int;
1820
769k
  }
1821
1822
769k
  get_predictor_pointers(cpi, plane, recon_yoffset, recon_uvoffset);
1823
1824
769k
  *returnintra = INT_MAX;
1825
  /* Count of the number of MBs tested so far this frame */
1826
769k
  x->mbs_tested_so_far++;
1827
1828
769k
  x->skip = 0;
1829
1830
16.1M
  /* Evaluate each candidate mode / reference frame pair in search order */
  for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
1831
15.3M
    int this_rd = INT_MAX;
1832
15.3M
    int disable_skip = 0;
1833
15.3M
    int other_cost = 0;
1834
15.3M
    int this_ref_frame = ref_frame_map[vp8_ref_frame_order[mode_index]];
1835
1836
    /* Test best rd so far against threshold for trying this mode. */
1837
15.3M
    if (best_mode.rd <= x->rd_threshes[mode_index]) continue;
1838
1839
13.8M
    if (this_ref_frame < 0) continue;
1840
1841
    /* These variables hold the rolling total cost and distortion for
1842
     * this mode
1843
     */
1844
8.65M
    rd.rate2 = 0;
1845
8.65M
    rd.distortion2 = 0;
1846
1847
8.65M
    this_mode = vp8_mode_order[mode_index];
1848
1849
8.65M
    x->e_mbd.mode_info_context->mbmi.mode = this_mode;
1850
8.65M
    x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame;
1851
1852
    /* Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
1853
     * unless ARNR filtering is enabled in which case we want
1854
     * an unfiltered alternative
1855
     */
1856
8.65M
    if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
1857
0
      if (this_mode != ZEROMV ||
1858
0
          x->e_mbd.mode_info_context->mbmi.ref_frame != ALTREF_FRAME) {
1859
0
        continue;
1860
0
      }
1861
0
    }
1862
1863
    /* everything but intra */
1864
8.65M
    if (x->e_mbd.mode_info_context->mbmi.ref_frame) {
1865
5.45M
      assert(plane[this_ref_frame][0] != NULL &&
1866
5.45M
             plane[this_ref_frame][1] != NULL &&
1867
5.45M
             plane[this_ref_frame][2] != NULL);
1868
5.45M
      x->e_mbd.pre.y_buffer = plane[this_ref_frame][0];
1869
5.45M
      x->e_mbd.pre.u_buffer = plane[this_ref_frame][1];
1870
5.45M
      x->e_mbd.pre.v_buffer = plane[this_ref_frame][2];
1871
1872
5.45M
      if (sign_bias != cpi->common.ref_frame_sign_bias[this_ref_frame]) {
1873
0
        sign_bias = cpi->common.ref_frame_sign_bias[this_ref_frame];
1874
0
        mode_mv = mode_mv_sb[sign_bias];
1875
0
        best_ref_mv.as_int = best_ref_mv_sb[sign_bias].as_int;
1876
0
      }
1877
5.45M
    }
1878
1879
    /* Check to see if the testing frequency for this mode is at its
1880
     * max. If so then prevent it from being tested and increase the
1881
     * threshold for its testing
1882
     */
1883
8.65M
    if (x->mode_test_hit_counts[mode_index] &&
1884
7.76M
        (cpi->mode_check_freq[mode_index] > 1)) {
1885
232k
      if (x->mbs_tested_so_far <= cpi->mode_check_freq[mode_index] *
1886
232k
                                      x->mode_test_hit_counts[mode_index]) {
1887
        /* Increase the threshold for coding this mode to make it
1888
         * less likely to be chosen
1889
         */
1890
127k
        x->rd_thresh_mult[mode_index] += 4;
1891
1892
127k
        if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT) {
1893
24.0k
          x->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
1894
24.0k
        }
1895
1896
127k
        x->rd_threshes[mode_index] =
1897
127k
            (cpi->rd_baseline_thresh[mode_index] >> 7) *
1898
127k
            x->rd_thresh_mult[mode_index];
1899
1900
127k
        continue;
1901
127k
      }
1902
232k
    }
1903
1904
    /* We have now reached the point where we are going to test the
1905
     * current mode so increment the counter for the number of times
1906
     * it has been tested
1907
     */
1908
8.52M
    x->mode_test_hit_counts[mode_index]++;
1909
1910
    /* Experimental code. Special case for gf and arf zeromv modes.
1911
     * Increase zbin size to suppress noise
1912
     */
1913
8.52M
    if (x->zbin_mode_boost_enabled) {
1914
0
      if (this_ref_frame == INTRA_FRAME) {
1915
0
        x->zbin_mode_boost = 0;
1916
0
      } else {
1917
0
        if (vp8_mode_order[mode_index] == ZEROMV) {
1918
0
          if (this_ref_frame != LAST_FRAME) {
1919
0
            x->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
1920
0
          } else {
1921
0
            x->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
1922
0
          }
1923
0
        } else if (vp8_mode_order[mode_index] == SPLITMV) {
1924
0
          x->zbin_mode_boost = 0;
1925
0
        } else {
1926
0
          x->zbin_mode_boost = MV_ZBIN_BOOST;
1927
0
        }
1928
0
      }
1929
1930
0
      vp8_update_zbin_extra(cpi, x);
1931
0
    }
1932
1933
8.52M
    if (!uv_intra_done && this_ref_frame == INTRA_FRAME) {
1934
769k
      rd_pick_intra_mbuv_mode(x, &uv_intra_rate, &uv_intra_rate_tokenonly,
1935
769k
                              &uv_intra_distortion);
1936
769k
      uv_intra_mode = x->e_mbd.mode_info_context->mbmi.uv_mode;
1937
1938
      /*
1939
       * Total of the eobs is used later to further adjust rate2. Since uv
1940
       * block's intra eobs will be overwritten when we check inter modes,
1941
       * we need to save uv_intra_tteob here.
1942
       */
1943
6.92M
      for (i = 16; i < 24; ++i) uv_intra_tteob += x->e_mbd.eobs[i];
1944
1945
769k
      uv_intra_done = 1;
1946
769k
    }
1947
1948
8.52M
    switch (this_mode) {
1949
551k
      case B_PRED: {
1950
551k
        int tmp_rd;
1951
1952
        /* Note the rate value returned here includes the cost of
1953
         * coding the BPRED mode: x->mbmode_cost[x->e_mbd.frame_type][BPRED]
1954
         */
1955
551k
        int distortion;
1956
551k
        tmp_rd = rd_pick_intra4x4mby_modes(x, &rate, &rd.rate_y, &distortion,
1957
551k
                                           best_mode.yrd);
1958
551k
        rd.rate2 += rate;
1959
551k
        rd.distortion2 += distortion;
1960
1961
551k
        if (tmp_rd < best_mode.yrd) {
1962
237k
          assert(uv_intra_done);
1963
237k
          rd.rate2 += uv_intra_rate;
1964
237k
          rd.rate_uv = uv_intra_rate_tokenonly;
1965
237k
          rd.distortion2 += uv_intra_distortion;
1966
237k
          rd.distortion_uv = uv_intra_distortion;
1967
314k
        } else {
1968
314k
          this_rd = INT_MAX;
1969
314k
          disable_skip = 1;
1970
314k
        }
1971
551k
        break;
1972
0
      }
1973
1974
743k
      case SPLITMV: {
1975
743k
        int tmp_rd;
1976
743k
        int this_rd_thresh;
1977
743k
        int distortion;
1978
1979
743k
        this_rd_thresh = (vp8_ref_frame_order[mode_index] == 1)
1980
743k
                             ? x->rd_threshes[THR_NEW1]
1981
743k
                             : x->rd_threshes[THR_NEW3];
1982
743k
        this_rd_thresh = (vp8_ref_frame_order[mode_index] == 2)
1983
743k
                             ? x->rd_threshes[THR_NEW2]
1984
743k
                             : this_rd_thresh;
1985
1986
743k
        tmp_rd = vp8_rd_pick_best_mbsegmentation(
1987
743k
            cpi, x, &best_ref_mv, best_mode.yrd, mdcounts, &rate, &rd.rate_y,
1988
743k
            &distortion, this_rd_thresh);
1989
1990
743k
        rd.rate2 += rate;
1991
743k
        rd.distortion2 += distortion;
1992
1993
        /* If even the 'Y' rd value of split is higher than best so far
1994
         * then don't bother looking at UV
1995
         */
1996
743k
        if (tmp_rd < best_mode.yrd) {
1997
          /* Now work out UV cost and add it in */
1998
407k
          rd_inter4x4_uv(cpi, x, &rd.rate_uv, &rd.distortion_uv,
1999
407k
                         cpi->common.full_pixel);
2000
407k
          rd.rate2 += rd.rate_uv;
2001
407k
          rd.distortion2 += rd.distortion_uv;
2002
407k
        } else {
2003
335k
          this_rd = INT_MAX;
2004
335k
          disable_skip = 1;
2005
335k
        }
2006
743k
        break;
2007
0
      }
2008
769k
      case DC_PRED:
2009
1.40M
      case V_PRED:
2010
2.03M
      case H_PRED:
2011
2.65M
      case TM_PRED: {
2012
2.65M
        int distortion;
2013
2.65M
        x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
2014
2015
2.65M
        vp8_build_intra_predictors_mby_s(
2016
2.65M
            xd, xd->dst.y_buffer - xd->dst.y_stride, xd->dst.y_buffer - 1,
2017
2.65M
            xd->dst.y_stride, xd->predictor, 16);
2018
2.65M
        macro_block_yrd(x, &rd.rate_y, &distortion);
2019
2.65M
        rd.rate2 += rd.rate_y;
2020
2.65M
        rd.distortion2 += distortion;
2021
2.65M
        rd.rate2 += x->mbmode_cost[x->e_mbd.frame_type]
2022
2.65M
                                  [x->e_mbd.mode_info_context->mbmi.mode];
2023
2.65M
        assert(uv_intra_done);
2024
2.65M
        rd.rate2 += uv_intra_rate;
2025
2.65M
        rd.rate_uv = uv_intra_rate_tokenonly;
2026
2.65M
        rd.distortion2 += uv_intra_distortion;
2027
2.65M
        rd.distortion_uv = uv_intra_distortion;
2028
2.65M
        break;
2029
2.03M
      }
2030
2031
995k
      case NEWMV: {
2032
995k
        int thissme;
2033
995k
        int bestsme = INT_MAX;
2034
995k
        int step_param = cpi->sf.first_step;
2035
995k
        int further_steps;
2036
995k
        int n;
2037
        /* If last step (1-away) of n-step search doesn't pick the center point
2038
           as the best match, we will do a final 1-away diamond refining search
2039
        */
2040
995k
        int do_refine = 1;
2041
2042
995k
        int sadpb = x->sadperbit16;
2043
995k
        int_mv mvp_full;
2044
2045
995k
        int col_min = ((best_ref_mv.as_mv.col + 7) >> 3) - MAX_FULL_PEL_VAL;
2046
995k
        int row_min = ((best_ref_mv.as_mv.row + 7) >> 3) - MAX_FULL_PEL_VAL;
2047
995k
        int col_max = (best_ref_mv.as_mv.col >> 3) + MAX_FULL_PEL_VAL;
2048
995k
        int row_max = (best_ref_mv.as_mv.row >> 3) + MAX_FULL_PEL_VAL;
2049
2050
995k
        /* Save the MV limits: they are narrowed for the search below and
         * restored once it has finished.
         */
        int tmp_col_min = x->mv_col_min;
2051
995k
        int tmp_col_max = x->mv_col_max;
2052
995k
        int tmp_row_min = x->mv_row_min;
2053
995k
        int tmp_row_max = x->mv_row_max;
2054
2055
995k
        if (!saddone) {
2056
656k
          vp8_cal_sad(cpi, xd, x, recon_yoffset, &near_sadidx[0]);
2057
656k
          saddone = 1;
2058
656k
        }
2059
2060
995k
        vp8_mv_pred(cpi, &x->e_mbd, x->e_mbd.mode_info_context, &mvp,
2061
995k
                    x->e_mbd.mode_info_context->mbmi.ref_frame,
2062
995k
                    cpi->common.ref_frame_sign_bias, &sr, &near_sadidx[0]);
2063
2064
995k
        mvp_full.as_mv.col = mvp.as_mv.col >> 3;
2065
995k
        mvp_full.as_mv.row = mvp.as_mv.row >> 3;
2066
2067
        /* Get intersection of UMV window and valid MV window to
2068
         * reduce # of checks in diamond search.
2069
         */
2070
995k
        if (x->mv_col_min < col_min) x->mv_col_min = col_min;
2071
995k
        if (x->mv_col_max > col_max) x->mv_col_max = col_max;
2072
995k
        if (x->mv_row_min < row_min) x->mv_row_min = row_min;
2073
995k
        if (x->mv_row_max > row_max) x->mv_row_max = row_max;
2074
2075
        /* adjust search range according to sr from mv prediction */
2076
995k
        if (sr > step_param) step_param = sr;
2077
2078
        /* Initial step/diamond search */
2079
995k
        {
2080
995k
          bestsme = cpi->diamond_search_sad(
2081
995k
              x, b, d, &mvp_full, &d->bmi.mv, step_param, sadpb, &num00,
2082
995k
              &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv);
2083
995k
          mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
2084
2085
          /* Further step/diamond searches as necessary */
2086
995k
          further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
2087
2088
995k
          n = num00;
2089
995k
          num00 = 0;
2090
2091
          /* If there won't be more n-step search, check to see if refining
2092
           * search is needed. */
2093
995k
          if (n > further_steps) do_refine = 0;
2094
2095
4.40M
          while (n < further_steps) {
2096
3.41M
            n++;
2097
2098
3.41M
            if (num00) {
2099
301k
              num00--;
2100
3.11M
            } else {
2101
3.11M
              thissme = cpi->diamond_search_sad(
2102
3.11M
                  x, b, d, &mvp_full, &d->bmi.mv, step_param + n, sadpb, &num00,
2103
3.11M
                  &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv);
2104
2105
              /* check to see if refining search is needed. */
2106
3.11M
              if (num00 > (further_steps - n)) do_refine = 0;
2107
2108
3.11M
              if (thissme < bestsme) {
2109
484k
                bestsme = thissme;
2110
484k
                mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
2111
2.62M
              } else {
2112
2.62M
                d->bmi.mv.as_int = mode_mv[NEWMV].as_int;
2113
2.62M
              }
2114
3.11M
            }
2115
3.41M
          }
2116
995k
        }
2117
2118
        /* final 1-away diamond refining search */
2119
995k
        if (do_refine == 1) {
2120
660k
          int search_range;
2121
2122
660k
          search_range = 8;
2123
2124
660k
          thissme = cpi->refining_search_sad(
2125
660k
              x, b, d, &d->bmi.mv, sadpb, search_range,
2126
660k
              &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv);
2127
2128
660k
          if (thissme < bestsme) {
2129
32.0k
            bestsme = thissme;
2130
32.0k
            mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
2131
628k
          } else {
2132
628k
            d->bmi.mv.as_int = mode_mv[NEWMV].as_int;
2133
628k
          }
2134
660k
        }
2135
2136
995k
        x->mv_col_min = tmp_col_min;
2137
995k
        x->mv_col_max = tmp_col_max;
2138
995k
        x->mv_row_min = tmp_row_min;
2139
995k
        x->mv_row_max = tmp_row_max;
2140
2141
995k
        if (bestsme < INT_MAX) {
2142
995k
          int dis; /* TODO: use dis in distortion calculation later. */
2143
995k
          unsigned int sse;
2144
995k
          cpi->find_fractional_mv_step(
2145
995k
              x, b, d, &d->bmi.mv, &best_ref_mv, x->errorperbit,
2146
995k
              &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &dis, &sse);
2147
995k
        }
2148
2149
995k
        mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
2150
2151
        /* Add the new motion vector cost to our rolling cost variable */
2152
995k
        rd.rate2 +=
2153
995k
            vp8_mv_bit_cost(&mode_mv[NEWMV], &best_ref_mv, x->mvcost, 96);
2154
995k
      }
2155
        // fall through
2156
2157
2.19M
      case NEARESTMV:
2158
3.38M
      case NEARMV:
2159
        /* Clip "next_nearest" so that it does not extend too far out
2160
         * of image
2161
         */
2162
3.38M
        vp8_clamp_mv2(&mode_mv[this_mode], xd);
2163
2164
        /* Do not bother proceeding if the vector (from newmv, nearest
2165
         * or near) is 0,0 as this should then be coded using the zeromv
2166
         * mode.
2167
         */
2168
3.38M
        if (((this_mode == NEARMV) || (this_mode == NEARESTMV)) &&
2169
2.39M
            (mode_mv[this_mode].as_int == 0)) {
2170
1.66M
          continue;
2171
1.66M
        }
2172
        // fall through
2173
2174
2.91M
      case ZEROMV:
2175
2176
        /* Trap vectors that reach beyond the UMV borders
2177
         * Note that ALL New MV, Nearest MV Near MV and Zero MV code
2178
         * drops through to this point because of the lack of break
2179
         * statements in the previous two cases.
2180
         */
2181
2.91M
        if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) ||
2182
2.91M
            ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) ||
2183
2.91M
            ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) ||
2184
2.91M
            ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max)) {
2185
0
          continue;
2186
0
        }
2187
2188
2.91M
        vp8_set_mbmode_and_mvs(x, this_mode, &mode_mv[this_mode]);
2189
2.91M
        this_rd = evaluate_inter_mode_rd(mdcounts, &rd, &disable_skip, cpi, x);
2190
2.91M
        break;
2191
2192
0
      default: break;
2193
8.52M
    }
2194
2195
6.86M
    this_rd =
2196
6.86M
        calculate_final_rd_costs(this_rd, &rd, &other_cost, disable_skip,
2197
6.86M
                                 uv_intra_tteob, intra_rd_penalty, cpi, x);
2198
2199
    /* Keep record of best intra distortion */
2200
6.86M
    if ((x->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME) &&
2201
3.20M
        (this_rd < best_mode.intra_rd)) {
2202
1.17M
      best_mode.intra_rd = this_rd;
2203
1.17M
      *returnintra = rd.distortion2;
2204
1.17M
    }
2205
6.86M
#if CONFIG_TEMPORAL_DENOISING
2206
6.86M
    if (cpi->oxcf.noise_sensitivity) {
2207
0
      unsigned int sse;
2208
0
      vp8_get_inter_mbpred_error(x, &cpi->fn_ptr[BLOCK_16X16], &sse,
2209
0
                                 mode_mv[this_mode]);
2210
2211
0
      if (sse < best_rd_sse) best_rd_sse = sse;
2212
2213
      /* Store for later use by denoiser. */
2214
0
      if (this_mode == ZEROMV && sse < zero_mv_sse) {
2215
0
        zero_mv_sse = sse;
2216
0
        x->best_zeromv_reference_frame =
2217
0
            x->e_mbd.mode_info_context->mbmi.ref_frame;
2218
0
      }
2219
2220
      /* Store the best NEWMV in x for later use in the denoiser. */
2221
0
      if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV && sse < best_sse) {
2222
0
        best_sse = sse;
2223
0
        /* NOTE(review): this call takes the same arguments as the one that
         * produced sse above, so it looks redundant - confirm before
         * removing.
         */
        vp8_get_inter_mbpred_error(x, &cpi->fn_ptr[BLOCK_16X16], &best_sse,
2224
0
                                   mode_mv[this_mode]);
2225
0
        x->best_sse_inter_mode = NEWMV;
2226
0
        x->best_sse_mv = x->e_mbd.mode_info_context->mbmi.mv;
2227
0
        x->need_to_clamp_best_mvs =
2228
0
            x->e_mbd.mode_info_context->mbmi.need_to_clamp_mvs;
2229
0
        x->best_reference_frame = x->e_mbd.mode_info_context->mbmi.ref_frame;
2230
0
      }
2231
0
    }
2232
6.86M
#endif
2233
2234
    /* Did this mode help, i.e. is it the new best mode */
2235
6.86M
    if (this_rd < best_mode.rd || x->skip) {
2236
      /* Note index of best mode so far */
2237
2.52M
      best_mode_index = mode_index;
2238
2.52M
      *returnrate = rd.rate2;
2239
2.52M
      *returndistortion = rd.distortion2;
2240
2.52M
      if (this_mode <= B_PRED) {
2241
1.03M
        x->e_mbd.mode_info_context->mbmi.uv_mode = uv_intra_mode;
2242
        /* required for left and above block mv */
2243
1.03M
        x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
2244
1.03M
      }
2245
2.52M
      update_best_mode(&best_mode, this_rd, &rd, other_cost, x);
2246
2247
      /* Testing this mode gave rise to an improvement in best error
2248
       * score. Lower threshold a bit for next time
2249
       */
2250
2.52M
      x->rd_thresh_mult[mode_index] =
2251
2.52M
          (x->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2))
2252
2.52M
              ? x->rd_thresh_mult[mode_index] - 2
2253
2.52M
              : MIN_THRESHMULT;
2254
2.52M
    }
2255
2256
    /* If the mode did not help improve the best error case then raise
2257
     * the threshold for testing that mode next time around.
2258
     */
2259
4.33M
    else {
2260
4.33M
      x->rd_thresh_mult[mode_index] += 4;
2261
2262
4.33M
      if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT) {
2263
2.13M
        x->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
2264
2.13M
      }
2265
4.33M
    }
2266
6.86M
    x->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) *
2267
6.86M
                                 x->rd_thresh_mult[mode_index];
2268
2269
6.86M
    /* Once x->skip is set no further modes are examined */
    if (x->skip) break;
2270
6.86M
  }
2271
2272
  /* Reduce the activation RD thresholds for the best choice mode */
2273
769k
  if ((cpi->rd_baseline_thresh[best_mode_index] > 0) &&
2274
528k
      (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2))) {
2275
528k
    int best_adjustment = (x->rd_thresh_mult[best_mode_index] >> 2);
2276
2277
528k
    x->rd_thresh_mult[best_mode_index] =
2278
528k
        (x->rd_thresh_mult[best_mode_index] >=
2279
528k
         (MIN_THRESHMULT + best_adjustment))
2280
528k
            ? x->rd_thresh_mult[best_mode_index] - best_adjustment
2281
528k
            : MIN_THRESHMULT;
2282
528k
    x->rd_threshes[best_mode_index] =
2283
528k
        (cpi->rd_baseline_thresh[best_mode_index] >> 7) *
2284
528k
        x->rd_thresh_mult[best_mode_index];
2285
528k
  }
2286
2287
769k
#if CONFIG_TEMPORAL_DENOISING
2288
769k
  if (cpi->oxcf.noise_sensitivity) {
2289
0
    int block_index = mb_row * cpi->common.mb_cols + mb_col;
2290
0
    if (x->best_sse_inter_mode == DC_PRED) {
2291
      /* No best MV found. */
2292
0
      x->best_sse_inter_mode = best_mode.mbmode.mode;
2293
0
      x->best_sse_mv = best_mode.mbmode.mv;
2294
0
      x->need_to_clamp_best_mvs = best_mode.mbmode.need_to_clamp_mvs;
2295
0
      x->best_reference_frame = best_mode.mbmode.ref_frame;
2296
0
      best_sse = best_rd_sse;
2297
0
    }
2298
0
    vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse,
2299
0
                            recon_yoffset, recon_uvoffset, &cpi->common.lf_info,
2300
0
                            mb_row, mb_col, block_index, 0);
2301
2302
    /* Reevaluate ZEROMV after denoising. */
2303
0
    if (best_mode.mbmode.ref_frame == INTRA_FRAME &&
2304
0
        x->best_zeromv_reference_frame != INTRA_FRAME) {
2305
0
      int this_rd = INT_MAX;
2306
0
      int disable_skip = 0;
2307
0
      int other_cost = 0;
2308
0
      int this_ref_frame = x->best_zeromv_reference_frame;
2309
0
      rd.rate2 =
2310
0
          x->ref_frame_cost[this_ref_frame] + vp8_cost_mv_ref(ZEROMV, mdcounts);
2311
0
      rd.distortion2 = 0;
2312
2313
      /* set up the proper prediction buffers for the frame */
2314
0
      x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame;
2315
0
      x->e_mbd.pre.y_buffer = plane[this_ref_frame][0];
2316
0
      x->e_mbd.pre.u_buffer = plane[this_ref_frame][1];
2317
0
      x->e_mbd.pre.v_buffer = plane[this_ref_frame][2];
2318
2319
0
      x->e_mbd.mode_info_context->mbmi.mode = ZEROMV;
2320
0
      x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
2321
0
      x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
2322
2323
0
      this_rd = evaluate_inter_mode_rd(mdcounts, &rd, &disable_skip, cpi, x);
2324
0
      this_rd =
2325
0
          calculate_final_rd_costs(this_rd, &rd, &other_cost, disable_skip,
2326
0
                                   uv_intra_tteob, intra_rd_penalty, cpi, x);
2327
0
      if (this_rd < best_mode.rd || x->skip) {
2328
0
        *returnrate = rd.rate2;
2329
0
        *returndistortion = rd.distortion2;
2330
0
        update_best_mode(&best_mode, this_rd, &rd, other_cost, x);
2331
0
      }
2332
0
    }
2333
0
  }
2334
769k
#endif
2335
2336
769k
  if (cpi->is_src_frame_alt_ref &&
2337
0
      (best_mode.mbmode.mode != ZEROMV ||
2338
0
       best_mode.mbmode.ref_frame != ALTREF_FRAME)) {
2339
0
    x->e_mbd.mode_info_context->mbmi.mode = ZEROMV;
2340
0
    x->e_mbd.mode_info_context->mbmi.ref_frame = ALTREF_FRAME;
2341
0
    x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
2342
0
    x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
2343
0
    x->e_mbd.mode_info_context->mbmi.mb_skip_coeff =
2344
0
        (cpi->common.mb_no_coeff_skip);
2345
0
    x->e_mbd.mode_info_context->mbmi.partitioning = 0;
2346
0
    return;
2347
0
  }
2348
2349
  /* macroblock modes */
2350
769k
  x->e_mbd.mode_info_context->mbmi = best_mode.mbmode;
2351
2352
769k
  if (best_mode.mbmode.mode == B_PRED) {
2353
3.92M
    for (i = 0; i < 16; ++i) {
2354
3.69M
      xd->mode_info_context->bmi[i].as_mode = best_mode.bmodes[i].as_mode;
2355
3.69M
    }
2356
230k
  }
2357
2358
769k
  if (best_mode.mbmode.mode == SPLITMV) {
2359
3.08M
    for (i = 0; i < 16; ++i) {
2360
2.90M
      xd->mode_info_context->bmi[i].mv.as_int = best_mode.bmodes[i].mv.as_int;
2361
2.90M
    }
2362
2363
181k
    *x->partition_info = best_mode.partition;
2364
2365
181k
    x->e_mbd.mode_info_context->mbmi.mv.as_int =
2366
181k
        x->partition_info->bmi[15].mv.as_int;
2367
181k
  }
2368
2369
769k
  /* If the chosen reference frame has the other sign bias, use the
   * best_ref_mv that was computed for that bias.
   */
  if (sign_bias !=
2370
769k
      cpi->common.ref_frame_sign_bias[xd->mode_info_context->mbmi.ref_frame]) {
2371
0
    best_ref_mv.as_int = best_ref_mv_sb[!sign_bias].as_int;
2372
0
  }
2373
2374
769k
  rd_update_mvcount(x, &best_ref_mv);
2375
769k
}
2376
2377
771k
/* Picks the intra coding mode for a macroblock.
 *
 * Marks the macroblock as INTRA_FRAME, runs the chroma mode search, then the
 * 16x16 luma search followed by the 4x4 luma search (which is given the
 * 16x16 result as its limit). When the 4x4 result is strictly smaller the
 * mode is set to B_PRED; otherwise the mode left by the 16x16 search stands.
 *
 *   rate - receives the chroma rate plus the rate of the winning luma search.
 */
void vp8_rd_pick_intra_mode(MACROBLOCK *x, int *rate) {
  int rateuv, rateuv_tokenonly = 0, distuv;
  int rate16x16 = 0, rate16x16_tokenonly = 0, dist16x16;
  int rate4x4, rate4x4_tokenonly = 0, dist4x4;
  int error16x16;
  int error4x4;
  int use_4x4;

  x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;

  /* Chroma first, then both luma alternatives */
  rd_pick_intra_mbuv_mode(x, &rateuv, &rateuv_tokenonly, &distuv);

  error16x16 = rd_pick_intra16x16mby_mode(x, &rate16x16, &rate16x16_tokenonly,
                                          &dist16x16);

  error4x4 = rd_pick_intra4x4mby_modes(x, &rate4x4, &rate4x4_tokenonly,
                                       &dist4x4, error16x16);

  use_4x4 = (error4x4 < error16x16);
  if (use_4x4) {
    x->e_mbd.mode_info_context->mbmi.mode = B_PRED;
  }

  *rate = rateuv + (use_4x4 ? rate4x4 : rate16x16);
}