Coverage Report

Created: 2025-11-16 07:20

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libvpx/vp8/encoder/rdopt.c
Line
Count
Source
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5
 *  that can be found in the LICENSE file in the root of the source
6
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
9
 */
10
11
#include <assert.h>
12
#include <stdio.h>
13
#include <math.h>
14
#include <limits.h>
15
#include <assert.h>
16
#include "vpx_config.h"
17
#include "vp8_rtcd.h"
18
#include "./vpx_dsp_rtcd.h"
19
#include "encodeframe.h"
20
#include "tokenize.h"
21
#include "treewriter.h"
22
#include "onyx_int.h"
23
#include "modecosts.h"
24
#include "encodeintra.h"
25
#include "pickinter.h"
26
#include "vp8/common/common.h"
27
#include "vp8/common/entropymode.h"
28
#include "vp8/common/reconinter.h"
29
#include "vp8/common/reconintra.h"
30
#include "vp8/common/reconintra4x4.h"
31
#include "vp8/common/findnearmv.h"
32
#include "vp8/common/quant_common.h"
33
#include "encodemb.h"
34
#include "vp8/encoder/quantize.h"
35
#include "vpx_dsp/variance.h"
36
#include "vpx_ports/system_state.h"
37
#include "mcomp.h"
38
#include "rdopt.h"
39
#include "vpx_mem/vpx_mem.h"
40
#include "vp8/common/systemdependent.h"
41
#if CONFIG_TEMPORAL_DENOISING
42
#include "denoising.h"
43
#endif
44
extern void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x);
45
46
1.62M
#define MAXF(a, b) (((a) > (b)) ? (a) : (b))
47
48
typedef struct rate_distortion_struct {
49
  int rate2;
50
  int rate_y;
51
  int rate_uv;
52
  int distortion2;
53
  int distortion_uv;
54
} RATE_DISTORTION;
55
56
typedef struct best_mode_struct {
57
  int yrd;
58
  int rd;
59
  int intra_rd;
60
  MB_MODE_INFO mbmode;
61
  union b_mode_info bmodes[16];
62
  PARTITION_INFO partition;
63
} BEST_MODE;
64
65
static const int auto_speed_thresh[17] = { 1000, 200, 150, 130, 150, 125,
66
                                           120,  115, 115, 115, 115, 115,
67
                                           115,  115, 115, 115, 105 };
68
69
const MB_PREDICTION_MODE vp8_mode_order[MAX_MODES] = {
70
  ZEROMV,    DC_PRED,
71
72
  NEARESTMV, NEARMV,
73
74
  ZEROMV,    NEARESTMV,
75
76
  ZEROMV,    NEARESTMV,
77
78
  NEARMV,    NEARMV,
79
80
  V_PRED,    H_PRED,    TM_PRED,
81
82
  NEWMV,     NEWMV,     NEWMV,
83
84
  SPLITMV,   SPLITMV,   SPLITMV,
85
86
  B_PRED,
87
};
88
89
/* This table determines the search order in reference frame priority order,
90
 * which may not necessarily match INTRA,LAST,GOLDEN,ARF
91
 */
92
const int vp8_ref_frame_order[MAX_MODES] = {
93
  1, 0,
94
95
  1, 1,
96
97
  2, 2,
98
99
  3, 3,
100
101
  2, 3,
102
103
  0, 0, 0,
104
105
  1, 2, 3,
106
107
  1, 2, 3,
108
109
  0,
110
};
111
112
static void fill_token_costs(
113
    int c[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS],
114
    const vp8_prob p[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS]
115
111k
                    [ENTROPY_NODES]) {
116
111k
  int i, j, k;
117
118
555k
  for (i = 0; i < BLOCK_TYPES; ++i) {
119
4.00M
    for (j = 0; j < COEF_BANDS; ++j) {
120
14.2M
      for (k = 0; k < PREV_COEF_CONTEXTS; ++k) {
121
        /* check for pt=0 and band > 1 if block type 0
122
         * and 0 if blocktype 1
123
         */
124
10.6M
        if (k == 0 && j > (i == 0)) {
125
3.00M
          vp8_cost_tokens2(c[i][j][k], p[i][j][k], vp8_coef_tree, 2);
126
7.66M
        } else {
127
7.66M
          vp8_cost_tokens(c[i][j][k], p[i][j][k], vp8_coef_tree);
128
7.66M
        }
129
10.6M
      }
130
3.55M
    }
131
444k
  }
132
111k
}
133
134
static const int rd_iifactor[32] = { 4, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0,
135
                                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
136
                                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
137
138
/* values are now correlated to quantizer */
139
static const int sad_per_bit16lut[QINDEX_RANGE] = {
140
  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  3,  3,  3,
141
  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  4,  4,  4,  4,  4,  4,  4,  4,
142
  4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  6,  6,  6,
143
  6,  6,  6,  6,  6,  6,  6,  6,  6,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
144
  7,  7,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  9,  9,  9,  9,  9,
145
  9,  9,  9,  9,  9,  9,  9,  10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11,
146
  11, 11, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 14, 14
147
};
148
static const int sad_per_bit4lut[QINDEX_RANGE] = {
149
  2,  2,  2,  2,  2,  2,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
150
  3,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  6,  6,
151
  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  7,  7,  7,  7,  7,  7,  7,  7,  7,
152
  7,  7,  7,  7,  8,  8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  10, 10, 10, 10,
153
  10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12,
154
  12, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16,
155
  16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 19, 20, 20, 20,
156
};
157
158
111k
void vp8cx_initialize_me_consts(VP8_COMP *cpi, int QIndex) {
159
111k
  cpi->mb.sadperbit16 = sad_per_bit16lut[QIndex];
160
111k
  cpi->mb.sadperbit4 = sad_per_bit4lut[QIndex];
161
111k
}
162
163
111k
void vp8_initialize_rd_consts(VP8_COMP *cpi, MACROBLOCK *x, int Qvalue) {
164
111k
  int q;
165
111k
  int i;
166
111k
  double capped_q = (Qvalue < 160) ? (double)Qvalue : 160.0;
167
111k
  double rdconst = 2.80;
168
169
111k
  vpx_clear_system_state();
170
171
  /* Further tests required to see if optimum is different
172
   * for key frames, golden frames and arf frames.
173
   */
174
111k
  cpi->RDMULT = (int)(rdconst * (capped_q * capped_q));
175
176
  /* Extend rate multiplier along side quantizer zbin increases */
177
111k
  if (cpi->mb.zbin_over_quant > 0) {
178
20.3k
    double oq_factor;
179
20.3k
    double modq;
180
181
    /* Experimental code using the same basic equation as used for Q above
182
     * The units of cpi->mb.zbin_over_quant are 1/128 of Q bin size
183
     */
184
20.3k
    oq_factor = 1.0 + ((double)0.0015625 * cpi->mb.zbin_over_quant);
185
20.3k
    modq = (int)((double)capped_q * oq_factor);
186
20.3k
    cpi->RDMULT = (int)(rdconst * (modq * modq));
187
20.3k
  }
188
189
111k
  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
190
0
    if (cpi->twopass.next_iiratio > 31) {
191
0
      cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
192
0
    } else {
193
0
      cpi->RDMULT +=
194
0
          (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
195
0
    }
196
0
  }
197
198
111k
  cpi->mb.errorperbit = (cpi->RDMULT / 110);
199
111k
  cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);
200
201
111k
  vp8_set_speed_features(cpi);
202
203
2.33M
  for (i = 0; i < MAX_MODES; ++i) {
204
2.22M
    x->mode_test_hit_counts[i] = 0;
205
2.22M
  }
206
207
111k
  q = (int)pow(Qvalue, 1.25);
208
209
111k
  if (q < 8) q = 8;
210
211
111k
  if (cpi->RDMULT > 1000) {
212
61.4k
    cpi->RDDIV = 1;
213
61.4k
    cpi->RDMULT /= 100;
214
215
1.29M
    for (i = 0; i < MAX_MODES; ++i) {
216
1.22M
      if (cpi->sf.thresh_mult[i] < INT_MAX) {
217
1.17M
        x->rd_threshes[i] = cpi->sf.thresh_mult[i] * q / 100;
218
1.17M
      } else {
219
54.3k
        x->rd_threshes[i] = INT_MAX;
220
54.3k
      }
221
222
1.22M
      cpi->rd_baseline_thresh[i] = x->rd_threshes[i];
223
1.22M
    }
224
61.4k
  } else {
225
49.6k
    cpi->RDDIV = 100;
226
227
1.04M
    for (i = 0; i < MAX_MODES; ++i) {
228
993k
      if (cpi->sf.thresh_mult[i] < (INT_MAX / q)) {
229
920k
        x->rd_threshes[i] = cpi->sf.thresh_mult[i] * q;
230
920k
      } else {
231
72.8k
        x->rd_threshes[i] = INT_MAX;
232
72.8k
      }
233
234
993k
      cpi->rd_baseline_thresh[i] = x->rd_threshes[i];
235
993k
    }
236
49.6k
  }
237
238
111k
  {
239
    /* build token cost array for the type of frame we have now */
240
111k
    FRAME_CONTEXT *l = &cpi->lfc_n;
241
242
111k
    if (cpi->common.refresh_alt_ref_frame) {
243
26.7k
      l = &cpi->lfc_a;
244
84.4k
    } else if (cpi->common.refresh_golden_frame) {
245
8.22k
      l = &cpi->lfc_g;
246
8.22k
    }
247
248
111k
    fill_token_costs(cpi->mb.token_costs,
249
111k
                     (const vp8_prob(*)[8][3][11])l->coef_probs);
250
    /*
251
    fill_token_costs(
252
        cpi->mb.token_costs,
253
        (const vp8_prob( *)[8][3][11]) cpi->common.fc.coef_probs);
254
    */
255
256
    /* TODO make these mode costs depend on last,alt or gold too.  (jbb) */
257
111k
    vp8_init_mode_costs(cpi);
258
111k
  }
259
111k
}
260
261
42.4k
void vp8_auto_select_speed(VP8_COMP *cpi) {
262
42.4k
  int milliseconds_for_compress = (int)(1000000 / cpi->framerate);
263
264
42.4k
  milliseconds_for_compress =
265
42.4k
      milliseconds_for_compress * (16 - cpi->oxcf.cpu_used) / 16;
266
267
#if 0
268
269
    if (0)
270
    {
271
        FILE *f;
272
273
        f = fopen("speed.stt", "a");
274
        fprintf(f, " %8ld %10ld %10ld %10ld\n",
275
                cpi->common.current_video_frame, cpi->Speed, milliseconds_for_compress, cpi->avg_pick_mode_time);
276
        fclose(f);
277
    }
278
279
#endif
280
281
42.4k
  if (cpi->avg_pick_mode_time < milliseconds_for_compress &&
282
42.4k
      (cpi->avg_encode_time - cpi->avg_pick_mode_time) <
283
42.4k
          milliseconds_for_compress) {
284
42.4k
    if (cpi->avg_pick_mode_time == 0) {
285
3.01k
      cpi->Speed = 4;
286
39.3k
    } else {
287
39.3k
      if (milliseconds_for_compress * 100 < cpi->avg_encode_time * 95) {
288
0
        cpi->Speed += 2;
289
0
        cpi->avg_pick_mode_time = 0;
290
0
        cpi->avg_encode_time = 0;
291
292
0
        if (cpi->Speed > 16) {
293
0
          cpi->Speed = 16;
294
0
        }
295
0
      }
296
297
39.3k
      if (milliseconds_for_compress * 100 >
298
39.3k
          cpi->avg_encode_time * auto_speed_thresh[cpi->Speed]) {
299
39.2k
        cpi->Speed -= 1;
300
39.2k
        cpi->avg_pick_mode_time = 0;
301
39.2k
        cpi->avg_encode_time = 0;
302
303
        /* In real-time mode, cpi->speed is in [4, 16]. */
304
39.2k
        if (cpi->Speed < 4) {
305
39.2k
          cpi->Speed = 4;
306
39.2k
        }
307
39.2k
      }
308
39.3k
    }
309
42.4k
  } else {
310
0
    cpi->Speed += 4;
311
312
0
    if (cpi->Speed > 16) cpi->Speed = 16;
313
314
0
    cpi->avg_pick_mode_time = 0;
315
0
    cpi->avg_encode_time = 0;
316
0
  }
317
42.4k
}
318
319
0
int vp8_block_error_c(short *coeff, short *dqcoeff) {
320
0
  int i;
321
0
  int error = 0;
322
323
0
  for (i = 0; i < 16; ++i) {
324
0
    int this_diff = coeff[i] - dqcoeff[i];
325
0
    error += this_diff * this_diff;
326
0
  }
327
328
0
  return error;
329
0
}
330
331
0
int vp8_mbblock_error_c(MACROBLOCK *mb, int dc) {
332
0
  BLOCK *be;
333
0
  BLOCKD *bd;
334
0
  int i, j;
335
0
  int berror, error = 0;
336
337
0
  for (i = 0; i < 16; ++i) {
338
0
    be = &mb->block[i];
339
0
    bd = &mb->e_mbd.block[i];
340
341
0
    berror = 0;
342
343
0
    for (j = dc; j < 16; ++j) {
344
0
      int this_diff = be->coeff[j] - bd->dqcoeff[j];
345
0
      berror += this_diff * this_diff;
346
0
    }
347
348
0
    error += berror;
349
0
  }
350
351
0
  return error;
352
0
}
353
354
0
int vp8_mbuverror_c(MACROBLOCK *mb) {
355
0
  BLOCK *be;
356
0
  BLOCKD *bd;
357
358
0
  int i;
359
0
  int error = 0;
360
361
0
  for (i = 16; i < 24; ++i) {
362
0
    be = &mb->block[i];
363
0
    bd = &mb->e_mbd.block[i];
364
365
0
    error += vp8_block_error_c(be->coeff, bd->dqcoeff);
366
0
  }
367
368
0
  return error;
369
0
}
370
371
42.2k
int VP8_UVSSE(MACROBLOCK *x) {
372
42.2k
  unsigned char *uptr, *vptr;
373
42.2k
  unsigned char *upred_ptr = (*(x->block[16].base_src) + x->block[16].src);
374
42.2k
  unsigned char *vpred_ptr = (*(x->block[20].base_src) + x->block[20].src);
375
42.2k
  int uv_stride = x->block[16].src_stride;
376
377
42.2k
  unsigned int sse1 = 0;
378
42.2k
  unsigned int sse2 = 0;
379
42.2k
  int mv_row = x->e_mbd.mode_info_context->mbmi.mv.as_mv.row;
380
42.2k
  int mv_col = x->e_mbd.mode_info_context->mbmi.mv.as_mv.col;
381
42.2k
  int offset;
382
42.2k
  int pre_stride = x->e_mbd.pre.uv_stride;
383
384
42.2k
  if (mv_row < 0) {
385
560
    mv_row -= 1;
386
41.6k
  } else {
387
41.6k
    mv_row += 1;
388
41.6k
  }
389
390
42.2k
  if (mv_col < 0) {
391
737
    mv_col -= 1;
392
41.5k
  } else {
393
41.5k
    mv_col += 1;
394
41.5k
  }
395
396
42.2k
  mv_row /= 2;
397
42.2k
  mv_col /= 2;
398
399
42.2k
  offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
400
42.2k
  uptr = x->e_mbd.pre.u_buffer + offset;
401
42.2k
  vptr = x->e_mbd.pre.v_buffer + offset;
402
403
42.2k
  if ((mv_row | mv_col) & 7) {
404
1.82k
    vpx_sub_pixel_variance8x8(uptr, pre_stride, mv_col & 7, mv_row & 7,
405
1.82k
                              upred_ptr, uv_stride, &sse2);
406
1.82k
    vpx_sub_pixel_variance8x8(vptr, pre_stride, mv_col & 7, mv_row & 7,
407
1.82k
                              vpred_ptr, uv_stride, &sse1);
408
1.82k
    sse2 += sse1;
409
40.4k
  } else {
410
40.4k
    vpx_variance8x8(uptr, pre_stride, upred_ptr, uv_stride, &sse2);
411
40.4k
    vpx_variance8x8(vptr, pre_stride, vpred_ptr, uv_stride, &sse1);
412
40.4k
    sse2 += sse1;
413
40.4k
  }
414
42.2k
  return sse2;
415
42.2k
}
416
417
static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, int type, ENTROPY_CONTEXT *a,
418
469M
                       ENTROPY_CONTEXT *l) {
419
469M
  int c = !type; /* start at coef 0, unless Y with Y2 */
420
469M
  int eob = (int)(*b->eob);
421
469M
  int pt; /* surrounding block/prev coef predictor */
422
469M
  int cost = 0;
423
469M
  short *qcoeff_ptr = b->qcoeff;
424
425
469M
  VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
426
427
469M
  assert(eob <= 16);
428
4.56G
  for (; c < eob; ++c) {
429
4.09G
    const int v = qcoeff_ptr[vp8_default_zig_zag1d[c]];
430
4.09G
    const int t = vp8_dct_value_tokens_ptr[v].Token;
431
4.09G
    cost += mb->token_costs[type][vp8_coef_bands[c]][pt][t];
432
4.09G
    cost += vp8_dct_value_cost_ptr[v];
433
4.09G
    pt = vp8_prev_token_class[t];
434
4.09G
  }
435
436
469M
  if (c < 16) {
437
295M
    cost += mb->token_costs[type][vp8_coef_bands[c]][pt][DCT_EOB_TOKEN];
438
295M
  }
439
440
469M
  pt = (c != !type); /* is eob first coefficient; */
441
469M
  *a = *l = pt;
442
443
469M
  return cost;
444
469M
}
445
446
9.09M
static int vp8_rdcost_mby(MACROBLOCK *mb) {
447
9.09M
  int cost = 0;
448
9.09M
  int b;
449
9.09M
  MACROBLOCKD *x = &mb->e_mbd;
450
9.09M
  ENTROPY_CONTEXT_PLANES t_above, t_left;
451
9.09M
  ENTROPY_CONTEXT *ta;
452
9.09M
  ENTROPY_CONTEXT *tl;
453
454
9.09M
  t_above = *mb->e_mbd.above_context;
455
9.09M
  t_left = *mb->e_mbd.left_context;
456
457
9.09M
  ta = (ENTROPY_CONTEXT *)&t_above;
458
9.09M
  tl = (ENTROPY_CONTEXT *)&t_left;
459
460
154M
  for (b = 0; b < 16; ++b) {
461
145M
    cost += cost_coeffs(mb, x->block + b, PLANE_TYPE_Y_NO_DC,
462
145M
                        ta + vp8_block2above[b], tl + vp8_block2left[b]);
463
145M
  }
464
465
9.09M
  cost += cost_coeffs(mb, x->block + 24, PLANE_TYPE_Y2,
466
9.09M
                      ta + vp8_block2above[24], tl + vp8_block2left[24]);
467
468
9.09M
  return cost;
469
9.09M
}
470
471
9.09M
static void macro_block_yrd(MACROBLOCK *mb, int *Rate, int *Distortion) {
472
9.09M
  int b;
473
9.09M
  MACROBLOCKD *const x = &mb->e_mbd;
474
9.09M
  BLOCK *const mb_y2 = mb->block + 24;
475
9.09M
  BLOCKD *const x_y2 = x->block + 24;
476
9.09M
  short *Y2DCPtr = mb_y2->src_diff;
477
9.09M
  BLOCK *beptr;
478
9.09M
  int d;
479
480
9.09M
  vp8_subtract_mby(mb->src_diff, *(mb->block[0].base_src),
481
9.09M
                   mb->block[0].src_stride, mb->e_mbd.predictor, 16);
482
483
  /* Fdct and building the 2nd order block */
484
81.8M
  for (beptr = mb->block; beptr < mb->block + 16; beptr += 2) {
485
72.7M
    mb->short_fdct8x4(beptr->src_diff, beptr->coeff, 32);
486
72.7M
    *Y2DCPtr++ = beptr->coeff[0];
487
72.7M
    *Y2DCPtr++ = beptr->coeff[16];
488
72.7M
  }
489
490
  /* 2nd order fdct */
491
9.09M
  mb->short_walsh4x4(mb_y2->src_diff, mb_y2->coeff, 8);
492
493
  /* Quantization */
494
154M
  for (b = 0; b < 16; ++b) {
495
145M
    mb->quantize_b(&mb->block[b], &mb->e_mbd.block[b]);
496
145M
  }
497
498
  /* DC predication and Quantization of 2nd Order block */
499
9.09M
  mb->quantize_b(mb_y2, x_y2);
500
501
  /* Distortion */
502
9.09M
  d = vp8_mbblock_error(mb, 1) << 2;
503
9.09M
  d += vp8_block_error(mb_y2->coeff, x_y2->dqcoeff);
504
505
9.09M
  *Distortion = (d >> 4);
506
507
  /* rate */
508
9.09M
  *Rate = vp8_rdcost_mby(mb);
509
9.09M
}
510
511
25.6M
static void copy_predictor(unsigned char *dst, const unsigned char *predictor) {
512
25.6M
  const unsigned int *p = (const unsigned int *)predictor;
513
25.6M
  unsigned int *d = (unsigned int *)dst;
514
25.6M
  d[0] = p[0];
515
25.6M
  d[4] = p[4];
516
25.6M
  d[8] = p[8];
517
25.6M
  d[12] = p[12];
518
25.6M
}
519
static int rd_pick_intra4x4block(MACROBLOCK *x, BLOCK *be, BLOCKD *b,
520
                                 B_PREDICTION_MODE *best_mode,
521
                                 const int *bmode_costs, ENTROPY_CONTEXT *a,
522
                                 ENTROPY_CONTEXT *l,
523
524
                                 int *bestrate, int *bestratey,
525
13.7M
                                 int *bestdistortion) {
526
13.7M
  B_PREDICTION_MODE mode;
527
13.7M
  int best_rd = INT_MAX;
528
13.7M
  int rate = 0;
529
13.7M
  int distortion;
530
531
13.7M
  ENTROPY_CONTEXT ta = *a, tempa = *a;
532
13.7M
  ENTROPY_CONTEXT tl = *l, templ = *l;
533
  /*
534
   * The predictor buffer is a 2d buffer with a stride of 16.  Create
535
   * a temp buffer that meets the stride requirements, but we are only
536
   * interested in the left 4x4 block
537
   * */
538
13.7M
  DECLARE_ALIGNED(16, unsigned char, best_predictor[16 * 4]);
539
13.7M
  DECLARE_ALIGNED(16, short, best_dqcoeff[16]);
540
13.7M
  int dst_stride = x->e_mbd.dst.y_stride;
541
13.7M
  unsigned char *dst = x->e_mbd.dst.y_buffer + b->offset;
542
543
13.7M
  unsigned char *Above = dst - dst_stride;
544
13.7M
  unsigned char *yleft = dst - 1;
545
13.7M
  unsigned char top_left = Above[-1];
546
547
151M
  for (mode = B_DC_PRED; mode <= B_HU_PRED; ++mode) {
548
137M
    int this_rd;
549
137M
    int ratey;
550
551
137M
    rate = bmode_costs[mode];
552
553
137M
    vp8_intra4x4_predict(Above, yleft, dst_stride, mode, b->predictor, 16,
554
137M
                         top_left);
555
137M
    vp8_subtract_b(be, b, 16);
556
137M
    x->short_fdct4x4(be->src_diff, be->coeff, 32);
557
137M
    x->quantize_b(be, b);
558
559
137M
    tempa = ta;
560
137M
    templ = tl;
561
562
137M
    ratey = cost_coeffs(x, b, PLANE_TYPE_Y_WITH_DC, &tempa, &templ);
563
137M
    rate += ratey;
564
137M
    distortion = vp8_block_error(be->coeff, b->dqcoeff) >> 2;
565
566
137M
    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
567
568
137M
    if (this_rd < best_rd) {
569
25.6M
      *bestrate = rate;
570
25.6M
      *bestratey = ratey;
571
25.6M
      *bestdistortion = distortion;
572
25.6M
      best_rd = this_rd;
573
25.6M
      *best_mode = mode;
574
25.6M
      *a = tempa;
575
25.6M
      *l = templ;
576
25.6M
      copy_predictor(best_predictor, b->predictor);
577
25.6M
      memcpy(best_dqcoeff, b->dqcoeff, 32);
578
25.6M
    }
579
137M
  }
580
13.7M
  b->bmi.as_mode = *best_mode;
581
582
13.7M
  vp8_short_idct4x4llm(best_dqcoeff, best_predictor, 16, dst, dst_stride);
583
584
13.7M
  return best_rd;
585
13.7M
}
586
587
static int rd_pick_intra4x4mby_modes(MACROBLOCK *mb, int *Rate, int *rate_y,
588
1.36M
                                     int *Distortion, int best_rd) {
589
1.36M
  MACROBLOCKD *const xd = &mb->e_mbd;
590
1.36M
  int i;
591
1.36M
  int cost = mb->mbmode_cost[xd->frame_type][B_PRED];
592
1.36M
  int distortion = 0;
593
1.36M
  int tot_rate_y = 0;
594
1.36M
  int64_t total_rd = 0;
595
1.36M
  ENTROPY_CONTEXT_PLANES t_above, t_left;
596
1.36M
  ENTROPY_CONTEXT *ta;
597
1.36M
  ENTROPY_CONTEXT *tl;
598
1.36M
  const int *bmode_costs;
599
600
1.36M
  t_above = *mb->e_mbd.above_context;
601
1.36M
  t_left = *mb->e_mbd.left_context;
602
603
1.36M
  ta = (ENTROPY_CONTEXT *)&t_above;
604
1.36M
  tl = (ENTROPY_CONTEXT *)&t_left;
605
606
1.36M
  intra_prediction_down_copy(xd, xd->dst.y_buffer - xd->dst.y_stride + 16);
607
608
1.36M
  bmode_costs = mb->inter_bmode_costs;
609
610
14.3M
  for (i = 0; i < 16; ++i) {
611
13.7M
    MODE_INFO *const mic = xd->mode_info_context;
612
13.7M
    const int mis = xd->mode_info_stride;
613
13.7M
    B_PREDICTION_MODE best_mode = B_MODE_COUNT;
614
13.7M
    int r = 0, ry = 0, d = 0;
615
616
13.7M
    if (mb->e_mbd.frame_type == KEY_FRAME) {
617
7.54M
      const B_PREDICTION_MODE A = above_block_mode(mic, i, mis);
618
7.54M
      const B_PREDICTION_MODE L = left_block_mode(mic, i);
619
620
7.54M
      bmode_costs = mb->bmode_costs[A][L];
621
7.54M
    }
622
623
13.7M
    total_rd += rd_pick_intra4x4block(
624
13.7M
        mb, mb->block + i, xd->block + i, &best_mode, bmode_costs,
625
13.7M
        ta + vp8_block2above[i], tl + vp8_block2left[i], &r, &ry, &d);
626
627
13.7M
    cost += r;
628
13.7M
    distortion += d;
629
13.7M
    tot_rate_y += ry;
630
631
13.7M
    assert(best_mode != B_MODE_COUNT);
632
13.7M
    mic->bmi[i].as_mode = best_mode;
633
634
13.7M
    if (total_rd >= (int64_t)best_rd) break;
635
13.7M
  }
636
637
1.36M
  if (total_rd >= (int64_t)best_rd) return INT_MAX;
638
639
523k
  *Rate = cost;
640
523k
  *rate_y = tot_rate_y;
641
523k
  *Distortion = distortion;
642
643
523k
  return RDCOST(mb->rdmult, mb->rddiv, cost, distortion);
644
1.36M
}
645
646
static int rd_pick_intra16x16mby_mode(MACROBLOCK *x, int *Rate, int *rate_y,
647
800k
                                      int *Distortion) {
648
800k
  MB_PREDICTION_MODE mode;
649
800k
  MB_PREDICTION_MODE mode_selected = MB_MODE_COUNT;
650
800k
  int rate, ratey;
651
800k
  int distortion;
652
800k
  int best_rd = INT_MAX;
653
800k
  int this_rd;
654
800k
  MACROBLOCKD *xd = &x->e_mbd;
655
656
  /* Y Search for 16x16 intra prediction mode */
657
4.00M
  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
658
3.20M
    xd->mode_info_context->mbmi.mode = mode;
659
660
3.20M
    vp8_build_intra_predictors_mby_s(xd, xd->dst.y_buffer - xd->dst.y_stride,
661
3.20M
                                     xd->dst.y_buffer - 1, xd->dst.y_stride,
662
3.20M
                                     xd->predictor, 16);
663
664
3.20M
    macro_block_yrd(x, &ratey, &distortion);
665
3.20M
    rate = ratey +
666
3.20M
           x->mbmode_cost[xd->frame_type][xd->mode_info_context->mbmi.mode];
667
668
3.20M
    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
669
670
3.20M
    if (this_rd < best_rd) {
671
962k
      mode_selected = mode;
672
962k
      best_rd = this_rd;
673
962k
      *Rate = rate;
674
962k
      *rate_y = ratey;
675
962k
      *Distortion = distortion;
676
962k
    }
677
3.20M
  }
678
679
800k
  assert(mode_selected != MB_MODE_COUNT);
680
800k
  xd->mode_info_context->mbmi.mode = mode_selected;
681
800k
  return best_rd;
682
800k
}
683
684
9.95M
static int rd_cost_mbuv(MACROBLOCK *mb) {
685
9.95M
  int b;
686
9.95M
  int cost = 0;
687
9.95M
  MACROBLOCKD *x = &mb->e_mbd;
688
9.95M
  ENTROPY_CONTEXT_PLANES t_above, t_left;
689
9.95M
  ENTROPY_CONTEXT *ta;
690
9.95M
  ENTROPY_CONTEXT *tl;
691
692
9.95M
  t_above = *mb->e_mbd.above_context;
693
9.95M
  t_left = *mb->e_mbd.left_context;
694
695
9.95M
  ta = (ENTROPY_CONTEXT *)&t_above;
696
9.95M
  tl = (ENTROPY_CONTEXT *)&t_left;
697
698
89.5M
  for (b = 16; b < 24; ++b) {
699
79.6M
    cost += cost_coeffs(mb, x->block + b, PLANE_TYPE_UV,
700
79.6M
                        ta + vp8_block2above[b], tl + vp8_block2left[b]);
701
79.6M
  }
702
703
9.95M
  return cost;
704
9.95M
}
705
706
static int rd_inter16x16_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate,
707
3.06M
                            int *distortion, int fullpixel) {
708
3.06M
  (void)cpi;
709
3.06M
  (void)fullpixel;
710
711
3.06M
  vp8_build_inter16x16_predictors_mbuv(&x->e_mbd);
712
3.06M
  vp8_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
713
3.06M
                    x->src.uv_stride, &x->e_mbd.predictor[256],
714
3.06M
                    &x->e_mbd.predictor[320], 8);
715
716
3.06M
  vp8_transform_mbuv(x);
717
3.06M
  vp8_quantize_mbuv(x);
718
719
3.06M
  *rate = rd_cost_mbuv(x);
720
3.06M
  *distortion = vp8_mbuverror(x) / 4;
721
722
3.06M
  return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
723
3.06M
}
724
725
static int rd_inter4x4_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate,
726
406k
                          int *distortion, int fullpixel) {
727
406k
  (void)cpi;
728
406k
  (void)fullpixel;
729
730
406k
  vp8_build_inter4x4_predictors_mbuv(&x->e_mbd);
731
406k
  vp8_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
732
406k
                    x->src.uv_stride, &x->e_mbd.predictor[256],
733
406k
                    &x->e_mbd.predictor[320], 8);
734
735
406k
  vp8_transform_mbuv(x);
736
406k
  vp8_quantize_mbuv(x);
737
738
406k
  *rate = rd_cost_mbuv(x);
739
406k
  *distortion = vp8_mbuverror(x) / 4;
740
741
406k
  return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
742
406k
}
743
744
static void rd_pick_intra_mbuv_mode(MACROBLOCK *x, int *rate,
745
1.62M
                                    int *rate_tokenonly, int *distortion) {
746
1.62M
  MB_PREDICTION_MODE mode;
747
1.62M
  MB_PREDICTION_MODE mode_selected = MB_MODE_COUNT;
748
1.62M
  int best_rd = INT_MAX;
749
1.62M
  int d = 0, r = 0;
750
1.62M
  int rate_to;
751
1.62M
  MACROBLOCKD *xd = &x->e_mbd;
752
753
8.10M
  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
754
6.48M
    int this_rate;
755
6.48M
    int this_distortion;
756
6.48M
    int this_rd;
757
758
6.48M
    xd->mode_info_context->mbmi.uv_mode = mode;
759
760
6.48M
    vp8_build_intra_predictors_mbuv_s(
761
6.48M
        xd, xd->dst.u_buffer - xd->dst.uv_stride,
762
6.48M
        xd->dst.v_buffer - xd->dst.uv_stride, xd->dst.u_buffer - 1,
763
6.48M
        xd->dst.v_buffer - 1, xd->dst.uv_stride, &xd->predictor[256],
764
6.48M
        &xd->predictor[320], 8);
765
766
6.48M
    vp8_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
767
6.48M
                      x->src.uv_stride, &xd->predictor[256],
768
6.48M
                      &xd->predictor[320], 8);
769
6.48M
    vp8_transform_mbuv(x);
770
6.48M
    vp8_quantize_mbuv(x);
771
772
6.48M
    rate_to = rd_cost_mbuv(x);
773
6.48M
    this_rate =
774
6.48M
        rate_to + x->intra_uv_mode_cost[xd->frame_type]
775
6.48M
                                       [xd->mode_info_context->mbmi.uv_mode];
776
777
6.48M
    this_distortion = vp8_mbuverror(x) / 4;
778
779
6.48M
    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
780
781
6.48M
    if (this_rd < best_rd) {
782
1.97M
      best_rd = this_rd;
783
1.97M
      d = this_distortion;
784
1.97M
      r = this_rate;
785
1.97M
      *rate_tokenonly = rate_to;
786
1.97M
      mode_selected = mode;
787
1.97M
    }
788
6.48M
  }
789
790
1.62M
  *rate = r;
791
1.62M
  *distortion = d;
792
793
1.62M
  assert(mode_selected != MB_MODE_COUNT);
794
1.62M
  xd->mode_info_context->mbmi.uv_mode = mode_selected;
795
1.62M
}
796
797
6.85M
int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4]) {
798
6.85M
  vp8_prob p[VP8_MVREFS - 1];
799
6.85M
  assert(NEARESTMV <= m && m <= SPLITMV);
800
6.85M
  vp8_mv_ref_probs(p, near_mv_ref_ct);
801
6.85M
  return vp8_cost_token(vp8_mv_ref_tree, p,
802
6.85M
                        vp8_mv_ref_encoding_array + (m - NEARESTMV));
803
6.85M
}
804
805
3.06M
void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv) {
806
3.06M
  x->e_mbd.mode_info_context->mbmi.mode = mb;
807
3.06M
  x->e_mbd.mode_info_context->mbmi.mv.as_int = mv->as_int;
808
3.06M
}
809
810
static int labels2mode(MACROBLOCK *x, int const *labelings, int which_label,
811
                       B_PREDICTION_MODE this_mode, int_mv *this_mv,
812
32.4M
                       int_mv *best_ref_mv, int *mvcost[2]) {
813
32.4M
  MACROBLOCKD *const xd = &x->e_mbd;
814
32.4M
  MODE_INFO *const mic = xd->mode_info_context;
815
32.4M
  const int mis = xd->mode_info_stride;
816
817
32.4M
  int cost = 0;
818
32.4M
  int thismvcost = 0;
819
820
  /* We have to be careful retrieving previously-encoded motion vectors.
821
     Ones from this macroblock have to be pulled from the BLOCKD array
822
     as they have not yet made it to the bmi array in our MB_MODE_INFO. */
823
824
32.4M
  int i = 0;
825
826
519M
  do {
827
519M
    BLOCKD *const d = xd->block + i;
828
519M
    const int row = i >> 2, col = i & 3;
829
830
519M
    B_PREDICTION_MODE m;
831
832
519M
    if (labelings[i] != which_label) continue;
833
834
125M
    if (col && labelings[i] == labelings[i - 1]) {
835
64.5M
      m = LEFT4X4;
836
64.5M
    } else if (row && labelings[i] == labelings[i - 4]) {
837
28.3M
      m = ABOVE4X4;
838
32.4M
    } else {
839
      /* the only time we should do costing for new motion vector
840
       * or mode is when we are on a new label  (jbb May 08, 2007)
841
       */
842
32.4M
      switch (m = this_mode) {
843
8.97M
        case NEW4X4:
844
8.97M
          thismvcost = vp8_mv_bit_cost(this_mv, best_ref_mv, mvcost, 102);
845
8.97M
          break;
846
9.50M
        case LEFT4X4:
847
9.50M
          this_mv->as_int = col ? d[-1].bmi.mv.as_int : left_block_mv(mic, i);
848
9.50M
          break;
849
7.30M
        case ABOVE4X4:
850
7.30M
          this_mv->as_int =
851
7.30M
              row ? d[-4].bmi.mv.as_int : above_block_mv(mic, i, mis);
852
7.30M
          break;
853
6.65M
        case ZERO4X4: this_mv->as_int = 0; break;
854
0
        default: break;
855
32.4M
      }
856
857
32.4M
      if (m == ABOVE4X4) { /* replace above with left if same */
858
7.30M
        int_mv left_mv;
859
860
7.30M
        left_mv.as_int = col ? d[-1].bmi.mv.as_int : left_block_mv(mic, i);
861
862
7.30M
        if (left_mv.as_int == this_mv->as_int) m = LEFT4X4;
863
7.30M
      }
864
865
32.4M
      cost = x->inter_bmode_costs[m];
866
32.4M
    }
867
868
125M
    d->bmi.mv.as_int = this_mv->as_int;
869
870
125M
    x->partition_info->bmi[i].mode = m;
871
125M
    x->partition_info->bmi[i].mv.as_int = this_mv->as_int;
872
873
519M
  } while (++i < 16);
874
875
32.4M
  cost += thismvcost;
876
32.4M
  return cost;
877
32.4M
}
878
879
static int rdcost_mbsegment_y(MACROBLOCK *mb, const int *labels,
880
                              int which_label, ENTROPY_CONTEXT *ta,
881
25.3M
                              ENTROPY_CONTEXT *tl) {
882
25.3M
  int cost = 0;
883
25.3M
  int b;
884
25.3M
  MACROBLOCKD *x = &mb->e_mbd;
885
886
430M
  for (b = 0; b < 16; ++b) {
887
405M
    if (labels[b] == which_label) {
888
97.5M
      cost += cost_coeffs(mb, x->block + b, PLANE_TYPE_Y_WITH_DC,
889
97.5M
                          ta + vp8_block2above[b], tl + vp8_block2left[b]);
890
97.5M
    }
891
405M
  }
892
893
25.3M
  return cost;
894
25.3M
}
895
static unsigned int vp8_encode_inter_mb_segment(MACROBLOCK *x,
896
                                                int const *labels,
897
25.3M
                                                int which_label) {
898
25.3M
  int i;
899
25.3M
  unsigned int distortion = 0;
900
25.3M
  int pre_stride = x->e_mbd.pre.y_stride;
901
25.3M
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;
902
903
430M
  for (i = 0; i < 16; ++i) {
904
405M
    if (labels[i] == which_label) {
905
97.5M
      BLOCKD *bd = &x->e_mbd.block[i];
906
97.5M
      BLOCK *be = &x->block[i];
907
908
97.5M
      vp8_build_inter_predictors_b(bd, 16, base_pre, pre_stride,
909
97.5M
                                   x->e_mbd.subpixel_predict);
910
97.5M
      vp8_subtract_b(be, bd, 16);
911
97.5M
      x->short_fdct4x4(be->src_diff, be->coeff, 32);
912
97.5M
      x->quantize_b(be, bd);
913
914
97.5M
      distortion += vp8_block_error(be->coeff, bd->dqcoeff);
915
97.5M
    }
916
405M
  }
917
918
25.3M
  return distortion;
919
25.3M
}
920
921
/* Right shift applied to the best motion-search error in rd_check_segment()
 * before it is compared against the full-search trigger threshold. Indexed by
 * the segmentation value passed to rd_check_segment() (presumably the
 * BLOCK_16X8, BLOCK_8X16, BLOCK_8X8, BLOCK_4X4 order -- confirm against the
 * enum definition).
 */
static const unsigned int segmentation_to_sseshift[4] = { 3, 3, 2, 0 };
922
923
typedef struct {
924
  int_mv *ref_mv;
925
  int_mv mvp;
926
927
  int segment_rd;
928
  int segment_num;
929
  int r;
930
  int d;
931
  int segment_yrate;
932
  B_PREDICTION_MODE modes[16];
933
  int_mv mvs[16];
934
  unsigned char eobs[16];
935
936
  int mvthresh;
937
  int *mdcounts;
938
939
  int_mv sv_mvp[4]; /* save 4 mvp from 8x8 */
940
  int sv_istep[2];  /* save 2 initial step_param for 16x8/8x16 */
941
942
} BEST_SEG_INFO;
943
944
/* Evaluate one macroblock segmentation for SPLITMV and record it in *bsi if
 * it beats the best result stored there.
 *
 * For each label of the segmentation every sub-block mode from LEFT4X4 up to
 * NEW4X4 is tried; NEW4X4 first runs a diamond motion search (plus an
 * optional full search when compressor_speed is 0) followed by the fractional
 * refinement step. The cheapest mode per label is kept, its rate / distortion
 * is accumulated, and the evaluation stops early as soon as the running RD
 * cost reaches bsi->segment_rd.
 *
 * cpi           encoder instance (search functions, speed settings).
 * x             macroblock being encoded; block MVs, partition_info and the
 *               quantized coefficients are overwritten while searching.
 * bsi           search state; updated only when this segmentation wins
 *               (bsi->mvp is additionally rewritten while picking predictors).
 * segmentation  index into vp8_mbsplits / vp8_mbsplit_count / cpi->fn_ptr.
 */
static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, BEST_SEG_INFO *bsi,
                             unsigned int segmentation) {
  MACROBLOCKD *const xd = &x->e_mbd;
  int const *const labels = vp8_mbsplits[segmentation];
  const int label_count = vp8_mbsplit_count[segmentation];
  vp8_variance_fn_ptr_t *const v_fn_ptr = &cpi->fn_ptr[segmentation];

  /* The caller's MV threshold is shared evenly between the labels: once the
   * best mode of a label is cheaper than this, no new motion search is run
   * for that label.
   */
  const int label_mv_thresh = bsi->mvthresh / label_count;

  /* Entropy contexts after the labels accepted so far, and the contexts
   * produced by the best mode of the most recently improved label.
   */
  ENTROPY_CONTEXT_PLANES t_above = *xd->above_context;
  ENTROPY_CONTEXT_PLANES t_left = *xd->left_context;
  ENTROPY_CONTEXT_PLANES t_above_b, t_left_b;

  int total_rate;
  int total_dist = 0;
  int total_yrate = 0;
  int this_segment_rd;
  /* Rate / distortion of the best mode; deliberately kept across labels. */
  int label_rate = 0;
  int label_dist = 0;
  int rate;
  int i;
  B_PREDICTION_MODE this_mode;

  vp8_zero(t_above_b);
  vp8_zero(t_left_b);

  /* Segmentation method overheads */
  rate = vp8_cost_token(vp8_mbsplit_tree, vp8_mbsplit_probs,
                        vp8_mbsplit_encodings + segmentation);
  rate += vp8_cost_mv_ref(SPLITMV, bsi->mdcounts);
  this_segment_rd = RDCOST(x->rdmult, x->rddiv, rate, 0);
  total_rate = rate;

  for (i = 0; i < label_count; ++i) {
    int_mv mode_mv[B_MODE_COUNT] = { { 0 }, { 0 } };
    B_PREDICTION_MODE mode_selected = ZERO4X4;
    int best_label_rd = INT_MAX;
    int best_label_yrate = 0;

    /* search for the best motion vector on this segment */
    for (this_mode = LEFT4X4; this_mode <= NEW4X4; ++this_mode) {
      /* Every candidate mode starts from the same entropy contexts. */
      ENTROPY_CONTEXT_PLANES t_above_s = t_above;
      ENTROPY_CONTEXT_PLANES t_left_s = t_left;
      ENTROPY_CONTEXT *const ta_s = (ENTROPY_CONTEXT *)&t_above_s;
      ENTROPY_CONTEXT *const tl_s = (ENTROPY_CONTEXT *)&t_left_s;
      int this_rd;
      int distortion;
      int labelyrate;
      int mv_row_fp;
      int mv_col_fp;

      if (this_mode == NEW4X4) {
        const int sadpb = x->sadperbit4;
        int_mv mvp_full;
        int_mv temp_mv;
        BLOCK *c;
        BLOCKD *e;
        int step_param = 0;
        int further_steps;
        int sseshift;
        int num00;
        int n;
        int thissme;
        int bestsme = INT_MAX;

        /* The best mode so far is already good enough: a new motion search
         * cannot be justified, so stop trying modes for this label.
         */
        if (best_label_rd < label_mv_thresh) break;

        if (cpi->compressor_speed) {
          if (segmentation == BLOCK_8X16 || segmentation == BLOCK_16X8) {
            /* Start from a saved 8x8 result; the second 16x8 label uses
             * saved vector 2 instead of 1.
             */
            const int saved_idx =
                (i == 1 && segmentation == BLOCK_16X8) ? 2 : i;

            bsi->mvp.as_int = bsi->sv_mvp[saved_idx].as_int;
            step_param = bsi->sv_istep[i];
          }

          /* use previous block's result as next block's MV
           * predictor.
           */
          if (segmentation == BLOCK_4X4 && i > 0) {
            /* Labels 4, 8 and 12 take block i - 4, the others block i - 1. */
            const int src_blk = (i == 4 || i == 8 || i == 12) ? i - 4 : i - 1;

            bsi->mvp.as_int = xd->block[src_blk].bmi.mv.as_int;
            step_param = 2;
          }
        }

        further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;

        mvp_full.as_mv.row = bsi->mvp.as_mv.row >> 3;
        mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3;

        /* find first label */
        n = vp8_mbsplit_offset[segmentation][i];
        c = &x->block[n];
        e = &xd->block[n];

        bestsme = cpi->diamond_search_sad(x, c, e, &mvp_full, &mode_mv[NEW4X4],
                                          step_param, sadpb, &num00, v_fn_ptr,
                                          x->mvcost, bsi->ref_mv);

        /* Repeat the diamond search with successively larger step_param
         * values; num00 (as reported by the previous search) is the number
         * of following steps that are skipped.
         */
        n = num00;
        num00 = 0;

        while (n < further_steps) {
          ++n;

          if (num00) {
            --num00;
            continue;
          }

          thissme = cpi->diamond_search_sad(x, c, e, &mvp_full, &temp_mv,
                                            step_param + n, sadpb, &num00,
                                            v_fn_ptr, x->mvcost, bsi->ref_mv);

          if (thissme < bestsme) {
            bestsme = thissme;
            mode_mv[NEW4X4].as_int = temp_mv.as_int;
          }
        }

        sseshift = segmentation_to_sseshift[segmentation];

        /* Should we do a full search (best quality only) */
        if ((cpi->compressor_speed == 0) && (bestsme >> sseshift) > 4000) {
          /* Check if mvp_full is within the range. */
          vp8_clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max, x->mv_row_min,
                       x->mv_row_max);

          thissme = vp8_full_search_sad(x, c, e, &mvp_full, sadpb, 16,
                                        v_fn_ptr, x->mvcost, bsi->ref_mv);

          if (thissme < bestsme) {
            bestsme = thissme;
            mode_mv[NEW4X4].as_int = e->bmi.mv.as_int;
          } else {
            /* The full search result is actually worse so
             * re-instate the previous best vector
             */
            e->bmi.mv.as_int = mode_mv[NEW4X4].as_int;
          }
        }

        if (bestsme < INT_MAX) {
          int disto;
          unsigned int sse;

          cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4], bsi->ref_mv,
                                       x->errorperbit, v_fn_ptr, x->mvcost,
                                       &disto, &sse);
        }
      } /* NEW4X4 */

      rate = labels2mode(x, labels, i, this_mode, &mode_mv[this_mode],
                         bsi->ref_mv, x->mvcost);

      /* Trap vectors that reach beyond the UMV borders */
      mv_row_fp = mode_mv[this_mode].as_mv.row >> 3;
      mv_col_fp = mode_mv[this_mode].as_mv.col >> 3;
      if (mv_row_fp < x->mv_row_min || mv_row_fp > x->mv_row_max ||
          mv_col_fp < x->mv_col_min || mv_col_fp > x->mv_col_max) {
        continue;
      }

      distortion = vp8_encode_inter_mb_segment(x, labels, i) / 4;

      labelyrate = rdcost_mbsegment_y(x, labels, i, ta_s, tl_s);
      rate += labelyrate;

      this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

      if (this_rd < best_label_rd) {
        label_rate = rate;
        label_dist = distortion;
        best_label_yrate = labelyrate;
        mode_selected = this_mode;
        best_label_rd = this_rd;

        t_above_b = t_above_s;
        t_left_b = t_left_s;
      }
    } /*for each 4x4 mode*/

    /* Carry the winning mode's contexts forward to the next label. */
    t_above = t_above_b;
    t_left = t_left_b;

    /* Re-apply the winning mode so block MVs / partition_info reflect it. */
    labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected],
                bsi->ref_mv, x->mvcost);

    total_rate += label_rate;
    total_dist += label_dist;
    total_yrate += best_label_yrate;
    this_segment_rd += best_label_rd;

    /* Already no better than the stored best: give up on this layout. */
    if (this_segment_rd >= bsi->segment_rd) break;

  } /* for each label */

  if (this_segment_rd >= bsi->segment_rd) return;

  bsi->r = total_rate;
  bsi->d = total_dist;
  bsi->segment_yrate = total_yrate;
  bsi->segment_rd = this_segment_rd;
  bsi->segment_num = segmentation;

  /* store everything needed to come back to this!! */
  for (i = 0; i < 16; ++i) {
    bsi->mvs[i].as_mv = x->partition_info->bmi[i].mv.as_mv;
    bsi->modes[i] = x->partition_info->bmi[i].mode;
    bsi->eobs[i] = xd->eobs[i];
  }
}
1184
1185
1.62M
static void vp8_cal_step_param(int sr, int *sp) {
1186
1.62M
  int step = 0;
1187
1188
1.62M
  if (sr > MAX_FIRST_STEP) {
1189
60.2k
    sr = MAX_FIRST_STEP;
1190
1.56M
  } else if (sr < 1) {
1191
735k
    sr = 1;
1192
735k
  }
1193
1194
5.03M
  while (sr >>= 1) step++;
1195
1196
1.62M
  *sp = MAX_MVSEARCH_STEPS - 1 - step;
1197
1.62M
}
1198
1199
static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
1200
                                           int_mv *best_ref_mv, int best_rd,
1201
                                           int *mdcounts, int *returntotrate,
1202
                                           int *returnyrate,
1203
                                           int *returndistortion,
1204
757k
                                           int mvthresh) {
1205
757k
  int i;
1206
757k
  BEST_SEG_INFO bsi;
1207
1208
757k
  memset(&bsi, 0, sizeof(bsi));
1209
1210
757k
  bsi.segment_rd = best_rd;
1211
757k
  bsi.ref_mv = best_ref_mv;
1212
757k
  bsi.mvp.as_int = best_ref_mv->as_int;
1213
757k
  bsi.mvthresh = mvthresh;
1214
757k
  bsi.mdcounts = mdcounts;
1215
1216
12.8M
  for (i = 0; i < 16; ++i) {
1217
12.1M
    bsi.modes[i] = ZERO4X4;
1218
12.1M
  }
1219
1220
757k
  if (cpi->compressor_speed == 0) {
1221
    /* for now, we will keep the original segmentation order
1222
       when in best quality mode */
1223
0
    rd_check_segment(cpi, x, &bsi, BLOCK_16X8);
1224
0
    rd_check_segment(cpi, x, &bsi, BLOCK_8X16);
1225
0
    rd_check_segment(cpi, x, &bsi, BLOCK_8X8);
1226
0
    rd_check_segment(cpi, x, &bsi, BLOCK_4X4);
1227
757k
  } else {
1228
757k
    int sr;
1229
1230
757k
    rd_check_segment(cpi, x, &bsi, BLOCK_8X8);
1231
1232
757k
    if (bsi.segment_rd < best_rd) {
1233
406k
      int col_min = ((best_ref_mv->as_mv.col + 7) >> 3) - MAX_FULL_PEL_VAL;
1234
406k
      int row_min = ((best_ref_mv->as_mv.row + 7) >> 3) - MAX_FULL_PEL_VAL;
1235
406k
      int col_max = (best_ref_mv->as_mv.col >> 3) + MAX_FULL_PEL_VAL;
1236
406k
      int row_max = (best_ref_mv->as_mv.row >> 3) + MAX_FULL_PEL_VAL;
1237
1238
406k
      int tmp_col_min = x->mv_col_min;
1239
406k
      int tmp_col_max = x->mv_col_max;
1240
406k
      int tmp_row_min = x->mv_row_min;
1241
406k
      int tmp_row_max = x->mv_row_max;
1242
1243
      /* Get intersection of UMV window and valid MV window to reduce # of
1244
       * checks in diamond search. */
1245
406k
      if (x->mv_col_min < col_min) x->mv_col_min = col_min;
1246
406k
      if (x->mv_col_max > col_max) x->mv_col_max = col_max;
1247
406k
      if (x->mv_row_min < row_min) x->mv_row_min = row_min;
1248
406k
      if (x->mv_row_max > row_max) x->mv_row_max = row_max;
1249
1250
      /* Get 8x8 result */
1251
406k
      bsi.sv_mvp[0].as_int = bsi.mvs[0].as_int;
1252
406k
      bsi.sv_mvp[1].as_int = bsi.mvs[2].as_int;
1253
406k
      bsi.sv_mvp[2].as_int = bsi.mvs[8].as_int;
1254
406k
      bsi.sv_mvp[3].as_int = bsi.mvs[10].as_int;
1255
1256
      /* Use 8x8 result as 16x8/8x16's predictor MV. Adjust search range
1257
       * according to the closeness of 2 MV. */
1258
      /* block 8X16 */
1259
406k
      {
1260
406k
        sr =
1261
406k
            MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[2].as_mv.row)) >> 3,
1262
406k
                 (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[2].as_mv.col)) >> 3);
1263
406k
        vp8_cal_step_param(sr, &bsi.sv_istep[0]);
1264
1265
406k
        sr =
1266
406k
            MAXF((abs(bsi.sv_mvp[1].as_mv.row - bsi.sv_mvp[3].as_mv.row)) >> 3,
1267
406k
                 (abs(bsi.sv_mvp[1].as_mv.col - bsi.sv_mvp[3].as_mv.col)) >> 3);
1268
406k
        vp8_cal_step_param(sr, &bsi.sv_istep[1]);
1269
1270
406k
        rd_check_segment(cpi, x, &bsi, BLOCK_8X16);
1271
406k
      }
1272
1273
      /* block 16X8 */
1274
406k
      {
1275
406k
        sr =
1276
406k
            MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[1].as_mv.row)) >> 3,
1277
406k
                 (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[1].as_mv.col)) >> 3);
1278
406k
        vp8_cal_step_param(sr, &bsi.sv_istep[0]);
1279
1280
406k
        sr =
1281
406k
            MAXF((abs(bsi.sv_mvp[2].as_mv.row - bsi.sv_mvp[3].as_mv.row)) >> 3,
1282
406k
                 (abs(bsi.sv_mvp[2].as_mv.col - bsi.sv_mvp[3].as_mv.col)) >> 3);
1283
406k
        vp8_cal_step_param(sr, &bsi.sv_istep[1]);
1284
1285
406k
        rd_check_segment(cpi, x, &bsi, BLOCK_16X8);
1286
406k
      }
1287
1288
      /* If 8x8 is better than 16x8/8x16, then do 4x4 search */
1289
      /* Not skip 4x4 if speed=0 (good quality) */
1290
406k
      if (cpi->sf.no_skip_block4x4_search || bsi.segment_num == BLOCK_8X8)
1291
      /* || (sv_segment_rd8x8-bsi.segment_rd) < sv_segment_rd8x8>>5) */
1292
157k
      {
1293
157k
        bsi.mvp.as_int = bsi.sv_mvp[0].as_int;
1294
157k
        rd_check_segment(cpi, x, &bsi, BLOCK_4X4);
1295
157k
      }
1296
1297
      /* restore UMV window */
1298
406k
      x->mv_col_min = tmp_col_min;
1299
406k
      x->mv_col_max = tmp_col_max;
1300
406k
      x->mv_row_min = tmp_row_min;
1301
406k
      x->mv_row_max = tmp_row_max;
1302
406k
    }
1303
757k
  }
1304
1305
  /* set it to the best */
1306
12.8M
  for (i = 0; i < 16; ++i) {
1307
12.1M
    BLOCKD *bd = &x->e_mbd.block[i];
1308
1309
12.1M
    bd->bmi.mv.as_int = bsi.mvs[i].as_int;
1310
12.1M
    *bd->eob = bsi.eobs[i];
1311
12.1M
  }
1312
1313
757k
  *returntotrate = bsi.r;
1314
757k
  *returndistortion = bsi.d;
1315
757k
  *returnyrate = bsi.segment_yrate;
1316
1317
  /* save partitions */
1318
757k
  x->e_mbd.mode_info_context->mbmi.partitioning = bsi.segment_num;
1319
757k
  x->partition_info->count = vp8_mbsplit_count[bsi.segment_num];
1320
1321
3.90M
  for (i = 0; i < x->partition_info->count; ++i) {
1322
3.14M
    int j;
1323
1324
3.14M
    j = vp8_mbsplit_offset[bsi.segment_num][i];
1325
1326
3.14M
    x->partition_info->bmi[i].mode = bsi.modes[j];
1327
3.14M
    x->partition_info->bmi[i].mv.as_mv = bsi.mvs[j].as_mv;
1328
3.14M
  }
1329
  /*
1330
   * used to set x->e_mbd.mode_info_context->mbmi.mv.as_int
1331
   */
1332
757k
  x->partition_info->bmi[15].mv.as_int = bsi.mvs[15].as_int;
1333
1334
757k
  return bsi.segment_rd;
1335
757k
}
1336
1337
/* The improved MV prediction */
1338
void vp8_mv_pred(VP8_COMP *cpi, MACROBLOCKD *xd, const MODE_INFO *here,
1339
                 int_mv *mvp, int refframe, int *ref_frame_sign_bias, int *sr,
1340
1.57M
                 int near_sadidx[]) {
1341
1.57M
  const MODE_INFO *above = here - xd->mode_info_stride;
1342
1.57M
  const MODE_INFO *left = here - 1;
1343
1.57M
  const MODE_INFO *aboveleft = above - 1;
1344
1.57M
  int_mv near_mvs[8];
1345
1.57M
  int near_ref[8];
1346
1.57M
  int_mv mv;
1347
1.57M
  int vcnt = 0;
1348
1.57M
  int find = 0;
1349
1.57M
  int mb_offset;
1350
1351
1.57M
  int mvx[8];
1352
1.57M
  int mvy[8];
1353
1.57M
  int i;
1354
1355
1.57M
  mv.as_int = 0;
1356
1357
1.57M
  if (here->mbmi.ref_frame != INTRA_FRAME) {
1358
1.57M
    near_mvs[0].as_int = near_mvs[1].as_int = near_mvs[2].as_int =
1359
1.57M
        near_mvs[3].as_int = near_mvs[4].as_int = near_mvs[5].as_int =
1360
1.57M
            near_mvs[6].as_int = near_mvs[7].as_int = 0;
1361
1.57M
    near_ref[0] = near_ref[1] = near_ref[2] = near_ref[3] = near_ref[4] =
1362
1.57M
        near_ref[5] = near_ref[6] = near_ref[7] = 0;
1363
1364
    /* read in 3 nearby block's MVs from current frame as prediction
1365
     * candidates.
1366
     */
1367
1.57M
    if (above->mbmi.ref_frame != INTRA_FRAME) {
1368
427k
      near_mvs[vcnt].as_int = above->mbmi.mv.as_int;
1369
427k
      mv_bias(ref_frame_sign_bias[above->mbmi.ref_frame], refframe,
1370
427k
              &near_mvs[vcnt], ref_frame_sign_bias);
1371
427k
      near_ref[vcnt] = above->mbmi.ref_frame;
1372
427k
    }
1373
1.57M
    vcnt++;
1374
1.57M
    if (left->mbmi.ref_frame != INTRA_FRAME) {
1375
661k
      near_mvs[vcnt].as_int = left->mbmi.mv.as_int;
1376
661k
      mv_bias(ref_frame_sign_bias[left->mbmi.ref_frame], refframe,
1377
661k
              &near_mvs[vcnt], ref_frame_sign_bias);
1378
661k
      near_ref[vcnt] = left->mbmi.ref_frame;
1379
661k
    }
1380
1.57M
    vcnt++;
1381
1.57M
    if (aboveleft->mbmi.ref_frame != INTRA_FRAME) {
1382
349k
      near_mvs[vcnt].as_int = aboveleft->mbmi.mv.as_int;
1383
349k
      mv_bias(ref_frame_sign_bias[aboveleft->mbmi.ref_frame], refframe,
1384
349k
              &near_mvs[vcnt], ref_frame_sign_bias);
1385
349k
      near_ref[vcnt] = aboveleft->mbmi.ref_frame;
1386
349k
    }
1387
1.57M
    vcnt++;
1388
1389
    /* read in 5 nearby block's MVs from last frame. */
1390
1.57M
    if (cpi->common.last_frame_type != KEY_FRAME) {
1391
931k
      mb_offset = (-xd->mb_to_top_edge / 128 + 1) * (xd->mode_info_stride + 1) +
1392
931k
                  (-xd->mb_to_left_edge / 128 + 1);
1393
1394
      /* current in last frame */
1395
931k
      if (cpi->lf_ref_frame[mb_offset] != INTRA_FRAME) {
1396
539k
        near_mvs[vcnt].as_int = cpi->lfmv[mb_offset].as_int;
1397
539k
        mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset], refframe,
1398
539k
                &near_mvs[vcnt], ref_frame_sign_bias);
1399
539k
        near_ref[vcnt] = cpi->lf_ref_frame[mb_offset];
1400
539k
      }
1401
931k
      vcnt++;
1402
1403
      /* above in last frame */
1404
931k
      if (cpi->lf_ref_frame[mb_offset - xd->mode_info_stride - 1] !=
1405
931k
          INTRA_FRAME) {
1406
289k
        near_mvs[vcnt].as_int =
1407
289k
            cpi->lfmv[mb_offset - xd->mode_info_stride - 1].as_int;
1408
289k
        mv_bias(
1409
289k
            cpi->lf_ref_frame_sign_bias[mb_offset - xd->mode_info_stride - 1],
1410
289k
            refframe, &near_mvs[vcnt], ref_frame_sign_bias);
1411
289k
        near_ref[vcnt] =
1412
289k
            cpi->lf_ref_frame[mb_offset - xd->mode_info_stride - 1];
1413
289k
      }
1414
931k
      vcnt++;
1415
1416
      /* left in last frame */
1417
931k
      if (cpi->lf_ref_frame[mb_offset - 1] != INTRA_FRAME) {
1418
413k
        near_mvs[vcnt].as_int = cpi->lfmv[mb_offset - 1].as_int;
1419
413k
        mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset - 1], refframe,
1420
413k
                &near_mvs[vcnt], ref_frame_sign_bias);
1421
413k
        near_ref[vcnt] = cpi->lf_ref_frame[mb_offset - 1];
1422
413k
      }
1423
931k
      vcnt++;
1424
1425
      /* right in last frame */
1426
931k
      if (cpi->lf_ref_frame[mb_offset + 1] != INTRA_FRAME) {
1427
414k
        near_mvs[vcnt].as_int = cpi->lfmv[mb_offset + 1].as_int;
1428
414k
        mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset + 1], refframe,
1429
414k
                &near_mvs[vcnt], ref_frame_sign_bias);
1430
414k
        near_ref[vcnt] = cpi->lf_ref_frame[mb_offset + 1];
1431
414k
      }
1432
931k
      vcnt++;
1433
1434
      /* below in last frame */
1435
931k
      if (cpi->lf_ref_frame[mb_offset + xd->mode_info_stride + 1] !=
1436
931k
          INTRA_FRAME) {
1437
292k
        near_mvs[vcnt].as_int =
1438
292k
            cpi->lfmv[mb_offset + xd->mode_info_stride + 1].as_int;
1439
292k
        mv_bias(
1440
292k
            cpi->lf_ref_frame_sign_bias[mb_offset + xd->mode_info_stride + 1],
1441
292k
            refframe, &near_mvs[vcnt], ref_frame_sign_bias);
1442
292k
        near_ref[vcnt] =
1443
292k
            cpi->lf_ref_frame[mb_offset + xd->mode_info_stride + 1];
1444
292k
      }
1445
931k
      vcnt++;
1446
931k
    }
1447
1448
6.60M
    for (i = 0; i < vcnt; ++i) {
1449
5.85M
      if (near_ref[near_sadidx[i]] != INTRA_FRAME) {
1450
1.98M
        if (here->mbmi.ref_frame == near_ref[near_sadidx[i]]) {
1451
826k
          mv.as_int = near_mvs[near_sadidx[i]].as_int;
1452
826k
          find = 1;
1453
826k
          if (i < 3) {
1454
746k
            *sr = 3;
1455
746k
          } else {
1456
80.0k
            *sr = 2;
1457
80.0k
          }
1458
826k
          break;
1459
826k
        }
1460
1.98M
      }
1461
5.85M
    }
1462
1463
1.57M
    if (!find) {
1464
5.17M
      for (i = 0; i < vcnt; ++i) {
1465
4.42M
        mvx[i] = near_mvs[i].as_mv.row;
1466
4.42M
        mvy[i] = near_mvs[i].as_mv.col;
1467
4.42M
      }
1468
1469
750k
      insertsortmv(mvx, vcnt);
1470
750k
      insertsortmv(mvy, vcnt);
1471
750k
      mv.as_mv.row = mvx[vcnt / 2];
1472
750k
      mv.as_mv.col = mvy[vcnt / 2];
1473
1474
      /* sr is set to 0 to allow calling function to decide the search
1475
       * range.
1476
       */
1477
750k
      *sr = 0;
1478
750k
    }
1479
1.57M
  }
1480
1481
  /* Set up return values */
1482
1.57M
  mvp->as_int = mv.as_int;
1483
1.57M
  vp8_clamp_mv2(mvp, xd);
1484
1.57M
}
1485
1486
/* Compute the 16x16 SAD between the source macroblock and each neighbouring
 * macroblock position, then order the candidate indices in near_sadidx[]
 * via insertsortsad().
 *
 * Positions that fall outside the frame (edge distance of 0 on that side) get
 * INT_MAX. The five last-frame positions are only evaluated, and only take
 * part in the sort, when the last frame was not a key frame; otherwise just
 * the first three entries are sorted.
 *
 * recon_yoffset  offset of this macroblock inside the last frame's luma
 *                buffer.
 * near_sadidx    index array reordered by insertsortsad().
 */
void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x,
                 int recon_yoffset, int near_sadidx[]) {
  /* near_sad indexes:
   *   0-cf above, 1-cf left, 2-cf aboveleft,
   *   3-lf current, 4-lf above, 5-lf left, 6-lf right, 7-lf below
   */
  int near_sad[8] = { 0 };
  BLOCK *b = &x->block[0];
  unsigned char *src_y_ptr = *(b->base_src);
  const int at_top = (xd->mb_to_top_edge == 0);
  const int at_left = (xd->mb_to_left_edge == 0);

  /* calculate sad for current frame 3 nearby MBs. */
  if (at_top) {
    /* no MB above */
    near_sad[0] = INT_MAX;
  } else {
    near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(
        src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride * 16,
        xd->dst.y_stride);
  }

  if (at_left) {
    /* no MB to the left */
    near_sad[1] = INT_MAX;
  } else {
    near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(
        src_y_ptr, b->src_stride, xd->dst.y_buffer - 16, xd->dst.y_stride);
  }

  if (at_top || at_left) {
    /* the above-left MB needs both neighbours to exist */
    near_sad[2] = INT_MAX;
  } else {
    near_sad[2] = cpi->fn_ptr[BLOCK_16X16].sdf(
        src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride * 16 - 16,
        xd->dst.y_stride);
  }

  if (cpi->common.last_frame_type != KEY_FRAME) {
    /* calculate sad for last frame 5 nearby MBs. */
    unsigned char *pre_y_buffer =
        cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_buffer + recon_yoffset;
    int pre_y_stride = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_stride;

    if (at_top) {
      near_sad[4] = INT_MAX;
    } else {
      near_sad[4] = cpi->fn_ptr[BLOCK_16X16].sdf(
          src_y_ptr, b->src_stride, pre_y_buffer - pre_y_stride * 16,
          pre_y_stride);
    }

    if (at_left) {
      near_sad[5] = INT_MAX;
    } else {
      near_sad[5] = cpi->fn_ptr[BLOCK_16X16].sdf(
          src_y_ptr, b->src_stride, pre_y_buffer - 16, pre_y_stride);
    }

    /* The co-located MB always exists. */
    near_sad[3] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride,
                                               pre_y_buffer, pre_y_stride);

    if (xd->mb_to_right_edge == 0) {
      near_sad[6] = INT_MAX;
    } else {
      near_sad[6] = cpi->fn_ptr[BLOCK_16X16].sdf(
          src_y_ptr, b->src_stride, pre_y_buffer + 16, pre_y_stride);
    }

    if (xd->mb_to_bottom_edge == 0) {
      near_sad[7] = INT_MAX;
    } else {
      near_sad[7] = cpi->fn_ptr[BLOCK_16X16].sdf(
          src_y_ptr, b->src_stride, pre_y_buffer + pre_y_stride * 16,
          pre_y_stride);
    }

    insertsortsad(near_sad, near_sadidx, 8);
  } else {
    /* Only the three current-frame candidates are available. */
    insertsortsad(near_sad, near_sadidx, 3);
  }
}
1560
1561
820k
/* Add one motion-vector difference to the MV histograms of x.
 * Each component is halved (>> 1) and offset by mv_max to form the bin
 * index; nothing is counted unless both indices lie in [0, MVvals).
 */
static void rd_count_mv_residual(MACROBLOCK *x, int row_delta, int col_delta) {
  const int row_idx = mv_max + (row_delta >> 1);
  const int col_idx = mv_max + (col_delta >> 1);

  if (row_idx < 0 || row_idx >= MVvals) return;
  if (col_idx < 0 || col_idx >= MVvals) return;

  x->MVcount[0][row_idx]++;
  x->MVcount[1][col_idx]++;
}

/* Update the MV histograms for the mode chosen for this macroblock.
 * For SPLITMV every partition coded as NEW4X4 contributes its difference to
 * best_ref_mv; for NEWMV the macroblock vector contributes once. Other modes
 * leave the counts untouched.
 */
static void rd_update_mvcount(MACROBLOCK *x, int_mv *best_ref_mv) {
  if (x->e_mbd.mode_info_context->mbmi.mode == SPLITMV) {
    int i;

    for (i = 0; i < x->partition_info->count; ++i) {
      if (x->partition_info->bmi[i].mode != NEW4X4) continue;

      rd_count_mv_residual(
          x, x->partition_info->bmi[i].mv.as_mv.row - best_ref_mv->as_mv.row,
          x->partition_info->bmi[i].mv.as_mv.col - best_ref_mv->as_mv.col);
    }
    return;
  }

  if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV) {
    rd_count_mv_residual(
        x,
        x->e_mbd.mode_info_context->mbmi.mv.as_mv.row - best_ref_mv->as_mv.row,
        x->e_mbd.mode_info_context->mbmi.mv.as_mv.col - best_ref_mv->as_mv.col);
  }
}
1597
1598
static int evaluate_inter_mode_rd(int mdcounts[4], RATE_DISTORTION *rd,
1599
                                  int *disable_skip, VP8_COMP *cpi,
1600
3.06M
                                  MACROBLOCK *x) {
1601
3.06M
  MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode;
1602
3.06M
  BLOCK *b = &x->block[0];
1603
3.06M
  MACROBLOCKD *xd = &x->e_mbd;
1604
3.06M
  int distortion;
1605
3.06M
  vp8_build_inter16x16_predictors_mby(&x->e_mbd, x->e_mbd.predictor, 16);
1606
1607
3.06M
  if (cpi->active_map_enabled && x->active_ptr[0] == 0) {
1608
0
    x->skip = 1;
1609
3.06M
  } else if (x->encode_breakout) {
1610
0
    unsigned int sse;
1611
0
    unsigned int var;
1612
0
    unsigned int threshold =
1613
0
        (xd->block[0].dequant[1] * xd->block[0].dequant[1] >> 4);
1614
1615
0
    if (threshold < x->encode_breakout) threshold = x->encode_breakout;
1616
1617
0
    var = vpx_variance16x16(*(b->base_src), b->src_stride, x->e_mbd.predictor,
1618
0
                            16, &sse);
1619
1620
0
    if (sse < threshold) {
1621
0
      unsigned int q2dc = xd->block[24].dequant[0];
1622
      /* If theres is no codeable 2nd order dc
1623
         or a very small uniform pixel change change */
1624
0
      if ((sse - var < q2dc * q2dc >> 4) || (sse / 2 > var && sse - var < 64)) {
1625
        /* Check u and v to make sure skip is ok */
1626
0
        unsigned int sse2 = VP8_UVSSE(x);
1627
0
        if (sse2 * 2 < threshold) {
1628
0
          x->skip = 1;
1629
0
          rd->distortion2 = sse + sse2;
1630
0
          rd->rate2 = 500;
1631
1632
          /* for best_yrd calculation */
1633
0
          rd->rate_uv = 0;
1634
0
          rd->distortion_uv = sse2;
1635
1636
0
          *disable_skip = 1;
1637
0
          return RDCOST(x->rdmult, x->rddiv, rd->rate2, rd->distortion2);
1638
0
        }
1639
0
      }
1640
0
    }
1641
0
  }
1642
1643
  /* Add in the Mv/mode cost */
1644
3.06M
  rd->rate2 += vp8_cost_mv_ref(this_mode, mdcounts);
1645
1646
  /* Y cost and distortion */
1647
3.06M
  macro_block_yrd(x, &rd->rate_y, &distortion);
1648
3.06M
  rd->rate2 += rd->rate_y;
1649
3.06M
  rd->distortion2 += distortion;
1650
1651
  /* UV cost and distortion */
1652
3.06M
  rd_inter16x16_uv(cpi, x, &rd->rate_uv, &rd->distortion_uv,
1653
3.06M
                   cpi->common.full_pixel);
1654
3.06M
  rd->rate2 += rd->rate_uv;
1655
3.06M
  rd->distortion2 += rd->distortion_uv;
1656
3.06M
  return INT_MAX;
1657
3.06M
}
1658
1659
static int calculate_final_rd_costs(int this_rd, RATE_DISTORTION *rd,
1660
                                    int *other_cost, int disable_skip,
1661
                                    int uv_intra_tteob, int intra_rd_penalty,
1662
7.21M
                                    VP8_COMP *cpi, MACROBLOCK *x) {
1663
7.21M
  MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode;
1664
1665
  /* Where skip is allowable add in the default per mb cost for the no
1666
   * skip case. where we then decide to skip we have to delete this and
1667
   * replace it with the cost of signalling a skip
1668
   */
1669
7.21M
  if (cpi->common.mb_no_coeff_skip) {
1670
7.21M
    *other_cost += vp8_cost_bit(cpi->prob_skip_false, 0);
1671
7.21M
    rd->rate2 += *other_cost;
1672
7.21M
  }
1673
1674
  /* Estimate the reference frame signaling cost and add it
1675
   * to the rolling cost variable.
1676
   */
1677
7.21M
  rd->rate2 += x->ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
1678
1679
7.21M
  if (!disable_skip) {
1680
    /* Test for the condition where skip block will be activated
1681
     * because there are no non zero coefficients and make any
1682
     * necessary adjustment for rate
1683
     */
1684
6.51M
    if (cpi->common.mb_no_coeff_skip) {
1685
6.51M
      int i;
1686
6.51M
      int tteob;
1687
6.51M
      int has_y2_block = (this_mode != SPLITMV && this_mode != B_PRED);
1688
1689
6.51M
      tteob = 0;
1690
6.51M
      if (has_y2_block) tteob += x->e_mbd.eobs[24];
1691
1692
110M
      for (i = 0; i < 16; ++i) tteob += (x->e_mbd.eobs[i] > has_y2_block);
1693
1694
6.51M
      if (x->e_mbd.mode_info_context->mbmi.ref_frame) {
1695
31.2M
        for (i = 16; i < 24; ++i) tteob += x->e_mbd.eobs[i];
1696
3.46M
      } else {
1697
3.04M
        tteob += uv_intra_tteob;
1698
3.04M
      }
1699
1700
6.51M
      if (tteob == 0) {
1701
457k
        rd->rate2 -= (rd->rate_y + rd->rate_uv);
1702
        /* for best_yrd calculation */
1703
457k
        rd->rate_uv = 0;
1704
1705
        /* Back out no skip flag costing and add in skip flag costing */
1706
457k
        if (cpi->prob_skip_false) {
1707
457k
          int prob_skip_cost;
1708
1709
457k
          prob_skip_cost = vp8_cost_bit(cpi->prob_skip_false, 1);
1710
457k
          prob_skip_cost -= (int)vp8_cost_bit(cpi->prob_skip_false, 0);
1711
457k
          rd->rate2 += prob_skip_cost;
1712
457k
          *other_cost += prob_skip_cost;
1713
457k
        }
1714
457k
      }
1715
6.51M
    }
1716
    /* Calculate the final RD estimate for this mode */
1717
6.51M
    this_rd = RDCOST(x->rdmult, x->rddiv, rd->rate2, rd->distortion2);
1718
6.51M
    if (this_rd < INT_MAX &&
1719
6.51M
        x->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
1720
3.04M
      this_rd += intra_rd_penalty;
1721
3.04M
    }
1722
6.51M
  }
1723
7.21M
  return this_rd;
1724
7.21M
}
1725
1726
static void update_best_mode(BEST_MODE *best_mode, int this_rd,
1727
                             RATE_DISTORTION *rd, int other_cost,
1728
2.60M
                             MACROBLOCK *x) {
1729
2.60M
  MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode;
1730
1731
2.60M
  other_cost += x->ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
1732
1733
  /* Calculate the final y RD estimate for this mode */
1734
2.60M
  best_mode->yrd =
1735
2.60M
      RDCOST(x->rdmult, x->rddiv, (rd->rate2 - rd->rate_uv - other_cost),
1736
2.60M
             (rd->distortion2 - rd->distortion_uv));
1737
1738
2.60M
  best_mode->rd = this_rd;
1739
2.60M
  best_mode->mbmode = x->e_mbd.mode_info_context->mbmi;
1740
2.60M
  best_mode->partition = *x->partition_info;
1741
1742
2.60M
  if ((this_mode == B_PRED) || (this_mode == SPLITMV)) {
1743
551k
    int i;
1744
9.37M
    for (i = 0; i < 16; ++i) {
1745
8.82M
      best_mode->bmodes[i] = x->e_mbd.block[i].bmi;
1746
8.82M
    }
1747
551k
  }
1748
2.60M
}
1749
1750
/* Rate-distortion mode decision for one macroblock.
 *
 * Steps through the MAX_MODES entries of vp8_mode_order /
 * vp8_ref_frame_order. Each candidate that passes the threshold and
 * test-frequency gates is costed, and the candidate with the lowest RD value
 * is kept. On return the chosen mode has been written to
 * x->e_mbd.mode_info_context (including per-block modes or motion vectors
 * for B_PRED / SPLITMV) and the MV statistics have been updated through
 * rd_update_mvcount().
 *
 * returnrate / returndistortion  Written each time a candidate becomes the
 *                                new best: its rate2 and distortion2.
 * returnintra                    Distortion of the best intra candidate seen,
 *                                or INT_MAX when none improved on the initial
 *                                value.
 * mb_row / mb_col                Only used when temporal denoising is built
 *                                in and enabled.
 */
void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
                            int recon_uvoffset, int *returnrate,
                            int *returndistortion, int *returnintra, int mb_row,
                            int mb_col) {
  MACROBLOCKD *xd = &x->e_mbd;
  BLOCK *b = &x->block[0];
  BLOCKD *d = &xd->block[0];

  /* Candidate motion vectors, one set per reference sign bias. */
  int_mv best_ref_mv_sb[2];
  int_mv mode_mv_sb[2][MB_MODE_COUNT];
  int_mv best_ref_mv;
  int_mv *mode_mv;
  int_mv mvp;
  int sign_bias = 0;

  MB_PREDICTION_MODE this_mode;
  BEST_MODE best_mode;
  int best_mode_index = 0;
  RATE_DISTORTION rd;
  int mode_index;
  int mdcounts[4];
  int rate;
  int num00;
  int k;

  /* Result of the chroma intra search; computed once, on first use. */
  int uv_intra_rate, uv_intra_distortion, uv_intra_rate_tokenonly;
  int uv_intra_tteob = 0;
  int uv_intra_done = 0;
  MB_PREDICTION_MODE uv_intra_mode = 0;

  int near_sadidx[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
  int saddone = 0;
  /* Search range obtained from vp8_mv_pred(); uses step_param levels (0-7). */
  int sr = 0;

  unsigned char *plane[4][3] = { { 0, 0 } };
  int ref_frame_map[4];

  int intra_rd_penalty =
      10 * vp8_dc_quant(cpi->common.base_qindex, cpi->common.y1dc_delta_q);

#if CONFIG_TEMPORAL_DENOISING
  unsigned int zero_mv_sse = UINT_MAX, best_sse = UINT_MAX,
               best_rd_sse = UINT_MAX;
#endif

  /* The _uv fields are not set consistently before update_best_mode() is
   * called, so give them defined values up front.
   */
  rd.rate_uv = 0;
  rd.distortion_uv = 0;

  memset(mode_mv_sb, 0, sizeof(mode_mv_sb));
  memset(&best_mode.mbmode, 0, sizeof(best_mode.mbmode));
  memset(&best_mode.bmodes, 0, sizeof(best_mode.bmodes));
  best_mode.rd = INT_MAX;
  best_mode.yrd = INT_MAX;
  best_mode.intra_rd = INT_MAX;

  mode_mv = mode_mv_sb[sign_bias];
  best_ref_mv.as_int = 0;

  /* Set up the reference frame search priorities. */
  get_reference_search_order(cpi, ref_frame_map);

  /* Near MVs only need computing when at least one reference frame is
   * valid.
   */
  if (ref_frame_map[1] > 0) {
    sign_bias = vp8_find_near_mvs_bias(
        xd, xd->mode_info_context, mode_mv_sb, best_ref_mv_sb, mdcounts,
        ref_frame_map[1], cpi->common.ref_frame_sign_bias);

    mode_mv = mode_mv_sb[sign_bias];
    best_ref_mv.as_int = best_ref_mv_sb[sign_bias].as_int;
  }

  get_predictor_pointers(cpi, plane, recon_yoffset, recon_uvoffset);

  *returnintra = INT_MAX;
  /* Running count of macroblocks tested so far in this frame. */
  x->mbs_tested_so_far++;

  x->skip = 0;

  for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
    int this_rd = INT_MAX;
    int disable_skip = 0;
    int other_cost = 0;
    int this_ref_frame = ref_frame_map[vp8_ref_frame_order[mode_index]];

    /* Skip the candidate when the best RD so far is already within its
     * threshold, or when its reference frame slot is not in use.
     */
    if (best_mode.rd <= x->rd_threshes[mode_index] || this_ref_frame < 0) {
      continue;
    }

    /* Rolling rate and distortion totals for this candidate. */
    rd.rate2 = 0;
    rd.distortion2 = 0;

    this_mode = vp8_mode_order[mode_index];

    xd->mode_info_context->mbmi.mode = this_mode;
    xd->mode_info_context->mbmi.ref_frame = this_ref_frame;

    /* When the source frame is an alt ref and ARNR filtering is off, only
     * ZEROMV on ALTREF_FRAME is considered. With ARNR on, an unfiltered
     * alternative is still wanted, so nothing is excluded here.
     */
    if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0) &&
        (this_mode != ZEROMV ||
         xd->mode_info_context->mbmi.ref_frame != ALTREF_FRAME)) {
      continue;
    }

    /* Inter candidates: select the prediction planes and MV set. */
    if (xd->mode_info_context->mbmi.ref_frame) {
      assert(plane[this_ref_frame][0] != NULL &&
             plane[this_ref_frame][1] != NULL &&
             plane[this_ref_frame][2] != NULL);
      xd->pre.y_buffer = plane[this_ref_frame][0];
      xd->pre.u_buffer = plane[this_ref_frame][1];
      xd->pre.v_buffer = plane[this_ref_frame][2];

      if (sign_bias != cpi->common.ref_frame_sign_bias[this_ref_frame]) {
        sign_bias = cpi->common.ref_frame_sign_bias[this_ref_frame];
        mode_mv = mode_mv_sb[sign_bias];
        best_ref_mv.as_int = best_ref_mv_sb[sign_bias].as_int;
      }
    }

    /* If this mode has already been tested as often as its check frequency
     * allows, do not test it now and raise its threshold instead.
     */
    if (x->mode_test_hit_counts[mode_index] &&
        (cpi->mode_check_freq[mode_index] > 1) &&
        (x->mbs_tested_so_far <= cpi->mode_check_freq[mode_index] *
                                     x->mode_test_hit_counts[mode_index])) {
      /* Make the mode less likely to be chosen. */
      x->rd_thresh_mult[mode_index] += 4;

      if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT) {
        x->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
      }

      x->rd_threshes[mode_index] =
          (cpi->rd_baseline_thresh[mode_index] >> 7) *
          x->rd_thresh_mult[mode_index];

      continue;
    }

    /* From here on the mode is actually tested, so count the attempt. */
    x->mode_test_hit_counts[mode_index]++;

    /* Experimental: special-case the golden/alt-ref ZEROMV modes and enlarge
     * the zero bin to suppress noise.
     */
    if (x->zbin_mode_boost_enabled) {
      if (this_ref_frame == INTRA_FRAME) {
        x->zbin_mode_boost = 0;
      } else if (vp8_mode_order[mode_index] == ZEROMV) {
        x->zbin_mode_boost = (this_ref_frame != LAST_FRAME)
                                 ? GF_ZEROMV_ZBIN_BOOST
                                 : LF_ZEROMV_ZBIN_BOOST;
      } else if (vp8_mode_order[mode_index] == SPLITMV) {
        x->zbin_mode_boost = 0;
      } else {
        x->zbin_mode_boost = MV_ZBIN_BOOST;
      }

      vp8_update_zbin_extra(cpi, x);
    }

    /* Run the chroma intra search the first time an intra mode is reached. */
    if (!uv_intra_done && this_ref_frame == INTRA_FRAME) {
      rd_pick_intra_mbuv_mode(x, &uv_intra_rate, &uv_intra_rate_tokenonly,
                              &uv_intra_distortion);
      uv_intra_mode = xd->mode_info_context->mbmi.uv_mode;

      /* The eob total feeds the later rate2 adjustment. The chroma eobs are
       * overwritten once inter modes are checked, so capture the intra total
       * now.
       */
      for (k = 16; k < 24; ++k) uv_intra_tteob += xd->eobs[k];

      uv_intra_done = 1;
    }

    switch (this_mode) {
      case B_PRED: {
        int y_rd;
        int distortion;

        /* The returned rate already includes the cost of coding the BPRED
         * mode itself: x->mbmode_cost[x->e_mbd.frame_type][BPRED].
         */
        y_rd = rd_pick_intra4x4mby_modes(x, &rate, &rd.rate_y, &distortion,
                                         best_mode.yrd);
        rd.rate2 += rate;
        rd.distortion2 += distortion;

        if (y_rd < best_mode.yrd) {
          assert(uv_intra_done);
          rd.rate2 += uv_intra_rate;
          rd.rate_uv = uv_intra_rate_tokenonly;
          rd.distortion2 += uv_intra_distortion;
          rd.distortion_uv = uv_intra_distortion;
        } else {
          /* Luma alone is no better than the best so far: reject. */
          this_rd = INT_MAX;
          disable_skip = 1;
        }
        break;
      }

      case SPLITMV: {
        int y_rd;
        int split_rd_thresh;
        int distortion;

        /* Pick the NEWMV threshold matching this reference slot. */
        if (vp8_ref_frame_order[mode_index] == 2) {
          split_rd_thresh = x->rd_threshes[THR_NEW2];
        } else if (vp8_ref_frame_order[mode_index] == 1) {
          split_rd_thresh = x->rd_threshes[THR_NEW1];
        } else {
          split_rd_thresh = x->rd_threshes[THR_NEW3];
        }

        y_rd = vp8_rd_pick_best_mbsegmentation(
            cpi, x, &best_ref_mv, best_mode.yrd, mdcounts, &rate, &rd.rate_y,
            &distortion, split_rd_thresh);

        rd.rate2 += rate;
        rd.distortion2 += distortion;

        /* Only look at chroma when the luma RD of the split beats the best
         * luma RD so far.
         */
        if (y_rd < best_mode.yrd) {
          rd_inter4x4_uv(cpi, x, &rd.rate_uv, &rd.distortion_uv,
                         cpi->common.full_pixel);
          rd.rate2 += rd.rate_uv;
          rd.distortion2 += rd.distortion_uv;
        } else {
          this_rd = INT_MAX;
          disable_skip = 1;
        }
        break;
      }

      case DC_PRED:
      case V_PRED:
      case H_PRED:
      case TM_PRED: {
        int distortion;

        xd->mode_info_context->mbmi.ref_frame = INTRA_FRAME;

        /* Build the 16x16 luma intra predictor, then cost the residual. */
        vp8_build_intra_predictors_mby_s(
            xd, xd->dst.y_buffer - xd->dst.y_stride, xd->dst.y_buffer - 1,
            xd->dst.y_stride, xd->predictor, 16);
        macro_block_yrd(x, &rd.rate_y, &distortion);
        rd.rate2 += rd.rate_y;
        rd.distortion2 += distortion;

        /* Mode signalling cost. */
        rd.rate2 +=
            x->mbmode_cost[xd->frame_type][xd->mode_info_context->mbmi.mode];

        /* Add the chroma result saved from the intra UV search. */
        assert(uv_intra_done);
        rd.rate2 += uv_intra_rate;
        rd.rate_uv = uv_intra_rate_tokenonly;
        rd.distortion2 += uv_intra_distortion;
        rd.distortion_uv = uv_intra_distortion;
        break;
      }

      case NEWMV: {
        int thissme;
        int bestsme = INT_MAX;
        int step_param = cpi->sf.first_step;
        int further_steps;
        int steps_done;
        /* A final 1-away refining search is run unless the n-step search
         * already covered the remaining steps.
         */
        int do_refine = 1;

        int sadpb = x->sadperbit16;
        int_mv mvp_full;

        /* Full-pel window reachable from the best reference MV. */
        int col_min = ((best_ref_mv.as_mv.col + 7) >> 3) - MAX_FULL_PEL_VAL;
        int row_min = ((best_ref_mv.as_mv.row + 7) >> 3) - MAX_FULL_PEL_VAL;
        int col_max = (best_ref_mv.as_mv.col >> 3) + MAX_FULL_PEL_VAL;
        int row_max = (best_ref_mv.as_mv.row >> 3) + MAX_FULL_PEL_VAL;

        /* Saved UMV window, restored after the search. */
        int saved_col_min = x->mv_col_min;
        int saved_col_max = x->mv_col_max;
        int saved_row_min = x->mv_row_min;
        int saved_row_max = x->mv_row_max;

        if (!saddone) {
          vp8_cal_sad(cpi, xd, x, recon_yoffset, &near_sadidx[0]);
          saddone = 1;
        }

        vp8_mv_pred(cpi, xd, xd->mode_info_context, &mvp,
                    xd->mode_info_context->mbmi.ref_frame,
                    cpi->common.ref_frame_sign_bias, &sr, &near_sadidx[0]);

        mvp_full.as_mv.col = mvp.as_mv.col >> 3;
        mvp_full.as_mv.row = mvp.as_mv.row >> 3;

        /* Intersect the UMV window with the valid MV window to reduce the
         * number of checks in the diamond search.
         */
        if (x->mv_col_min < col_min) x->mv_col_min = col_min;
        if (x->mv_col_max > col_max) x->mv_col_max = col_max;
        if (x->mv_row_min < row_min) x->mv_row_min = row_min;
        if (x->mv_row_max > row_max) x->mv_row_max = row_max;

        /* Adjust the search range according to sr from the MV prediction. */
        if (sr > step_param) step_param = sr;

        /* Initial step/diamond search. */
        bestsme = cpi->diamond_search_sad(
            x, b, d, &mvp_full, &d->bmi.mv, step_param, sadpb, &num00,
            &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv);
        mode_mv[NEWMV].as_int = d->bmi.mv.as_int;

        /* Further step/diamond searches as necessary. */
        further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;

        steps_done = num00;
        num00 = 0;

        /* With no further n-step search to run, refinement is not needed. */
        if (steps_done > further_steps) do_refine = 0;

        while (steps_done < further_steps) {
          steps_done++;

          if (num00) {
            num00--;
            continue;
          }

          thissme = cpi->diamond_search_sad(
              x, b, d, &mvp_full, &d->bmi.mv, step_param + steps_done, sadpb,
              &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv);

          /* Check whether the refining search is still needed. */
          if (num00 > (further_steps - steps_done)) do_refine = 0;

          if (thissme < bestsme) {
            bestsme = thissme;
            mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
          } else {
            d->bmi.mv.as_int = mode_mv[NEWMV].as_int;
          }
        }

        /* Final 1-away diamond refining search. */
        if (do_refine) {
          const int search_range = 8;

          thissme = cpi->refining_search_sad(
              x, b, d, &d->bmi.mv, sadpb, search_range,
              &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv);

          if (thissme < bestsme) {
            bestsme = thissme;
            mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
          } else {
            d->bmi.mv.as_int = mode_mv[NEWMV].as_int;
          }
        }

        x->mv_col_min = saved_col_min;
        x->mv_col_max = saved_col_max;
        x->mv_row_min = saved_row_min;
        x->mv_row_max = saved_row_max;

        if (bestsme < INT_MAX) {
          int dis; /* TODO: use dis in distortion calculation later. */
          unsigned int sse;
          cpi->find_fractional_mv_step(
              x, b, d, &d->bmi.mv, &best_ref_mv, x->errorperbit,
              &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &dis, &sse);
        }

        mode_mv[NEWMV].as_int = d->bmi.mv.as_int;

        /* Add the cost of the new motion vector to the rolling rate. */
        rd.rate2 +=
            vp8_mv_bit_cost(&mode_mv[NEWMV], &best_ref_mv, x->mvcost, 96);
      }
        // fall through

      case NEARESTMV:
      case NEARMV:
        /* Clamp the candidate vector so that it does not extend too far out
         * of the image.
         */
        vp8_clamp_mv2(&mode_mv[this_mode], xd);

        /* A near/nearest vector of (0,0) is not pursued: that case should be
         * coded using the ZEROMV mode instead.
         */
        if (((this_mode == NEARMV) || (this_mode == NEARESTMV)) &&
            (mode_mv[this_mode].as_int == 0)) {
          continue;
        }
        // fall through

      case ZEROMV: {
        /* NEWMV, NEARESTMV, NEARMV and ZEROMV all arrive here because the
         * preceding cases have no break. Reject vectors that reach beyond
         * the UMV borders.
         */
        const int full_row = mode_mv[this_mode].as_mv.row >> 3;
        const int full_col = mode_mv[this_mode].as_mv.col >> 3;

        if ((full_row < x->mv_row_min) || (full_row > x->mv_row_max) ||
            (full_col < x->mv_col_min) || (full_col > x->mv_col_max)) {
          continue;
        }

        vp8_set_mbmode_and_mvs(x, this_mode, &mode_mv[this_mode]);
        this_rd = evaluate_inter_mode_rd(mdcounts, &rd, &disable_skip, cpi, x);
        break;
      }

      default: break;
    }

    this_rd =
        calculate_final_rd_costs(this_rd, &rd, &other_cost, disable_skip,
                                 uv_intra_tteob, intra_rd_penalty, cpi, x);

    /* Track the best intra RD and report its distortion. */
    if ((xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) &&
        (this_rd < best_mode.intra_rd)) {
      best_mode.intra_rd = this_rd;
      *returnintra = rd.distortion2;
    }
#if CONFIG_TEMPORAL_DENOISING
    if (cpi->oxcf.noise_sensitivity) {
      unsigned int sse;
      vp8_get_inter_mbpred_error(x, &cpi->fn_ptr[BLOCK_16X16], &sse,
                                 mode_mv[this_mode]);

      if (sse < best_rd_sse) best_rd_sse = sse;

      /* Remember the best ZEROMV reference for later use by the denoiser. */
      if (this_mode == ZEROMV && sse < zero_mv_sse) {
        zero_mv_sse = sse;
        x->best_zeromv_reference_frame =
            xd->mode_info_context->mbmi.ref_frame;
      }

      /* Remember the best NEWMV in x for later use by the denoiser. */
      if (xd->mode_info_context->mbmi.mode == NEWMV && sse < best_sse) {
        best_sse = sse;
        vp8_get_inter_mbpred_error(x, &cpi->fn_ptr[BLOCK_16X16], &best_sse,
                                   mode_mv[this_mode]);
        x->best_sse_inter_mode = NEWMV;
        x->best_sse_mv = xd->mode_info_context->mbmi.mv;
        x->need_to_clamp_best_mvs =
            xd->mode_info_context->mbmi.need_to_clamp_mvs;
        x->best_reference_frame = xd->mode_info_context->mbmi.ref_frame;
      }
    }
#endif

    if (this_rd < best_mode.rd || x->skip) {
      /* This candidate is the new best: note its index and results. */
      best_mode_index = mode_index;
      *returnrate = rd.rate2;
      *returndistortion = rd.distortion2;
      if (this_mode <= B_PRED) {
        xd->mode_info_context->mbmi.uv_mode = uv_intra_mode;
        /* Required for the left and above block MV. */
        xd->mode_info_context->mbmi.mv.as_int = 0;
      }
      update_best_mode(&best_mode, this_rd, &rd, other_cost, x);

      /* Testing this mode improved the best score, so lower its threshold
       * a little for next time.
       */
      if (x->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) {
        x->rd_thresh_mult[mode_index] = x->rd_thresh_mult[mode_index] - 2;
      } else {
        x->rd_thresh_mult[mode_index] = MIN_THRESHMULT;
      }
    } else {
      /* No improvement: raise the threshold for testing this mode next time
       * around.
       */
      x->rd_thresh_mult[mode_index] += 4;

      if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT) {
        x->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
      }
    }
    x->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) *
                                 x->rd_thresh_mult[mode_index];

    if (x->skip) break;
  }

  /* Reduce the activation RD threshold of the mode that won. */
  if ((cpi->rd_baseline_thresh[best_mode_index] > 0) &&
      (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2))) {
    int best_adjustment = (x->rd_thresh_mult[best_mode_index] >> 2);

    if (x->rd_thresh_mult[best_mode_index] >=
        (MIN_THRESHMULT + best_adjustment)) {
      x->rd_thresh_mult[best_mode_index] =
          x->rd_thresh_mult[best_mode_index] - best_adjustment;
    } else {
      x->rd_thresh_mult[best_mode_index] = MIN_THRESHMULT;
    }
    x->rd_threshes[best_mode_index] =
        (cpi->rd_baseline_thresh[best_mode_index] >> 7) *
        x->rd_thresh_mult[best_mode_index];
  }

#if CONFIG_TEMPORAL_DENOISING
  if (cpi->oxcf.noise_sensitivity) {
    int block_index = mb_row * cpi->common.mb_cols + mb_col;
    if (x->best_sse_inter_mode == DC_PRED) {
      /* No best MV found: fall back to the overall best mode. */
      x->best_sse_inter_mode = best_mode.mbmode.mode;
      x->best_sse_mv = best_mode.mbmode.mv;
      x->need_to_clamp_best_mvs = best_mode.mbmode.need_to_clamp_mvs;
      x->best_reference_frame = best_mode.mbmode.ref_frame;
      best_sse = best_rd_sse;
    }
    vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse,
                            recon_yoffset, recon_uvoffset, &cpi->common.lf_info,
                            mb_row, mb_col, block_index, 0);

    /* Re-evaluate ZEROMV after denoising. */
    if (best_mode.mbmode.ref_frame == INTRA_FRAME &&
        x->best_zeromv_reference_frame != INTRA_FRAME) {
      int this_rd = INT_MAX;
      int disable_skip = 0;
      int other_cost = 0;
      int this_ref_frame = x->best_zeromv_reference_frame;
      rd.rate2 =
          x->ref_frame_cost[this_ref_frame] + vp8_cost_mv_ref(ZEROMV, mdcounts);
      rd.distortion2 = 0;

      /* Point the predictor at the buffers of the chosen reference. */
      xd->mode_info_context->mbmi.ref_frame = this_ref_frame;
      xd->pre.y_buffer = plane[this_ref_frame][0];
      xd->pre.u_buffer = plane[this_ref_frame][1];
      xd->pre.v_buffer = plane[this_ref_frame][2];

      xd->mode_info_context->mbmi.mode = ZEROMV;
      xd->mode_info_context->mbmi.uv_mode = DC_PRED;
      xd->mode_info_context->mbmi.mv.as_int = 0;

      this_rd = evaluate_inter_mode_rd(mdcounts, &rd, &disable_skip, cpi, x);
      this_rd =
          calculate_final_rd_costs(this_rd, &rd, &other_cost, disable_skip,
                                   uv_intra_tteob, intra_rd_penalty, cpi, x);
      if (this_rd < best_mode.rd || x->skip) {
        *returnrate = rd.rate2;
        *returndistortion = rd.distortion2;
        update_best_mode(&best_mode, this_rd, &rd, other_cost, x);
      }
    }
  }
#endif

  /* When the source frame is an alt ref, anything other than
   * ZEROMV/ALTREF_FRAME is overridden with that choice.
   */
  if (cpi->is_src_frame_alt_ref &&
      (best_mode.mbmode.mode != ZEROMV ||
       best_mode.mbmode.ref_frame != ALTREF_FRAME)) {
    xd->mode_info_context->mbmi.mode = ZEROMV;
    xd->mode_info_context->mbmi.ref_frame = ALTREF_FRAME;
    xd->mode_info_context->mbmi.mv.as_int = 0;
    xd->mode_info_context->mbmi.uv_mode = DC_PRED;
    xd->mode_info_context->mbmi.mb_skip_coeff = (cpi->common.mb_no_coeff_skip);
    xd->mode_info_context->mbmi.partitioning = 0;
    return;
  }

  /* Install the winning macroblock mode. */
  xd->mode_info_context->mbmi = best_mode.mbmode;

  if (best_mode.mbmode.mode == B_PRED) {
    /* Restore the per-block intra modes. */
    for (k = 0; k < 16; ++k) {
      xd->mode_info_context->bmi[k].as_mode = best_mode.bmodes[k].as_mode;
    }
  } else if (best_mode.mbmode.mode == SPLITMV) {
    /* Restore the per-block motion vectors and the partition info. */
    for (k = 0; k < 16; ++k) {
      xd->mode_info_context->bmi[k].mv.as_int = best_mode.bmodes[k].mv.as_int;
    }

    *x->partition_info = best_mode.partition;

    xd->mode_info_context->mbmi.mv.as_int =
        x->partition_info->bmi[15].mv.as_int;
  }

  /* Use the reference MV that matches the sign bias of the chosen frame. */
  if (sign_bias !=
      cpi->common.ref_frame_sign_bias[xd->mode_info_context->mbmi.ref_frame]) {
    best_ref_mv.as_int = best_ref_mv_sb[!sign_bias].as_int;
  }

  rd_update_mvcount(x, &best_ref_mv);
}
2376
2377
800k
/* Picks the intra coding mode for a macroblock and reports its rate.
 *
 * Marks the macroblock as INTRA_FRAME, runs the chroma search, then the
 * 16x16 luma search and the 4x4 luma search (the latter is handed the 16x16
 * result as its reference value). When the 4x4 result is strictly lower the
 * macroblock mode is set to B_PRED; otherwise the mode is left as the
 * earlier searches set it.
 *
 * *rate receives the chroma rate plus the rate of the selected luma option.
 */
void vp8_rd_pick_intra_mode(MACROBLOCK *x, int *rate) {
  /* Chroma search outputs. */
  int uv_rate, uv_dist;
  int uv_rate_tokenonly = 0;
  /* 16x16 luma search outputs. */
  int y16_rd, y16_dist;
  int y16_rate = 0;
  int y16_rate_tokenonly = 0;
  /* 4x4 luma search outputs. */
  int y4_rd, y4_rate, y4_dist;
  int y4_rate_tokenonly = 0;
  int total_rate;

  x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;

  rd_pick_intra_mbuv_mode(x, &uv_rate, &uv_rate_tokenonly, &uv_dist);
  total_rate = uv_rate;

  y16_rd = rd_pick_intra16x16mby_mode(x, &y16_rate, &y16_rate_tokenonly,
                                      &y16_dist);

  y4_rd = rd_pick_intra4x4mby_modes(x, &y4_rate, &y4_rate_tokenonly, &y4_dist,
                                    y16_rd);

  if (y4_rd < y16_rd) {
    /* Per-block prediction wins: switch the macroblock to B_PRED. */
    x->e_mbd.mode_info_context->mbmi.mode = B_PRED;
    total_rate += y4_rate;
  } else {
    total_rate += y16_rate;
  }

  *rate = total_rate;
}