Coverage Report

Created: 2025-12-31 07:57

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libvpx/vp8/encoder/rdopt.c
Line
Count
Source
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5
 *  that can be found in the LICENSE file in the root of the source
6
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
9
 */
10
11
#include <assert.h>
12
#include <stdio.h>
13
#include <math.h>
14
#include <limits.h>
15
#include <assert.h>
16
#include "vpx_config.h"
17
#include "vp8_rtcd.h"
18
#include "./vpx_dsp_rtcd.h"
19
#include "encodeframe.h"
20
#include "tokenize.h"
21
#include "treewriter.h"
22
#include "onyx_int.h"
23
#include "modecosts.h"
24
#include "encodeintra.h"
25
#include "pickinter.h"
26
#include "vp8/common/common.h"
27
#include "vp8/common/entropymode.h"
28
#include "vp8/common/reconinter.h"
29
#include "vp8/common/reconintra.h"
30
#include "vp8/common/reconintra4x4.h"
31
#include "vp8/common/findnearmv.h"
32
#include "vp8/common/quant_common.h"
33
#include "encodemb.h"
34
#include "vp8/encoder/quantize.h"
35
#include "vpx_dsp/variance.h"
36
#include "vpx_ports/system_state.h"
37
#include "mcomp.h"
38
#include "rdopt.h"
39
#include "vpx_mem/vpx_mem.h"
40
#include "vp8/common/systemdependent.h"
41
#if CONFIG_TEMPORAL_DENOISING
42
#include "denoising.h"
43
#endif
44
extern void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x);
45
46
1.54M
#define MAXF(a, b) (((a) > (b)) ? (a) : (b))
47
48
typedef struct rate_distortion_struct {
49
  int rate2;
50
  int rate_y;
51
  int rate_uv;
52
  int distortion2;
53
  int distortion_uv;
54
} RATE_DISTORTION;
55
56
typedef struct best_mode_struct {
57
  int yrd;
58
  int rd;
59
  int intra_rd;
60
  MB_MODE_INFO mbmode;
61
  union b_mode_info bmodes[16];
62
  PARTITION_INFO partition;
63
} BEST_MODE;
64
65
static const int auto_speed_thresh[17] = { 1000, 200, 150, 130, 150, 125,
66
                                           120,  115, 115, 115, 115, 115,
67
                                           115,  115, 115, 115, 105 };
68
69
const MB_PREDICTION_MODE vp8_mode_order[MAX_MODES] = {
70
  ZEROMV,    DC_PRED,
71
72
  NEARESTMV, NEARMV,
73
74
  ZEROMV,    NEARESTMV,
75
76
  ZEROMV,    NEARESTMV,
77
78
  NEARMV,    NEARMV,
79
80
  V_PRED,    H_PRED,    TM_PRED,
81
82
  NEWMV,     NEWMV,     NEWMV,
83
84
  SPLITMV,   SPLITMV,   SPLITMV,
85
86
  B_PRED,
87
};
88
89
/* This table determines the search order in reference frame priority order,
90
 * which may not necessarily match INTRA,LAST,GOLDEN,ARF
91
 */
92
const int vp8_ref_frame_order[MAX_MODES] = {
93
  1, 0,
94
95
  1, 1,
96
97
  2, 2,
98
99
  3, 3,
100
101
  2, 3,
102
103
  0, 0, 0,
104
105
  1, 2, 3,
106
107
  1, 2, 3,
108
109
  0,
110
};
111
112
static void fill_token_costs(
113
    int c[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS],
114
    const vp8_prob p[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS]
115
137k
                    [ENTROPY_NODES]) {
116
137k
  int i, j, k;
117
118
689k
  for (i = 0; i < BLOCK_TYPES; ++i) {
119
4.96M
    for (j = 0; j < COEF_BANDS; ++j) {
120
17.6M
      for (k = 0; k < PREV_COEF_CONTEXTS; ++k) {
121
        /* check for pt=0 and band > 1 if block type 0
122
         * and 0 if blocktype 1
123
         */
124
13.2M
        if (k == 0 && j > (i == 0)) {
125
3.72M
          vp8_cost_tokens2(c[i][j][k], p[i][j][k], vp8_coef_tree, 2);
126
9.52M
        } else {
127
9.52M
          vp8_cost_tokens(c[i][j][k], p[i][j][k], vp8_coef_tree);
128
9.52M
        }
129
13.2M
      }
130
4.41M
    }
131
551k
  }
132
137k
}
133
134
static const int rd_iifactor[32] = { 4, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0,
135
                                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
136
                                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
137
138
/* values are now correlated to quantizer */
139
static const int sad_per_bit16lut[QINDEX_RANGE] = {
140
  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  3,  3,  3,
141
  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  4,  4,  4,  4,  4,  4,  4,  4,
142
  4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  6,  6,  6,
143
  6,  6,  6,  6,  6,  6,  6,  6,  6,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
144
  7,  7,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  9,  9,  9,  9,  9,
145
  9,  9,  9,  9,  9,  9,  9,  10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11,
146
  11, 11, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 14, 14
147
};
148
static const int sad_per_bit4lut[QINDEX_RANGE] = {
149
  2,  2,  2,  2,  2,  2,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
150
  3,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  6,  6,
151
  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  7,  7,  7,  7,  7,  7,  7,  7,  7,
152
  7,  7,  7,  7,  8,  8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  10, 10, 10, 10,
153
  10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12,
154
  12, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16,
155
  16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 19, 20, 20, 20,
156
};
157
158
137k
void vp8cx_initialize_me_consts(VP8_COMP *cpi, int QIndex) {
159
137k
  cpi->mb.sadperbit16 = sad_per_bit16lut[QIndex];
160
137k
  cpi->mb.sadperbit4 = sad_per_bit4lut[QIndex];
161
137k
}
162
163
137k
void vp8_initialize_rd_consts(VP8_COMP *cpi, MACROBLOCK *x, int Qvalue) {
164
137k
  int q;
165
137k
  int i;
166
137k
  double capped_q = (Qvalue < 160) ? (double)Qvalue : 160.0;
167
137k
  double rdconst = 2.80;
168
169
137k
  vpx_clear_system_state();
170
171
  /* Further tests required to see if optimum is different
172
   * for key frames, golden frames and arf frames.
173
   */
174
137k
  cpi->RDMULT = (int)(rdconst * (capped_q * capped_q));
175
176
  /* Extend rate multiplier along side quantizer zbin increases */
177
137k
  if (cpi->mb.zbin_over_quant > 0) {
178
31.6k
    double oq_factor;
179
31.6k
    double modq;
180
181
    /* Experimental code using the same basic equation as used for Q above
182
     * The units of cpi->mb.zbin_over_quant are 1/128 of Q bin size
183
     */
184
31.6k
    oq_factor = 1.0 + ((double)0.0015625 * cpi->mb.zbin_over_quant);
185
31.6k
    modq = (int)((double)capped_q * oq_factor);
186
31.6k
    cpi->RDMULT = (int)(rdconst * (modq * modq));
187
31.6k
  }
188
189
137k
  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
190
0
    if (cpi->twopass.next_iiratio > 31) {
191
0
      cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
192
0
    } else {
193
0
      cpi->RDMULT +=
194
0
          (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
195
0
    }
196
0
  }
197
198
137k
  cpi->mb.errorperbit = (cpi->RDMULT / 110);
199
137k
  cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);
200
201
137k
  vp8_set_speed_features(cpi);
202
203
2.89M
  for (i = 0; i < MAX_MODES; ++i) {
204
2.75M
    x->mode_test_hit_counts[i] = 0;
205
2.75M
  }
206
207
137k
  q = (int)pow(Qvalue, 1.25);
208
209
137k
  if (q < 8) q = 8;
210
211
137k
  if (cpi->RDMULT > 1000) {
212
77.3k
    cpi->RDDIV = 1;
213
77.3k
    cpi->RDMULT /= 100;
214
215
1.62M
    for (i = 0; i < MAX_MODES; ++i) {
216
1.54M
      if (cpi->sf.thresh_mult[i] < INT_MAX) {
217
1.47M
        x->rd_threshes[i] = cpi->sf.thresh_mult[i] * q / 100;
218
1.47M
      } else {
219
77.2k
        x->rd_threshes[i] = INT_MAX;
220
77.2k
      }
221
222
1.54M
      cpi->rd_baseline_thresh[i] = x->rd_threshes[i];
223
1.54M
    }
224
77.3k
  } else {
225
60.6k
    cpi->RDDIV = 100;
226
227
1.27M
    for (i = 0; i < MAX_MODES; ++i) {
228
1.21M
      if (cpi->sf.thresh_mult[i] < (INT_MAX / q)) {
229
1.13M
        x->rd_threshes[i] = cpi->sf.thresh_mult[i] * q;
230
1.13M
      } else {
231
78.8k
        x->rd_threshes[i] = INT_MAX;
232
78.8k
      }
233
234
1.21M
      cpi->rd_baseline_thresh[i] = x->rd_threshes[i];
235
1.21M
    }
236
60.6k
  }
237
238
137k
  {
239
    /* build token cost array for the type of frame we have now */
240
137k
    FRAME_CONTEXT *l = &cpi->lfc_n;
241
242
137k
    if (cpi->common.refresh_alt_ref_frame) {
243
30.2k
      l = &cpi->lfc_a;
244
107k
    } else if (cpi->common.refresh_golden_frame) {
245
10.9k
      l = &cpi->lfc_g;
246
10.9k
    }
247
248
137k
    fill_token_costs(cpi->mb.token_costs,
249
137k
                     (const vp8_prob(*)[8][3][11])l->coef_probs);
250
    /*
251
    fill_token_costs(
252
        cpi->mb.token_costs,
253
        (const vp8_prob( *)[8][3][11]) cpi->common.fc.coef_probs);
254
    */
255
256
    /* TODO make these mode costs depend on last,alt or gold too.  (jbb) */
257
137k
    vp8_init_mode_costs(cpi);
258
137k
  }
259
137k
}
260
261
52.0k
void vp8_auto_select_speed(VP8_COMP *cpi) {
262
52.0k
  int milliseconds_for_compress = (int)(1000000 / cpi->framerate);
263
264
52.0k
  milliseconds_for_compress =
265
52.0k
      milliseconds_for_compress * (16 - cpi->oxcf.cpu_used) / 16;
266
267
#if 0
268
269
    if (0)
270
    {
271
        FILE *f;
272
273
        f = fopen("speed.stt", "a");
274
        fprintf(f, " %8ld %10ld %10ld %10ld\n",
275
                cpi->common.current_video_frame, cpi->Speed, milliseconds_for_compress, cpi->avg_pick_mode_time);
276
        fclose(f);
277
    }
278
279
#endif
280
281
52.0k
  if (cpi->avg_pick_mode_time < milliseconds_for_compress &&
282
52.0k
      (cpi->avg_encode_time - cpi->avg_pick_mode_time) <
283
52.0k
          milliseconds_for_compress) {
284
52.0k
    if (cpi->avg_pick_mode_time == 0) {
285
2.81k
      cpi->Speed = 4;
286
49.2k
    } else {
287
49.2k
      if (milliseconds_for_compress * 100 < cpi->avg_encode_time * 95) {
288
1
        cpi->Speed += 2;
289
1
        cpi->avg_pick_mode_time = 0;
290
1
        cpi->avg_encode_time = 0;
291
292
1
        if (cpi->Speed > 16) {
293
0
          cpi->Speed = 16;
294
0
        }
295
1
      }
296
297
49.2k
      if (milliseconds_for_compress * 100 >
298
49.2k
          cpi->avg_encode_time * auto_speed_thresh[cpi->Speed]) {
299
49.1k
        cpi->Speed -= 1;
300
49.1k
        cpi->avg_pick_mode_time = 0;
301
49.1k
        cpi->avg_encode_time = 0;
302
303
        /* In real-time mode, cpi->speed is in [4, 16]. */
304
49.1k
        if (cpi->Speed < 4) {
305
49.1k
          cpi->Speed = 4;
306
49.1k
        }
307
49.1k
      }
308
49.2k
    }
309
52.0k
  } else {
310
0
    cpi->Speed += 4;
311
312
0
    if (cpi->Speed > 16) cpi->Speed = 16;
313
314
0
    cpi->avg_pick_mode_time = 0;
315
0
    cpi->avg_encode_time = 0;
316
0
  }
317
52.0k
}
318
319
0
int vp8_block_error_c(short *coeff, short *dqcoeff) {
320
0
  int i;
321
0
  int error = 0;
322
323
0
  for (i = 0; i < 16; ++i) {
324
0
    int this_diff = coeff[i] - dqcoeff[i];
325
0
    error += this_diff * this_diff;
326
0
  }
327
328
0
  return error;
329
0
}
330
331
0
int vp8_mbblock_error_c(MACROBLOCK *mb, int dc) {
332
0
  BLOCK *be;
333
0
  BLOCKD *bd;
334
0
  int i, j;
335
0
  int berror, error = 0;
336
337
0
  for (i = 0; i < 16; ++i) {
338
0
    be = &mb->block[i];
339
0
    bd = &mb->e_mbd.block[i];
340
341
0
    berror = 0;
342
343
0
    for (j = dc; j < 16; ++j) {
344
0
      int this_diff = be->coeff[j] - bd->dqcoeff[j];
345
0
      berror += this_diff * this_diff;
346
0
    }
347
348
0
    error += berror;
349
0
  }
350
351
0
  return error;
352
0
}
353
354
0
int vp8_mbuverror_c(MACROBLOCK *mb) {
355
0
  BLOCK *be;
356
0
  BLOCKD *bd;
357
358
0
  int i;
359
0
  int error = 0;
360
361
0
  for (i = 16; i < 24; ++i) {
362
0
    be = &mb->block[i];
363
0
    bd = &mb->e_mbd.block[i];
364
365
0
    error += vp8_block_error_c(be->coeff, bd->dqcoeff);
366
0
  }
367
368
0
  return error;
369
0
}
370
371
17.4k
int VP8_UVSSE(MACROBLOCK *x) {
372
17.4k
  unsigned char *uptr, *vptr;
373
17.4k
  unsigned char *upred_ptr = (*(x->block[16].base_src) + x->block[16].src);
374
17.4k
  unsigned char *vpred_ptr = (*(x->block[20].base_src) + x->block[20].src);
375
17.4k
  int uv_stride = x->block[16].src_stride;
376
377
17.4k
  unsigned int sse1 = 0;
378
17.4k
  unsigned int sse2 = 0;
379
17.4k
  int mv_row = x->e_mbd.mode_info_context->mbmi.mv.as_mv.row;
380
17.4k
  int mv_col = x->e_mbd.mode_info_context->mbmi.mv.as_mv.col;
381
17.4k
  int offset;
382
17.4k
  int pre_stride = x->e_mbd.pre.uv_stride;
383
384
17.4k
  if (mv_row < 0) {
385
536
    mv_row -= 1;
386
16.9k
  } else {
387
16.9k
    mv_row += 1;
388
16.9k
  }
389
390
17.4k
  if (mv_col < 0) {
391
1.13k
    mv_col -= 1;
392
16.3k
  } else {
393
16.3k
    mv_col += 1;
394
16.3k
  }
395
396
17.4k
  mv_row /= 2;
397
17.4k
  mv_col /= 2;
398
399
17.4k
  offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
400
17.4k
  uptr = x->e_mbd.pre.u_buffer + offset;
401
17.4k
  vptr = x->e_mbd.pre.v_buffer + offset;
402
403
17.4k
  if ((mv_row | mv_col) & 7) {
404
2.16k
    vpx_sub_pixel_variance8x8(uptr, pre_stride, mv_col & 7, mv_row & 7,
405
2.16k
                              upred_ptr, uv_stride, &sse2);
406
2.16k
    vpx_sub_pixel_variance8x8(vptr, pre_stride, mv_col & 7, mv_row & 7,
407
2.16k
                              vpred_ptr, uv_stride, &sse1);
408
2.16k
    sse2 += sse1;
409
15.3k
  } else {
410
15.3k
    vpx_variance8x8(uptr, pre_stride, upred_ptr, uv_stride, &sse2);
411
15.3k
    vpx_variance8x8(vptr, pre_stride, vpred_ptr, uv_stride, &sse1);
412
15.3k
    sse2 += sse1;
413
15.3k
  }
414
17.4k
  return sse2;
415
17.4k
}
416
417
static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, int type, ENTROPY_CONTEXT *a,
418
434M
                       ENTROPY_CONTEXT *l) {
419
434M
  int c = !type; /* start at coef 0, unless Y with Y2 */
420
434M
  int eob = (int)(*b->eob);
421
434M
  int pt; /* surrounding block/prev coef predictor */
422
434M
  int cost = 0;
423
434M
  short *qcoeff_ptr = b->qcoeff;
424
425
434M
  VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
426
427
434M
  assert(eob <= 16);
428
4.22G
  for (; c < eob; ++c) {
429
3.78G
    const int v = qcoeff_ptr[vp8_default_zig_zag1d[c]];
430
3.78G
    const int t = vp8_dct_value_tokens_ptr[v].Token;
431
3.78G
    cost += mb->token_costs[type][vp8_coef_bands[c]][pt][t];
432
3.78G
    cost += vp8_dct_value_cost_ptr[v];
433
3.78G
    pt = vp8_prev_token_class[t];
434
3.78G
  }
435
436
434M
  if (c < 16) {
437
282M
    cost += mb->token_costs[type][vp8_coef_bands[c]][pt][DCT_EOB_TOKEN];
438
282M
  }
439
440
434M
  pt = (c != !type); /* is eob first coefficient; */
441
434M
  *a = *l = pt;
442
443
434M
  return cost;
444
434M
}
445
446
8.18M
static int vp8_rdcost_mby(MACROBLOCK *mb) {
447
8.18M
  int cost = 0;
448
8.18M
  int b;
449
8.18M
  MACROBLOCKD *x = &mb->e_mbd;
450
8.18M
  ENTROPY_CONTEXT_PLANES t_above, t_left;
451
8.18M
  ENTROPY_CONTEXT *ta;
452
8.18M
  ENTROPY_CONTEXT *tl;
453
454
8.18M
  t_above = *mb->e_mbd.above_context;
455
8.18M
  t_left = *mb->e_mbd.left_context;
456
457
8.18M
  ta = (ENTROPY_CONTEXT *)&t_above;
458
8.18M
  tl = (ENTROPY_CONTEXT *)&t_left;
459
460
139M
  for (b = 0; b < 16; ++b) {
461
130M
    cost += cost_coeffs(mb, x->block + b, PLANE_TYPE_Y_NO_DC,
462
130M
                        ta + vp8_block2above[b], tl + vp8_block2left[b]);
463
130M
  }
464
465
8.18M
  cost += cost_coeffs(mb, x->block + 24, PLANE_TYPE_Y2,
466
8.18M
                      ta + vp8_block2above[24], tl + vp8_block2left[24]);
467
468
8.18M
  return cost;
469
8.18M
}
470
471
8.18M
static void macro_block_yrd(MACROBLOCK *mb, int *Rate, int *Distortion) {
472
8.18M
  int b;
473
8.18M
  MACROBLOCKD *const x = &mb->e_mbd;
474
8.18M
  BLOCK *const mb_y2 = mb->block + 24;
475
8.18M
  BLOCKD *const x_y2 = x->block + 24;
476
8.18M
  short *Y2DCPtr = mb_y2->src_diff;
477
8.18M
  BLOCK *beptr;
478
8.18M
  int d;
479
480
8.18M
  vp8_subtract_mby(mb->src_diff, *(mb->block[0].base_src),
481
8.18M
                   mb->block[0].src_stride, mb->e_mbd.predictor, 16);
482
483
  /* Fdct and building the 2nd order block */
484
73.6M
  for (beptr = mb->block; beptr < mb->block + 16; beptr += 2) {
485
65.4M
    mb->short_fdct8x4(beptr->src_diff, beptr->coeff, 32);
486
65.4M
    *Y2DCPtr++ = beptr->coeff[0];
487
65.4M
    *Y2DCPtr++ = beptr->coeff[16];
488
65.4M
  }
489
490
  /* 2nd order fdct */
491
8.18M
  mb->short_walsh4x4(mb_y2->src_diff, mb_y2->coeff, 8);
492
493
  /* Quantization */
494
139M
  for (b = 0; b < 16; ++b) {
495
130M
    mb->quantize_b(&mb->block[b], &mb->e_mbd.block[b]);
496
130M
  }
497
498
  /* DC predication and Quantization of 2nd Order block */
499
8.18M
  mb->quantize_b(mb_y2, x_y2);
500
501
  /* Distortion */
502
8.18M
  d = vp8_mbblock_error(mb, 1) << 2;
503
8.18M
  d += vp8_block_error(mb_y2->coeff, x_y2->dqcoeff);
504
505
8.18M
  *Distortion = (d >> 4);
506
507
  /* rate */
508
8.18M
  *Rate = vp8_rdcost_mby(mb);
509
8.18M
}
510
511
24.1M
static void copy_predictor(unsigned char *dst, const unsigned char *predictor) {
512
24.1M
  const unsigned int *p = (const unsigned int *)predictor;
513
24.1M
  unsigned int *d = (unsigned int *)dst;
514
24.1M
  d[0] = p[0];
515
24.1M
  d[4] = p[4];
516
24.1M
  d[8] = p[8];
517
24.1M
  d[12] = p[12];
518
24.1M
}
519
static int rd_pick_intra4x4block(MACROBLOCK *x, BLOCK *be, BLOCKD *b,
520
                                 B_PREDICTION_MODE *best_mode,
521
                                 const int *bmode_costs, ENTROPY_CONTEXT *a,
522
                                 ENTROPY_CONTEXT *l,
523
524
                                 int *bestrate, int *bestratey,
525
13.0M
                                 int *bestdistortion) {
526
13.0M
  B_PREDICTION_MODE mode;
527
13.0M
  int best_rd = INT_MAX;
528
13.0M
  int rate = 0;
529
13.0M
  int distortion;
530
531
13.0M
  ENTROPY_CONTEXT ta = *a, tempa = *a;
532
13.0M
  ENTROPY_CONTEXT tl = *l, templ = *l;
533
  /*
534
   * The predictor buffer is a 2d buffer with a stride of 16.  Create
535
   * a temp buffer that meets the stride requirements, but we are only
536
   * interested in the left 4x4 block
537
   * */
538
13.0M
  DECLARE_ALIGNED(16, unsigned char, best_predictor[16 * 4]);
539
13.0M
  DECLARE_ALIGNED(16, short, best_dqcoeff[16]);
540
13.0M
  int dst_stride = x->e_mbd.dst.y_stride;
541
13.0M
  unsigned char *dst = x->e_mbd.dst.y_buffer + b->offset;
542
543
13.0M
  unsigned char *Above = dst - dst_stride;
544
13.0M
  unsigned char *yleft = dst - 1;
545
13.0M
  unsigned char top_left = Above[-1];
546
547
143M
  for (mode = B_DC_PRED; mode <= B_HU_PRED; ++mode) {
548
130M
    int this_rd;
549
130M
    int ratey;
550
551
130M
    rate = bmode_costs[mode];
552
553
130M
    vp8_intra4x4_predict(Above, yleft, dst_stride, mode, b->predictor, 16,
554
130M
                         top_left);
555
130M
    vp8_subtract_b(be, b, 16);
556
130M
    x->short_fdct4x4(be->src_diff, be->coeff, 32);
557
130M
    x->quantize_b(be, b);
558
559
130M
    tempa = ta;
560
130M
    templ = tl;
561
562
130M
    ratey = cost_coeffs(x, b, PLANE_TYPE_Y_WITH_DC, &tempa, &templ);
563
130M
    rate += ratey;
564
130M
    distortion = vp8_block_error(be->coeff, b->dqcoeff) >> 2;
565
566
130M
    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
567
568
130M
    if (this_rd < best_rd) {
569
24.1M
      *bestrate = rate;
570
24.1M
      *bestratey = ratey;
571
24.1M
      *bestdistortion = distortion;
572
24.1M
      best_rd = this_rd;
573
24.1M
      *best_mode = mode;
574
24.1M
      *a = tempa;
575
24.1M
      *l = templ;
576
24.1M
      copy_predictor(best_predictor, b->predictor);
577
24.1M
      memcpy(best_dqcoeff, b->dqcoeff, 32);
578
24.1M
    }
579
130M
  }
580
13.0M
  b->bmi.as_mode = *best_mode;
581
582
13.0M
  vp8_short_idct4x4llm(best_dqcoeff, best_predictor, 16, dst, dst_stride);
583
584
13.0M
  return best_rd;
585
13.0M
}
586
587
static int rd_pick_intra4x4mby_modes(MACROBLOCK *mb, int *Rate, int *rate_y,
588
1.18M
                                     int *Distortion, int best_rd) {
589
1.18M
  MACROBLOCKD *const xd = &mb->e_mbd;
590
1.18M
  int i;
591
1.18M
  int cost = mb->mbmode_cost[xd->frame_type][B_PRED];
592
1.18M
  int distortion = 0;
593
1.18M
  int tot_rate_y = 0;
594
1.18M
  int64_t total_rd = 0;
595
1.18M
  ENTROPY_CONTEXT_PLANES t_above, t_left;
596
1.18M
  ENTROPY_CONTEXT *ta;
597
1.18M
  ENTROPY_CONTEXT *tl;
598
1.18M
  const int *bmode_costs;
599
600
1.18M
  t_above = *mb->e_mbd.above_context;
601
1.18M
  t_left = *mb->e_mbd.left_context;
602
603
1.18M
  ta = (ENTROPY_CONTEXT *)&t_above;
604
1.18M
  tl = (ENTROPY_CONTEXT *)&t_left;
605
606
1.18M
  intra_prediction_down_copy(xd, xd->dst.y_buffer - xd->dst.y_stride + 16);
607
608
1.18M
  bmode_costs = mb->inter_bmode_costs;
609
610
13.5M
  for (i = 0; i < 16; ++i) {
611
13.0M
    MODE_INFO *const mic = xd->mode_info_context;
612
13.0M
    const int mis = xd->mode_info_stride;
613
13.0M
    B_PREDICTION_MODE best_mode = B_MODE_COUNT;
614
13.0M
    int r = 0, ry = 0, d = 0;
615
616
13.0M
    if (mb->e_mbd.frame_type == KEY_FRAME) {
617
7.08M
      const B_PREDICTION_MODE A = above_block_mode(mic, i, mis);
618
7.08M
      const B_PREDICTION_MODE L = left_block_mode(mic, i);
619
620
7.08M
      bmode_costs = mb->bmode_costs[A][L];
621
7.08M
    }
622
623
13.0M
    total_rd += rd_pick_intra4x4block(
624
13.0M
        mb, mb->block + i, xd->block + i, &best_mode, bmode_costs,
625
13.0M
        ta + vp8_block2above[i], tl + vp8_block2left[i], &r, &ry, &d);
626
627
13.0M
    cost += r;
628
13.0M
    distortion += d;
629
13.0M
    tot_rate_y += ry;
630
631
13.0M
    assert(best_mode != B_MODE_COUNT);
632
13.0M
    mic->bmi[i].as_mode = best_mode;
633
634
13.0M
    if (total_rd >= (int64_t)best_rd) break;
635
13.0M
  }
636
637
1.18M
  if (total_rd >= (int64_t)best_rd) return INT_MAX;
638
639
506k
  *Rate = cost;
640
506k
  *rate_y = tot_rate_y;
641
506k
  *Distortion = distortion;
642
643
506k
  return RDCOST(mb->rdmult, mb->rddiv, cost, distortion);
644
1.18M
}
645
646
static int rd_pick_intra16x16mby_mode(MACROBLOCK *x, int *Rate, int *rate_y,
647
652k
                                      int *Distortion) {
648
652k
  MB_PREDICTION_MODE mode;
649
652k
  MB_PREDICTION_MODE mode_selected = MB_MODE_COUNT;
650
652k
  int rate, ratey;
651
652k
  int distortion;
652
652k
  int best_rd = INT_MAX;
653
652k
  int this_rd;
654
652k
  MACROBLOCKD *xd = &x->e_mbd;
655
656
  /* Y Search for 16x16 intra prediction mode */
657
3.26M
  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
658
2.61M
    xd->mode_info_context->mbmi.mode = mode;
659
660
2.61M
    vp8_build_intra_predictors_mby_s(xd, xd->dst.y_buffer - xd->dst.y_stride,
661
2.61M
                                     xd->dst.y_buffer - 1, xd->dst.y_stride,
662
2.61M
                                     xd->predictor, 16);
663
664
2.61M
    macro_block_yrd(x, &ratey, &distortion);
665
2.61M
    rate = ratey +
666
2.61M
           x->mbmode_cost[xd->frame_type][xd->mode_info_context->mbmi.mode];
667
668
2.61M
    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
669
670
2.61M
    if (this_rd < best_rd) {
671
801k
      mode_selected = mode;
672
801k
      best_rd = this_rd;
673
801k
      *Rate = rate;
674
801k
      *rate_y = ratey;
675
801k
      *Distortion = distortion;
676
801k
    }
677
2.61M
  }
678
679
652k
  assert(mode_selected != MB_MODE_COUNT);
680
652k
  xd->mode_info_context->mbmi.mode = mode_selected;
681
652k
  return best_rd;
682
652k
}
683
684
9.00M
static int rd_cost_mbuv(MACROBLOCK *mb) {
685
9.00M
  int b;
686
9.00M
  int cost = 0;
687
9.00M
  MACROBLOCKD *x = &mb->e_mbd;
688
9.00M
  ENTROPY_CONTEXT_PLANES t_above, t_left;
689
9.00M
  ENTROPY_CONTEXT *ta;
690
9.00M
  ENTROPY_CONTEXT *tl;
691
692
9.00M
  t_above = *mb->e_mbd.above_context;
693
9.00M
  t_left = *mb->e_mbd.left_context;
694
695
9.00M
  ta = (ENTROPY_CONTEXT *)&t_above;
696
9.00M
  tl = (ENTROPY_CONTEXT *)&t_left;
697
698
81.0M
  for (b = 16; b < 24; ++b) {
699
72.0M
    cost += cost_coeffs(mb, x->block + b, PLANE_TYPE_UV,
700
72.0M
                        ta + vp8_block2above[b], tl + vp8_block2left[b]);
701
72.0M
  }
702
703
9.00M
  return cost;
704
9.00M
}
705
706
static int rd_inter16x16_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate,
707
2.93M
                            int *distortion, int fullpixel) {
708
2.93M
  (void)cpi;
709
2.93M
  (void)fullpixel;
710
711
2.93M
  vp8_build_inter16x16_predictors_mbuv(&x->e_mbd);
712
2.93M
  vp8_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
713
2.93M
                    x->src.uv_stride, &x->e_mbd.predictor[256],
714
2.93M
                    &x->e_mbd.predictor[320], 8);
715
716
2.93M
  vp8_transform_mbuv(x);
717
2.93M
  vp8_quantize_mbuv(x);
718
719
2.93M
  *rate = rd_cost_mbuv(x);
720
2.93M
  *distortion = vp8_mbuverror(x) / 4;
721
722
2.93M
  return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
723
2.93M
}
724
725
static int rd_inter4x4_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate,
726
385k
                          int *distortion, int fullpixel) {
727
385k
  (void)cpi;
728
385k
  (void)fullpixel;
729
730
385k
  vp8_build_inter4x4_predictors_mbuv(&x->e_mbd);
731
385k
  vp8_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
732
385k
                    x->src.uv_stride, &x->e_mbd.predictor[256],
733
385k
                    &x->e_mbd.predictor[320], 8);
734
735
385k
  vp8_transform_mbuv(x);
736
385k
  vp8_quantize_mbuv(x);
737
738
385k
  *rate = rd_cost_mbuv(x);
739
385k
  *distortion = vp8_mbuverror(x) / 4;
740
741
385k
  return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
742
385k
}
743
744
static void rd_pick_intra_mbuv_mode(MACROBLOCK *x, int *rate,
745
1.41M
                                    int *rate_tokenonly, int *distortion) {
746
1.41M
  MB_PREDICTION_MODE mode;
747
1.41M
  MB_PREDICTION_MODE mode_selected = MB_MODE_COUNT;
748
1.41M
  int best_rd = INT_MAX;
749
1.41M
  int d = 0, r = 0;
750
1.41M
  int rate_to;
751
1.41M
  MACROBLOCKD *xd = &x->e_mbd;
752
753
7.09M
  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
754
5.67M
    int this_rate;
755
5.67M
    int this_distortion;
756
5.67M
    int this_rd;
757
758
5.67M
    xd->mode_info_context->mbmi.uv_mode = mode;
759
760
5.67M
    vp8_build_intra_predictors_mbuv_s(
761
5.67M
        xd, xd->dst.u_buffer - xd->dst.uv_stride,
762
5.67M
        xd->dst.v_buffer - xd->dst.uv_stride, xd->dst.u_buffer - 1,
763
5.67M
        xd->dst.v_buffer - 1, xd->dst.uv_stride, &xd->predictor[256],
764
5.67M
        &xd->predictor[320], 8);
765
766
5.67M
    vp8_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
767
5.67M
                      x->src.uv_stride, &xd->predictor[256],
768
5.67M
                      &xd->predictor[320], 8);
769
5.67M
    vp8_transform_mbuv(x);
770
5.67M
    vp8_quantize_mbuv(x);
771
772
5.67M
    rate_to = rd_cost_mbuv(x);
773
5.67M
    this_rate =
774
5.67M
        rate_to + x->intra_uv_mode_cost[xd->frame_type]
775
5.67M
                                       [xd->mode_info_context->mbmi.uv_mode];
776
777
5.67M
    this_distortion = vp8_mbuverror(x) / 4;
778
779
5.67M
    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
780
781
5.67M
    if (this_rd < best_rd) {
782
1.76M
      best_rd = this_rd;
783
1.76M
      d = this_distortion;
784
1.76M
      r = this_rate;
785
1.76M
      *rate_tokenonly = rate_to;
786
1.76M
      mode_selected = mode;
787
1.76M
    }
788
5.67M
  }
789
790
1.41M
  *rate = r;
791
1.41M
  *distortion = d;
792
793
1.41M
  assert(mode_selected != MB_MODE_COUNT);
794
1.41M
  xd->mode_info_context->mbmi.uv_mode = mode_selected;
795
1.41M
}
796
797
7.07M
int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4]) {
798
7.07M
  vp8_prob p[VP8_MVREFS - 1];
799
7.07M
  assert(NEARESTMV <= m && m <= SPLITMV);
800
7.07M
  vp8_mv_ref_probs(p, near_mv_ref_ct);
801
7.07M
  return vp8_cost_token(vp8_mv_ref_tree, p,
802
7.07M
                        vp8_mv_ref_encoding_array + (m - NEARESTMV));
803
7.07M
}
804
805
2.93M
void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv) {
806
2.93M
  x->e_mbd.mode_info_context->mbmi.mode = mb;
807
2.93M
  x->e_mbd.mode_info_context->mbmi.mv.as_int = mv->as_int;
808
2.93M
}
809
810
static int labels2mode(MACROBLOCK *x, int const *labelings, int which_label,
811
                       B_PREDICTION_MODE this_mode, int_mv *this_mv,
812
30.9M
                       int_mv *best_ref_mv, int *mvcost[2]) {
813
30.9M
  MACROBLOCKD *const xd = &x->e_mbd;
814
30.9M
  MODE_INFO *const mic = xd->mode_info_context;
815
30.9M
  const int mis = xd->mode_info_stride;
816
817
30.9M
  int cost = 0;
818
30.9M
  int thismvcost = 0;
819
820
  /* We have to be careful retrieving previously-encoded motion vectors.
821
     Ones from this macroblock have to be pulled from the BLOCKD array
822
     as they have not yet made it to the bmi array in our MB_MODE_INFO. */
823
824
30.9M
  int i = 0;
825
826
494M
  do {
827
494M
    BLOCKD *const d = xd->block + i;
828
494M
    const int row = i >> 2, col = i & 3;
829
830
494M
    B_PREDICTION_MODE m;
831
832
494M
    if (labelings[i] != which_label) continue;
833
834
119M
    if (col && labelings[i] == labelings[i - 1]) {
835
61.8M
      m = LEFT4X4;
836
61.8M
    } else if (row && labelings[i] == labelings[i - 4]) {
837
27.2M
      m = ABOVE4X4;
838
30.9M
    } else {
839
      /* the only time we should do costing for new motion vector
840
       * or mode is when we are on a new label  (jbb May 08, 2007)
841
       */
842
30.9M
      switch (m = this_mode) {
843
8.53M
        case NEW4X4:
844
8.53M
          thismvcost = vp8_mv_bit_cost(this_mv, best_ref_mv, mvcost, 102);
845
8.53M
          break;
846
9.03M
        case LEFT4X4:
847
9.03M
          this_mv->as_int = col ? d[-1].bmi.mv.as_int : left_block_mv(mic, i);
848
9.03M
          break;
849
7.00M
        case ABOVE4X4:
850
7.00M
          this_mv->as_int =
851
7.00M
              row ? d[-4].bmi.mv.as_int : above_block_mv(mic, i, mis);
852
7.00M
          break;
853
6.35M
        case ZERO4X4: this_mv->as_int = 0; break;
854
0
        default: break;
855
30.9M
      }
856
857
30.9M
      if (m == ABOVE4X4) { /* replace above with left if same */
858
7.00M
        int_mv left_mv;
859
860
7.00M
        left_mv.as_int = col ? d[-1].bmi.mv.as_int : left_block_mv(mic, i);
861
862
7.00M
        if (left_mv.as_int == this_mv->as_int) m = LEFT4X4;
863
7.00M
      }
864
865
30.9M
      cost = x->inter_bmode_costs[m];
866
30.9M
    }
867
868
119M
    d->bmi.mv.as_int = this_mv->as_int;
869
870
119M
    x->partition_info->bmi[i].mode = m;
871
119M
    x->partition_info->bmi[i].mv.as_int = this_mv->as_int;
872
873
494M
  } while (++i < 16);
874
875
30.9M
  cost += thismvcost;
876
30.9M
  return cost;
877
30.9M
}
878
879
static int rdcost_mbsegment_y(MACROBLOCK *mb, const int *labels,
880
                              int which_label, ENTROPY_CONTEXT *ta,
881
24.2M
                              ENTROPY_CONTEXT *tl) {
882
24.2M
  int cost = 0;
883
24.2M
  int b;
884
24.2M
  MACROBLOCKD *x = &mb->e_mbd;
885
886
411M
  for (b = 0; b < 16; ++b) {
887
387M
    if (labels[b] == which_label) {
888
93.7M
      cost += cost_coeffs(mb, x->block + b, PLANE_TYPE_Y_WITH_DC,
889
93.7M
                          ta + vp8_block2above[b], tl + vp8_block2left[b]);
890
93.7M
    }
891
387M
  }
892
893
24.2M
  return cost;
894
24.2M
}
895
static unsigned int vp8_encode_inter_mb_segment(MACROBLOCK *x,
896
                                                int const *labels,
897
24.2M
                                                int which_label) {
898
24.2M
  int i;
899
24.2M
  unsigned int distortion = 0;
900
24.2M
  int pre_stride = x->e_mbd.pre.y_stride;
901
24.2M
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;
902
903
411M
  for (i = 0; i < 16; ++i) {
904
387M
    if (labels[i] == which_label) {
905
93.7M
      BLOCKD *bd = &x->e_mbd.block[i];
906
93.7M
      BLOCK *be = &x->block[i];
907
908
93.7M
      vp8_build_inter_predictors_b(bd, 16, base_pre, pre_stride,
909
93.7M
                                   x->e_mbd.subpixel_predict);
910
93.7M
      vp8_subtract_b(be, bd, 16);
911
93.7M
      x->short_fdct4x4(be->src_diff, be->coeff, 32);
912
93.7M
      x->quantize_b(be, bd);
913
914
93.7M
      distortion += vp8_block_error(be->coeff, bd->dqcoeff);
915
93.7M
    }
916
387M
  }
917
918
24.2M
  return distortion;
919
24.2M
}
920
921
static const unsigned int segmentation_to_sseshift[4] = { 3, 3, 2, 0 };
922
923
typedef struct {
924
  int_mv *ref_mv;
925
  int_mv mvp;
926
927
  int segment_rd;
928
  int segment_num;
929
  int r;
930
  int d;
931
  int segment_yrate;
932
  B_PREDICTION_MODE modes[16];
933
  int_mv mvs[16];
934
  unsigned char eobs[16];
935
936
  int mvthresh;
937
  int *mdcounts;
938
939
  int_mv sv_mvp[4]; /* save 4 mvp from 8x8 */
940
  int sv_istep[2];  /* save 2 initial step_param for 16x8/8x16 */
941
942
} BEST_SEG_INFO;
943
944
static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, BEST_SEG_INFO *bsi,
945
1.65M
                             unsigned int segmentation) {
946
1.65M
  int i;
947
1.65M
  int const *labels;
948
1.65M
  int br = 0;
949
1.65M
  int bd = 0;
950
1.65M
  B_PREDICTION_MODE this_mode;
951
952
1.65M
  int label_count;
953
1.65M
  int this_segment_rd = 0;
954
1.65M
  int label_mv_thresh;
955
1.65M
  int rate = 0;
956
1.65M
  int sbr = 0;
957
1.65M
  int sbd = 0;
958
1.65M
  int segmentyrate = 0;
959
960
1.65M
  vp8_variance_fn_ptr_t *v_fn_ptr;
961
962
1.65M
  ENTROPY_CONTEXT_PLANES t_above, t_left;
963
1.65M
  ENTROPY_CONTEXT_PLANES t_above_b, t_left_b;
964
965
1.65M
  t_above = *x->e_mbd.above_context;
966
1.65M
  t_left = *x->e_mbd.left_context;
967
968
1.65M
  vp8_zero(t_above_b);
969
1.65M
  vp8_zero(t_left_b);
970
971
1.65M
  br = 0;
972
1.65M
  bd = 0;
973
974
1.65M
  v_fn_ptr = &cpi->fn_ptr[segmentation];
975
1.65M
  labels = vp8_mbsplits[segmentation];
976
1.65M
  label_count = vp8_mbsplit_count[segmentation];
977
978
  /* 64 makes this threshold really big effectively making it so that we
979
   * very rarely check mvs on segments.   setting this to 1 would make mv
980
   * thresh roughly equal to what it is for macroblocks
981
   */
982
1.65M
  label_mv_thresh = 1 * bsi->mvthresh / label_count;
983
984
  /* Segmentation method overheads */
985
1.65M
  rate = vp8_cost_token(vp8_mbsplit_tree, vp8_mbsplit_probs,
986
1.65M
                        vp8_mbsplit_encodings + segmentation);
987
1.65M
  rate += vp8_cost_mv_ref(SPLITMV, bsi->mdcounts);
988
1.65M
  this_segment_rd += RDCOST(x->rdmult, x->rddiv, rate, 0);
989
1.65M
  br += rate;
990
991
7.06M
  for (i = 0; i < label_count; ++i) {
992
6.28M
    int_mv mode_mv[B_MODE_COUNT] = { { 0 }, { 0 } };
993
6.28M
    int best_label_rd = INT_MAX;
994
6.28M
    B_PREDICTION_MODE mode_selected = ZERO4X4;
995
6.28M
    int bestlabelyrate = 0;
996
997
    /* search for the best motion vector on this segment */
998
30.9M
    for (this_mode = LEFT4X4; this_mode <= NEW4X4; ++this_mode) {
999
25.1M
      int this_rd;
1000
25.1M
      int distortion;
1001
25.1M
      int labelyrate;
1002
25.1M
      ENTROPY_CONTEXT_PLANES t_above_s, t_left_s;
1003
25.1M
      ENTROPY_CONTEXT *ta_s;
1004
25.1M
      ENTROPY_CONTEXT *tl_s;
1005
1006
25.1M
      t_above_s = t_above;
1007
25.1M
      t_left_s = t_left;
1008
1009
25.1M
      ta_s = (ENTROPY_CONTEXT *)&t_above_s;
1010
25.1M
      tl_s = (ENTROPY_CONTEXT *)&t_left_s;
1011
1012
25.1M
      if (this_mode == NEW4X4) {
1013
6.28M
        int sseshift;
1014
6.28M
        int num00;
1015
6.28M
        int step_param = 0;
1016
6.28M
        int further_steps;
1017
6.28M
        int n;
1018
6.28M
        int thissme;
1019
6.28M
        int bestsme = INT_MAX;
1020
6.28M
        int_mv temp_mv;
1021
6.28M
        BLOCK *c;
1022
6.28M
        BLOCKD *e;
1023
1024
        /* Is the best so far sufficiently good that we can't justify
1025
         * doing a new motion search.
1026
         */
1027
6.28M
        if (best_label_rd < label_mv_thresh) break;
1028
1029
5.78M
        if (cpi->compressor_speed) {
1030
5.78M
          if (segmentation == BLOCK_8X16 || segmentation == BLOCK_16X8) {
1031
1.44M
            bsi->mvp.as_int = bsi->sv_mvp[i].as_int;
1032
1.44M
            if (i == 1 && segmentation == BLOCK_16X8) {
1033
343k
              bsi->mvp.as_int = bsi->sv_mvp[2].as_int;
1034
343k
            }
1035
1036
1.44M
            step_param = bsi->sv_istep[i];
1037
1.44M
          }
1038
1039
          /* use previous block's result as next block's MV
1040
           * predictor.
1041
           */
1042
5.78M
          if (segmentation == BLOCK_4X4 && i > 0) {
1043
1.96M
            bsi->mvp.as_int = x->e_mbd.block[i - 1].bmi.mv.as_int;
1044
1.96M
            if (i == 4 || i == 8 || i == 12) {
1045
403k
              bsi->mvp.as_int = x->e_mbd.block[i - 4].bmi.mv.as_int;
1046
403k
            }
1047
1.96M
            step_param = 2;
1048
1.96M
          }
1049
5.78M
        }
1050
1051
5.78M
        further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
1052
1053
5.78M
        {
1054
5.78M
          int sadpb = x->sadperbit4;
1055
5.78M
          int_mv mvp_full;
1056
1057
5.78M
          mvp_full.as_mv.row = bsi->mvp.as_mv.row >> 3;
1058
5.78M
          mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3;
1059
1060
          /* find first label */
1061
5.78M
          n = vp8_mbsplit_offset[segmentation][i];
1062
1063
5.78M
          c = &x->block[n];
1064
5.78M
          e = &x->e_mbd.block[n];
1065
1066
5.78M
          {
1067
5.78M
            bestsme = cpi->diamond_search_sad(
1068
5.78M
                x, c, e, &mvp_full, &mode_mv[NEW4X4], step_param, sadpb, &num00,
1069
5.78M
                v_fn_ptr, x->mvcost, bsi->ref_mv);
1070
1071
5.78M
            n = num00;
1072
5.78M
            num00 = 0;
1073
1074
22.5M
            while (n < further_steps) {
1075
16.8M
              n++;
1076
1077
16.8M
              if (num00) {
1078
2.09M
                num00--;
1079
14.7M
              } else {
1080
14.7M
                thissme = cpi->diamond_search_sad(
1081
14.7M
                    x, c, e, &mvp_full, &temp_mv, step_param + n, sadpb, &num00,
1082
14.7M
                    v_fn_ptr, x->mvcost, bsi->ref_mv);
1083
1084
14.7M
                if (thissme < bestsme) {
1085
2.74M
                  bestsme = thissme;
1086
2.74M
                  mode_mv[NEW4X4].as_int = temp_mv.as_int;
1087
2.74M
                }
1088
14.7M
              }
1089
16.8M
            }
1090
5.78M
          }
1091
1092
5.78M
          sseshift = segmentation_to_sseshift[segmentation];
1093
1094
          /* Should we do a full search (best quality only) */
1095
5.78M
          if ((cpi->compressor_speed == 0) && (bestsme >> sseshift) > 4000) {
1096
            /* Check if mvp_full is within the range. */
1097
0
            vp8_clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max, x->mv_row_min,
1098
0
                         x->mv_row_max);
1099
1100
0
            thissme = vp8_full_search_sad(x, c, e, &mvp_full, sadpb, 16,
1101
0
                                          v_fn_ptr, x->mvcost, bsi->ref_mv);
1102
1103
0
            if (thissme < bestsme) {
1104
0
              bestsme = thissme;
1105
0
              mode_mv[NEW4X4].as_int = e->bmi.mv.as_int;
1106
0
            } else {
1107
              /* The full search result is actually worse so
1108
               * re-instate the previous best vector
1109
               */
1110
0
              e->bmi.mv.as_int = mode_mv[NEW4X4].as_int;
1111
0
            }
1112
0
          }
1113
5.78M
        }
1114
1115
5.78M
        if (bestsme < INT_MAX) {
1116
5.78M
          int disto;
1117
5.78M
          unsigned int sse;
1118
5.78M
          cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4], bsi->ref_mv,
1119
5.78M
                                       x->errorperbit, v_fn_ptr, x->mvcost,
1120
5.78M
                                       &disto, &sse);
1121
5.78M
        }
1122
5.78M
      } /* NEW4X4 */
1123
1124
24.6M
      rate = labels2mode(x, labels, i, this_mode, &mode_mv[this_mode],
1125
24.6M
                         bsi->ref_mv, x->mvcost);
1126
1127
      /* Trap vectors that reach beyond the UMV borders */
1128
24.6M
      if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) ||
1129
24.6M
          ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) ||
1130
24.4M
          ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) ||
1131
24.4M
          ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max)) {
1132
421k
        continue;
1133
421k
      }
1134
1135
24.2M
      distortion = vp8_encode_inter_mb_segment(x, labels, i) / 4;
1136
1137
24.2M
      labelyrate = rdcost_mbsegment_y(x, labels, i, ta_s, tl_s);
1138
24.2M
      rate += labelyrate;
1139
1140
24.2M
      this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
1141
1142
24.2M
      if (this_rd < best_label_rd) {
1143
10.6M
        sbr = rate;
1144
10.6M
        sbd = distortion;
1145
10.6M
        bestlabelyrate = labelyrate;
1146
10.6M
        mode_selected = this_mode;
1147
10.6M
        best_label_rd = this_rd;
1148
1149
10.6M
        t_above_b = t_above_s;
1150
10.6M
        t_left_b = t_left_s;
1151
10.6M
      }
1152
24.2M
    } /*for each 4x4 mode*/
1153
1154
6.28M
    t_above = t_above_b;
1155
6.28M
    t_left = t_left_b;
1156
1157
6.28M
    labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected],
1158
6.28M
                bsi->ref_mv, x->mvcost);
1159
1160
6.28M
    br += sbr;
1161
6.28M
    bd += sbd;
1162
6.28M
    segmentyrate += bestlabelyrate;
1163
6.28M
    this_segment_rd += best_label_rd;
1164
1165
6.28M
    if (this_segment_rd >= bsi->segment_rd) break;
1166
1167
6.28M
  } /* for each label */
1168
1169
1.65M
  if (this_segment_rd < bsi->segment_rd) {
1170
775k
    bsi->r = br;
1171
775k
    bsi->d = bd;
1172
775k
    bsi->segment_yrate = segmentyrate;
1173
775k
    bsi->segment_rd = this_segment_rd;
1174
775k
    bsi->segment_num = segmentation;
1175
1176
    /* store everything needed to come back to this!! */
1177
13.1M
    for (i = 0; i < 16; ++i) {
1178
12.4M
      bsi->mvs[i].as_mv = x->partition_info->bmi[i].mv.as_mv;
1179
12.4M
      bsi->modes[i] = x->partition_info->bmi[i].mode;
1180
12.4M
      bsi->eobs[i] = x->e_mbd.eobs[i];
1181
12.4M
    }
1182
775k
  }
1183
1.65M
}
1184
1185
1.54M
static void vp8_cal_step_param(int sr, int *sp) {
1186
1.54M
  int step = 0;
1187
1188
1.54M
  if (sr > MAX_FIRST_STEP) {
1189
50.4k
    sr = MAX_FIRST_STEP;
1190
1.49M
  } else if (sr < 1) {
1191
708k
    sr = 1;
1192
708k
  }
1193
1194
4.60M
  while (sr >>= 1) step++;
1195
1196
1.54M
  *sp = MAX_MVSEARCH_STEPS - 1 - step;
1197
1.54M
}
1198
1199
static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
1200
                                           int_mv *best_ref_mv, int best_rd,
1201
                                           int *mdcounts, int *returntotrate,
1202
                                           int *returnyrate,
1203
                                           int *returndistortion,
1204
732k
                                           int mvthresh) {
1205
732k
  int i;
1206
732k
  BEST_SEG_INFO bsi;
1207
1208
732k
  memset(&bsi, 0, sizeof(bsi));
1209
1210
732k
  bsi.segment_rd = best_rd;
1211
732k
  bsi.ref_mv = best_ref_mv;
1212
732k
  bsi.mvp.as_int = best_ref_mv->as_int;
1213
732k
  bsi.mvthresh = mvthresh;
1214
732k
  bsi.mdcounts = mdcounts;
1215
1216
12.4M
  for (i = 0; i < 16; ++i) {
1217
11.7M
    bsi.modes[i] = ZERO4X4;
1218
11.7M
  }
1219
1220
732k
  if (cpi->compressor_speed == 0) {
1221
    /* for now, we will keep the original segmentation order
1222
       when in best quality mode */
1223
0
    rd_check_segment(cpi, x, &bsi, BLOCK_16X8);
1224
0
    rd_check_segment(cpi, x, &bsi, BLOCK_8X16);
1225
0
    rd_check_segment(cpi, x, &bsi, BLOCK_8X8);
1226
0
    rd_check_segment(cpi, x, &bsi, BLOCK_4X4);
1227
732k
  } else {
1228
732k
    int sr;
1229
1230
732k
    rd_check_segment(cpi, x, &bsi, BLOCK_8X8);
1231
1232
732k
    if (bsi.segment_rd < best_rd) {
1233
385k
      int col_min = ((best_ref_mv->as_mv.col + 7) >> 3) - MAX_FULL_PEL_VAL;
1234
385k
      int row_min = ((best_ref_mv->as_mv.row + 7) >> 3) - MAX_FULL_PEL_VAL;
1235
385k
      int col_max = (best_ref_mv->as_mv.col >> 3) + MAX_FULL_PEL_VAL;
1236
385k
      int row_max = (best_ref_mv->as_mv.row >> 3) + MAX_FULL_PEL_VAL;
1237
1238
385k
      int tmp_col_min = x->mv_col_min;
1239
385k
      int tmp_col_max = x->mv_col_max;
1240
385k
      int tmp_row_min = x->mv_row_min;
1241
385k
      int tmp_row_max = x->mv_row_max;
1242
1243
      /* Get intersection of UMV window and valid MV window to reduce # of
1244
       * checks in diamond search. */
1245
385k
      if (x->mv_col_min < col_min) x->mv_col_min = col_min;
1246
385k
      if (x->mv_col_max > col_max) x->mv_col_max = col_max;
1247
385k
      if (x->mv_row_min < row_min) x->mv_row_min = row_min;
1248
385k
      if (x->mv_row_max > row_max) x->mv_row_max = row_max;
1249
1250
      /* Get 8x8 result */
1251
385k
      bsi.sv_mvp[0].as_int = bsi.mvs[0].as_int;
1252
385k
      bsi.sv_mvp[1].as_int = bsi.mvs[2].as_int;
1253
385k
      bsi.sv_mvp[2].as_int = bsi.mvs[8].as_int;
1254
385k
      bsi.sv_mvp[3].as_int = bsi.mvs[10].as_int;
1255
1256
      /* Use 8x8 result as 16x8/8x16's predictor MV. Adjust search range
1257
       * according to the closeness of 2 MV. */
1258
      /* block 8X16 */
1259
385k
      {
1260
385k
        sr =
1261
385k
            MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[2].as_mv.row)) >> 3,
1262
385k
                 (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[2].as_mv.col)) >> 3);
1263
385k
        vp8_cal_step_param(sr, &bsi.sv_istep[0]);
1264
1265
385k
        sr =
1266
385k
            MAXF((abs(bsi.sv_mvp[1].as_mv.row - bsi.sv_mvp[3].as_mv.row)) >> 3,
1267
385k
                 (abs(bsi.sv_mvp[1].as_mv.col - bsi.sv_mvp[3].as_mv.col)) >> 3);
1268
385k
        vp8_cal_step_param(sr, &bsi.sv_istep[1]);
1269
1270
385k
        rd_check_segment(cpi, x, &bsi, BLOCK_8X16);
1271
385k
      }
1272
1273
      /* block 16X8 */
1274
385k
      {
1275
385k
        sr =
1276
385k
            MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[1].as_mv.row)) >> 3,
1277
385k
                 (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[1].as_mv.col)) >> 3);
1278
385k
        vp8_cal_step_param(sr, &bsi.sv_istep[0]);
1279
1280
385k
        sr =
1281
385k
            MAXF((abs(bsi.sv_mvp[2].as_mv.row - bsi.sv_mvp[3].as_mv.row)) >> 3,
1282
385k
                 (abs(bsi.sv_mvp[2].as_mv.col - bsi.sv_mvp[3].as_mv.col)) >> 3);
1283
385k
        vp8_cal_step_param(sr, &bsi.sv_istep[1]);
1284
1285
385k
        rd_check_segment(cpi, x, &bsi, BLOCK_16X8);
1286
385k
      }
1287
1288
      /* If 8x8 is better than 16x8/8x16, then do 4x4 search */
1289
      /* Not skip 4x4 if speed=0 (good quality) */
1290
385k
      if (cpi->sf.no_skip_block4x4_search || bsi.segment_num == BLOCK_8X8)
1291
      /* || (sv_segment_rd8x8-bsi.segment_rd) < sv_segment_rd8x8>>5) */
1292
147k
      {
1293
147k
        bsi.mvp.as_int = bsi.sv_mvp[0].as_int;
1294
147k
        rd_check_segment(cpi, x, &bsi, BLOCK_4X4);
1295
147k
      }
1296
1297
      /* restore UMV window */
1298
385k
      x->mv_col_min = tmp_col_min;
1299
385k
      x->mv_col_max = tmp_col_max;
1300
385k
      x->mv_row_min = tmp_row_min;
1301
385k
      x->mv_row_max = tmp_row_max;
1302
385k
    }
1303
732k
  }
1304
1305
  /* set it to the best */
1306
12.4M
  for (i = 0; i < 16; ++i) {
1307
11.7M
    BLOCKD *bd = &x->e_mbd.block[i];
1308
1309
11.7M
    bd->bmi.mv.as_int = bsi.mvs[i].as_int;
1310
11.7M
    *bd->eob = bsi.eobs[i];
1311
11.7M
  }
1312
1313
732k
  *returntotrate = bsi.r;
1314
732k
  *returndistortion = bsi.d;
1315
732k
  *returnyrate = bsi.segment_yrate;
1316
1317
  /* save partitions */
1318
732k
  x->e_mbd.mode_info_context->mbmi.partitioning = bsi.segment_num;
1319
732k
  x->partition_info->count = vp8_mbsplit_count[bsi.segment_num];
1320
1321
3.74M
  for (i = 0; i < x->partition_info->count; ++i) {
1322
3.01M
    int j;
1323
1324
3.01M
    j = vp8_mbsplit_offset[bsi.segment_num][i];
1325
1326
3.01M
    x->partition_info->bmi[i].mode = bsi.modes[j];
1327
3.01M
    x->partition_info->bmi[i].mv.as_mv = bsi.mvs[j].as_mv;
1328
3.01M
  }
1329
  /*
1330
   * used to set x->e_mbd.mode_info_context->mbmi.mv.as_int
1331
   */
1332
732k
  x->partition_info->bmi[15].mv.as_int = bsi.mvs[15].as_int;
1333
1334
732k
  return bsi.segment_rd;
1335
732k
}
1336
1337
/* The improved MV prediction */
1338
void vp8_mv_pred(VP8_COMP *cpi, MACROBLOCKD *xd, const MODE_INFO *here,
1339
                 int_mv *mvp, int refframe, int *ref_frame_sign_bias, int *sr,
1340
1.65M
                 int near_sadidx[]) {
1341
1.65M
  const MODE_INFO *above = here - xd->mode_info_stride;
1342
1.65M
  const MODE_INFO *left = here - 1;
1343
1.65M
  const MODE_INFO *aboveleft = above - 1;
1344
1.65M
  int_mv near_mvs[8];
1345
1.65M
  int near_ref[8];
1346
1.65M
  int_mv mv;
1347
1.65M
  int vcnt = 0;
1348
1.65M
  int find = 0;
1349
1.65M
  int mb_offset;
1350
1351
1.65M
  int mvx[8];
1352
1.65M
  int mvy[8];
1353
1.65M
  int i;
1354
1355
1.65M
  mv.as_int = 0;
1356
1357
1.65M
  if (here->mbmi.ref_frame != INTRA_FRAME) {
1358
1.65M
    near_mvs[0].as_int = near_mvs[1].as_int = near_mvs[2].as_int =
1359
1.65M
        near_mvs[3].as_int = near_mvs[4].as_int = near_mvs[5].as_int =
1360
1.65M
            near_mvs[6].as_int = near_mvs[7].as_int = 0;
1361
1.65M
    near_ref[0] = near_ref[1] = near_ref[2] = near_ref[3] = near_ref[4] =
1362
1.65M
        near_ref[5] = near_ref[6] = near_ref[7] = 0;
1363
1364
    /* read in 3 nearby block's MVs from current frame as prediction
1365
     * candidates.
1366
     */
1367
1.65M
    if (above->mbmi.ref_frame != INTRA_FRAME) {
1368
490k
      near_mvs[vcnt].as_int = above->mbmi.mv.as_int;
1369
490k
      mv_bias(ref_frame_sign_bias[above->mbmi.ref_frame], refframe,
1370
490k
              &near_mvs[vcnt], ref_frame_sign_bias);
1371
490k
      near_ref[vcnt] = above->mbmi.ref_frame;
1372
490k
    }
1373
1.65M
    vcnt++;
1374
1.65M
    if (left->mbmi.ref_frame != INTRA_FRAME) {
1375
666k
      near_mvs[vcnt].as_int = left->mbmi.mv.as_int;
1376
666k
      mv_bias(ref_frame_sign_bias[left->mbmi.ref_frame], refframe,
1377
666k
              &near_mvs[vcnt], ref_frame_sign_bias);
1378
666k
      near_ref[vcnt] = left->mbmi.ref_frame;
1379
666k
    }
1380
1.65M
    vcnt++;
1381
1.65M
    if (aboveleft->mbmi.ref_frame != INTRA_FRAME) {
1382
383k
      near_mvs[vcnt].as_int = aboveleft->mbmi.mv.as_int;
1383
383k
      mv_bias(ref_frame_sign_bias[aboveleft->mbmi.ref_frame], refframe,
1384
383k
              &near_mvs[vcnt], ref_frame_sign_bias);
1385
383k
      near_ref[vcnt] = aboveleft->mbmi.ref_frame;
1386
383k
    }
1387
1.65M
    vcnt++;
1388
1389
    /* read in 5 nearby block's MVs from last frame. */
1390
1.65M
    if (cpi->common.last_frame_type != KEY_FRAME) {
1391
997k
      mb_offset = (-xd->mb_to_top_edge / 128 + 1) * (xd->mode_info_stride + 1) +
1392
997k
                  (-xd->mb_to_left_edge / 128 + 1);
1393
1394
      /* current in last frame */
1395
997k
      if (cpi->lf_ref_frame[mb_offset] != INTRA_FRAME) {
1396
577k
        near_mvs[vcnt].as_int = cpi->lfmv[mb_offset].as_int;
1397
577k
        mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset], refframe,
1398
577k
                &near_mvs[vcnt], ref_frame_sign_bias);
1399
577k
        near_ref[vcnt] = cpi->lf_ref_frame[mb_offset];
1400
577k
      }
1401
997k
      vcnt++;
1402
1403
      /* above in last frame */
1404
997k
      if (cpi->lf_ref_frame[mb_offset - xd->mode_info_stride - 1] !=
1405
997k
          INTRA_FRAME) {
1406
330k
        near_mvs[vcnt].as_int =
1407
330k
            cpi->lfmv[mb_offset - xd->mode_info_stride - 1].as_int;
1408
330k
        mv_bias(
1409
330k
            cpi->lf_ref_frame_sign_bias[mb_offset - xd->mode_info_stride - 1],
1410
330k
            refframe, &near_mvs[vcnt], ref_frame_sign_bias);
1411
330k
        near_ref[vcnt] =
1412
330k
            cpi->lf_ref_frame[mb_offset - xd->mode_info_stride - 1];
1413
330k
      }
1414
997k
      vcnt++;
1415
1416
      /* left in last frame */
1417
997k
      if (cpi->lf_ref_frame[mb_offset - 1] != INTRA_FRAME) {
1418
401k
        near_mvs[vcnt].as_int = cpi->lfmv[mb_offset - 1].as_int;
1419
401k
        mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset - 1], refframe,
1420
401k
                &near_mvs[vcnt], ref_frame_sign_bias);
1421
401k
        near_ref[vcnt] = cpi->lf_ref_frame[mb_offset - 1];
1422
401k
      }
1423
997k
      vcnt++;
1424
1425
      /* right in last frame */
1426
997k
      if (cpi->lf_ref_frame[mb_offset + 1] != INTRA_FRAME) {
1427
403k
        near_mvs[vcnt].as_int = cpi->lfmv[mb_offset + 1].as_int;
1428
403k
        mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset + 1], refframe,
1429
403k
                &near_mvs[vcnt], ref_frame_sign_bias);
1430
403k
        near_ref[vcnt] = cpi->lf_ref_frame[mb_offset + 1];
1431
403k
      }
1432
997k
      vcnt++;
1433
1434
      /* below in last frame */
1435
997k
      if (cpi->lf_ref_frame[mb_offset + xd->mode_info_stride + 1] !=
1436
997k
          INTRA_FRAME) {
1437
327k
        near_mvs[vcnt].as_int =
1438
327k
            cpi->lfmv[mb_offset + xd->mode_info_stride + 1].as_int;
1439
327k
        mv_bias(
1440
327k
            cpi->lf_ref_frame_sign_bias[mb_offset + xd->mode_info_stride + 1],
1441
327k
            refframe, &near_mvs[vcnt], ref_frame_sign_bias);
1442
327k
        near_ref[vcnt] =
1443
327k
            cpi->lf_ref_frame[mb_offset + xd->mode_info_stride + 1];
1444
327k
      }
1445
997k
      vcnt++;
1446
997k
    }
1447
1448
6.89M
    for (i = 0; i < vcnt; ++i) {
1449
6.13M
      if (near_ref[near_sadidx[i]] != INTRA_FRAME) {
1450
2.08M
        if (here->mbmi.ref_frame == near_ref[near_sadidx[i]]) {
1451
894k
          mv.as_int = near_mvs[near_sadidx[i]].as_int;
1452
894k
          find = 1;
1453
894k
          if (i < 3) {
1454
810k
            *sr = 3;
1455
810k
          } else {
1456
84.9k
            *sr = 2;
1457
84.9k
          }
1458
894k
          break;
1459
894k
        }
1460
2.08M
      }
1461
6.13M
    }
1462
1463
1.65M
    if (!find) {
1464
5.34M
      for (i = 0; i < vcnt; ++i) {
1465
4.59M
        mvx[i] = near_mvs[i].as_mv.row;
1466
4.59M
        mvy[i] = near_mvs[i].as_mv.col;
1467
4.59M
      }
1468
1469
759k
      insertsortmv(mvx, vcnt);
1470
759k
      insertsortmv(mvy, vcnt);
1471
759k
      mv.as_mv.row = mvx[vcnt / 2];
1472
759k
      mv.as_mv.col = mvy[vcnt / 2];
1473
1474
      /* sr is set to 0 to allow calling function to decide the search
1475
       * range.
1476
       */
1477
759k
      *sr = 0;
1478
759k
    }
1479
1.65M
  }
1480
1481
  /* Set up return values */
1482
1.65M
  mvp->as_int = mv.as_int;
1483
1.65M
  vp8_clamp_mv2(mvp, xd);
1484
1.65M
}
1485
1486
void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x,
1487
1.20M
                 int recon_yoffset, int near_sadidx[]) {
1488
  /* near_sad indexes:
1489
   *   0-cf above, 1-cf left, 2-cf aboveleft,
1490
   *   3-lf current, 4-lf above, 5-lf left, 6-lf right, 7-lf below
1491
   */
1492
1.20M
  int near_sad[8] = { 0 };
1493
1.20M
  BLOCK *b = &x->block[0];
1494
1.20M
  unsigned char *src_y_ptr = *(b->base_src);
1495
1496
  /* calculate sad for current frame 3 nearby MBs. */
1497
1.20M
  if (xd->mb_to_top_edge == 0 && xd->mb_to_left_edge == 0) {
1498
82.0k
    near_sad[0] = near_sad[1] = near_sad[2] = INT_MAX;
1499
1.12M
  } else if (xd->mb_to_top_edge ==
1500
1.12M
             0) { /* only has left MB for sad calculation. */
1501
470k
    near_sad[0] = near_sad[2] = INT_MAX;
1502
470k
    near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(
1503
470k
        src_y_ptr, b->src_stride, xd->dst.y_buffer - 16, xd->dst.y_stride);
1504
651k
  } else if (xd->mb_to_left_edge ==
1505
651k
             0) { /* only has left MB for sad calculation. */
1506
115k
    near_sad[1] = near_sad[2] = INT_MAX;
1507
115k
    near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(
1508
115k
        src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride * 16,
1509
115k
        xd->dst.y_stride);
1510
535k
  } else {
1511
535k
    near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(
1512
535k
        src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride * 16,
1513
535k
        xd->dst.y_stride);
1514
535k
    near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(
1515
535k
        src_y_ptr, b->src_stride, xd->dst.y_buffer - 16, xd->dst.y_stride);
1516
535k
    near_sad[2] = cpi->fn_ptr[BLOCK_16X16].sdf(
1517
535k
        src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride * 16 - 16,
1518
535k
        xd->dst.y_stride);
1519
535k
  }
1520
1521
1.20M
  if (cpi->common.last_frame_type != KEY_FRAME) {
1522
    /* calculate sad for last frame 5 nearby MBs. */
1523
547k
    unsigned char *pre_y_buffer =
1524
547k
        cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_buffer + recon_yoffset;
1525
547k
    int pre_y_stride = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_stride;
1526
1527
547k
    if (xd->mb_to_top_edge == 0) near_sad[4] = INT_MAX;
1528
547k
    if (xd->mb_to_left_edge == 0) near_sad[5] = INT_MAX;
1529
547k
    if (xd->mb_to_right_edge == 0) near_sad[6] = INT_MAX;
1530
547k
    if (xd->mb_to_bottom_edge == 0) near_sad[7] = INT_MAX;
1531
1532
547k
    if (near_sad[4] != INT_MAX) {
1533
317k
      near_sad[4] = cpi->fn_ptr[BLOCK_16X16].sdf(
1534
317k
          src_y_ptr, b->src_stride, pre_y_buffer - pre_y_stride * 16,
1535
317k
          pre_y_stride);
1536
317k
    }
1537
547k
    if (near_sad[5] != INT_MAX) {
1538
406k
      near_sad[5] = cpi->fn_ptr[BLOCK_16X16].sdf(
1539
406k
          src_y_ptr, b->src_stride, pre_y_buffer - 16, pre_y_stride);
1540
406k
    }
1541
547k
    near_sad[3] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride,
1542
547k
                                               pre_y_buffer, pre_y_stride);
1543
547k
    if (near_sad[6] != INT_MAX) {
1544
408k
      near_sad[6] = cpi->fn_ptr[BLOCK_16X16].sdf(
1545
408k
          src_y_ptr, b->src_stride, pre_y_buffer + 16, pre_y_stride);
1546
408k
    }
1547
547k
    if (near_sad[7] != INT_MAX) {
1548
341k
      near_sad[7] = cpi->fn_ptr[BLOCK_16X16].sdf(
1549
341k
          src_y_ptr, b->src_stride, pre_y_buffer + pre_y_stride * 16,
1550
341k
          pre_y_stride);
1551
341k
    }
1552
547k
  }
1553
1554
1.20M
  if (cpi->common.last_frame_type != KEY_FRAME) {
1555
547k
    insertsortsad(near_sad, near_sadidx, 8);
1556
656k
  } else {
1557
656k
    insertsortsad(near_sad, near_sadidx, 3);
1558
656k
  }
1559
1.20M
}
1560
1561
767k
static void rd_update_mvcount(MACROBLOCK *x, int_mv *best_ref_mv) {
1562
767k
  if (x->e_mbd.mode_info_context->mbmi.mode == SPLITMV) {
1563
176k
    int i;
1564
1565
1.56M
    for (i = 0; i < x->partition_info->count; ++i) {
1566
1.38M
      if (x->partition_info->bmi[i].mode == NEW4X4) {
1567
603k
        const int row_val = ((x->partition_info->bmi[i].mv.as_mv.row -
1568
603k
                              best_ref_mv->as_mv.row) >>
1569
603k
                             1);
1570
603k
        const int row_idx = mv_max + row_val;
1571
603k
        const int col_val = ((x->partition_info->bmi[i].mv.as_mv.col -
1572
603k
                              best_ref_mv->as_mv.col) >>
1573
603k
                             1);
1574
603k
        const int col_idx = mv_max + col_val;
1575
603k
        if (row_idx >= 0 && row_idx < MVvals && col_idx >= 0 &&
1576
603k
            col_idx < MVvals) {
1577
603k
          x->MVcount[0][row_idx]++;
1578
603k
          x->MVcount[1][col_idx]++;
1579
603k
        }
1580
603k
      }
1581
1.38M
    }
1582
591k
  } else if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV) {
1583
85.7k
    const int row_val = ((x->e_mbd.mode_info_context->mbmi.mv.as_mv.row -
1584
85.7k
                          best_ref_mv->as_mv.row) >>
1585
85.7k
                         1);
1586
85.7k
    const int row_idx = mv_max + row_val;
1587
85.7k
    const int col_val = ((x->e_mbd.mode_info_context->mbmi.mv.as_mv.col -
1588
85.7k
                          best_ref_mv->as_mv.col) >>
1589
85.7k
                         1);
1590
85.7k
    const int col_idx = mv_max + col_val;
1591
85.7k
    if (row_idx >= 0 && row_idx < MVvals && col_idx >= 0 && col_idx < MVvals) {
1592
85.7k
      x->MVcount[0][row_idx]++;
1593
85.7k
      x->MVcount[1][col_idx]++;
1594
85.7k
    }
1595
85.7k
  }
1596
767k
}
1597
1598
static int evaluate_inter_mode_rd(int mdcounts[4], RATE_DISTORTION *rd,
1599
                                  int *disable_skip, VP8_COMP *cpi,
1600
2.93M
                                  MACROBLOCK *x) {
1601
2.93M
  MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode;
1602
2.93M
  BLOCK *b = &x->block[0];
1603
2.93M
  MACROBLOCKD *xd = &x->e_mbd;
1604
2.93M
  int distortion;
1605
2.93M
  vp8_build_inter16x16_predictors_mby(&x->e_mbd, x->e_mbd.predictor, 16);
1606
1607
2.93M
  if (cpi->active_map_enabled && x->active_ptr[0] == 0) {
1608
0
    x->skip = 1;
1609
2.93M
  } else if (x->encode_breakout) {
1610
0
    unsigned int sse;
1611
0
    unsigned int var;
1612
0
    unsigned int threshold =
1613
0
        (xd->block[0].dequant[1] * xd->block[0].dequant[1] >> 4);
1614
1615
0
    if (threshold < x->encode_breakout) threshold = x->encode_breakout;
1616
1617
0
    var = vpx_variance16x16(*(b->base_src), b->src_stride, x->e_mbd.predictor,
1618
0
                            16, &sse);
1619
1620
0
    if (sse < threshold) {
1621
0
      unsigned int q2dc = xd->block[24].dequant[0];
1622
      /* If theres is no codeable 2nd order dc
1623
         or a very small uniform pixel change change */
1624
0
      if ((sse - var < q2dc * q2dc >> 4) || (sse / 2 > var && sse - var < 64)) {
1625
        /* Check u and v to make sure skip is ok */
1626
0
        unsigned int sse2 = VP8_UVSSE(x);
1627
0
        if (sse2 * 2 < threshold) {
1628
0
          x->skip = 1;
1629
0
          rd->distortion2 = sse + sse2;
1630
0
          rd->rate2 = 500;
1631
1632
          /* for best_yrd calculation */
1633
0
          rd->rate_uv = 0;
1634
0
          rd->distortion_uv = sse2;
1635
1636
0
          *disable_skip = 1;
1637
0
          return RDCOST(x->rdmult, x->rddiv, rd->rate2, rd->distortion2);
1638
0
        }
1639
0
      }
1640
0
    }
1641
0
  }
1642
1643
  /* Add in the Mv/mode cost */
1644
2.93M
  rd->rate2 += vp8_cost_mv_ref(this_mode, mdcounts);
1645
1646
  /* Y cost and distortion */
1647
2.93M
  macro_block_yrd(x, &rd->rate_y, &distortion);
1648
2.93M
  rd->rate2 += rd->rate_y;
1649
2.93M
  rd->distortion2 += distortion;
1650
1651
  /* UV cost and distortion */
1652
2.93M
  rd_inter16x16_uv(cpi, x, &rd->rate_uv, &rd->distortion_uv,
1653
2.93M
                   cpi->common.full_pixel);
1654
2.93M
  rd->rate2 += rd->rate_uv;
1655
2.93M
  rd->distortion2 += rd->distortion_uv;
1656
2.93M
  return INT_MAX;
1657
2.93M
}
1658
1659
static int calculate_final_rd_costs(int this_rd, RATE_DISTORTION *rd,
1660
                                    int *other_cost, int disable_skip,
1661
                                    int uv_intra_tteob, int intra_rd_penalty,
1662
6.83M
                                    VP8_COMP *cpi, MACROBLOCK *x) {
1663
6.83M
  MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode;
1664
1665
  /* Where skip is allowable add in the default per mb cost for the no
1666
   * skip case. where we then decide to skip we have to delete this and
1667
   * replace it with the cost of signalling a skip
1668
   */
1669
6.83M
  if (cpi->common.mb_no_coeff_skip) {
1670
6.83M
    *other_cost += vp8_cost_bit(cpi->prob_skip_false, 0);
1671
6.83M
    rd->rate2 += *other_cost;
1672
6.83M
  }
1673
1674
  /* Estimate the reference frame signaling cost and add it
1675
   * to the rolling cost variable.
1676
   */
1677
6.83M
  rd->rate2 += x->ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
1678
1679
6.83M
  if (!disable_skip) {
1680
    /* Test for the condition where skip block will be activated
1681
     * because there are no non zero coefficients and make any
1682
     * necessary adjustment for rate
1683
     */
1684
6.16M
    if (cpi->common.mb_no_coeff_skip) {
1685
6.16M
      int i;
1686
6.16M
      int tteob;
1687
6.16M
      int has_y2_block = (this_mode != SPLITMV && this_mode != B_PRED);
1688
1689
6.16M
      tteob = 0;
1690
6.16M
      if (has_y2_block) tteob += x->e_mbd.eobs[24];
1691
1692
104M
      for (i = 0; i < 16; ++i) tteob += (x->e_mbd.eobs[i] > has_y2_block);
1693
1694
6.16M
      if (x->e_mbd.mode_info_context->mbmi.ref_frame) {
1695
29.8M
        for (i = 16; i < 24; ++i) tteob += x->e_mbd.eobs[i];
1696
3.32M
      } else {
1697
2.84M
        tteob += uv_intra_tteob;
1698
2.84M
      }
1699
1700
6.16M
      if (tteob == 0) {
1701
402k
        rd->rate2 -= (rd->rate_y + rd->rate_uv);
1702
        /* for best_yrd calculation */
1703
402k
        rd->rate_uv = 0;
1704
1705
        /* Back out no skip flag costing and add in skip flag costing */
1706
402k
        if (cpi->prob_skip_false) {
1707
402k
          int prob_skip_cost;
1708
1709
402k
          prob_skip_cost = vp8_cost_bit(cpi->prob_skip_false, 1);
1710
402k
          prob_skip_cost -= (int)vp8_cost_bit(cpi->prob_skip_false, 0);
1711
402k
          rd->rate2 += prob_skip_cost;
1712
402k
          *other_cost += prob_skip_cost;
1713
402k
        }
1714
402k
      }
1715
6.16M
    }
1716
    /* Calculate the final RD estimate for this mode */
1717
6.16M
    this_rd = RDCOST(x->rdmult, x->rddiv, rd->rate2, rd->distortion2);
1718
6.16M
    if (this_rd < INT_MAX &&
1719
6.16M
        x->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
1720
2.84M
      this_rd += intra_rd_penalty;
1721
2.84M
    }
1722
6.16M
  }
1723
6.83M
  return this_rd;
1724
6.83M
}
1725
1726
static void update_best_mode(BEST_MODE *best_mode, int this_rd,
1727
                             RATE_DISTORTION *rd, int other_cost,
1728
2.45M
                             MACROBLOCK *x) {
1729
2.45M
  MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode;
1730
1731
2.45M
  other_cost += x->ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
1732
1733
  /* Calculate the final y RD estimate for this mode */
1734
2.45M
  best_mode->yrd =
1735
2.45M
      RDCOST(x->rdmult, x->rddiv, (rd->rate2 - rd->rate_uv - other_cost),
1736
2.45M
             (rd->distortion2 - rd->distortion_uv));
1737
1738
2.45M
  best_mode->rd = this_rd;
1739
2.45M
  best_mode->mbmode = x->e_mbd.mode_info_context->mbmi;
1740
2.45M
  best_mode->partition = *x->partition_info;
1741
1742
2.45M
  if ((this_mode == B_PRED) || (this_mode == SPLITMV)) {
1743
516k
    int i;
1744
8.78M
    for (i = 0; i < 16; ++i) {
1745
8.27M
      best_mode->bmodes[i] = x->e_mbd.block[i].bmi;
1746
8.27M
    }
1747
516k
  }
1748
2.45M
}
1749
1750
void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
1751
                            int recon_uvoffset, int *returnrate,
1752
                            int *returndistortion, int *returnintra, int mb_row,
1753
767k
                            int mb_col) {
1754
767k
  BLOCK *b = &x->block[0];
1755
767k
  BLOCKD *d = &x->e_mbd.block[0];
1756
767k
  MACROBLOCKD *xd = &x->e_mbd;
1757
767k
  int_mv best_ref_mv_sb[2];
1758
767k
  int_mv mode_mv_sb[2][MB_MODE_COUNT];
1759
767k
  int_mv best_ref_mv;
1760
767k
  int_mv *mode_mv;
1761
767k
  MB_PREDICTION_MODE this_mode;
1762
767k
  int num00;
1763
767k
  int best_mode_index = 0;
1764
767k
  BEST_MODE best_mode;
1765
1766
767k
  int i;
1767
767k
  int mode_index;
1768
767k
  int mdcounts[4];
1769
767k
  int rate;
1770
767k
  RATE_DISTORTION rd;
1771
767k
  int uv_intra_rate, uv_intra_distortion, uv_intra_rate_tokenonly;
1772
767k
  int uv_intra_tteob = 0;
1773
767k
  int uv_intra_done = 0;
1774
1775
767k
  MB_PREDICTION_MODE uv_intra_mode = 0;
1776
767k
  int_mv mvp;
1777
767k
  int near_sadidx[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
1778
767k
  int saddone = 0;
1779
  /* search range got from mv_pred(). It uses step_param levels. (0-7) */
1780
767k
  int sr = 0;
1781
1782
767k
  unsigned char *plane[4][3] = { { 0, 0 } };
1783
767k
  int ref_frame_map[4];
1784
767k
  int sign_bias = 0;
1785
1786
767k
  int intra_rd_penalty =
1787
767k
      10 * vp8_dc_quant(cpi->common.base_qindex, cpi->common.y1dc_delta_q);
1788
1789
767k
#if CONFIG_TEMPORAL_DENOISING
1790
767k
  unsigned int zero_mv_sse = UINT_MAX, best_sse = UINT_MAX,
1791
767k
               best_rd_sse = UINT_MAX;
1792
767k
#endif
1793
1794
  // _uv variables are not set consistantly before calling update_best_mode.
1795
767k
  rd.rate_uv = 0;
1796
767k
  rd.distortion_uv = 0;
1797
1798
767k
  mode_mv = mode_mv_sb[sign_bias];
1799
767k
  best_ref_mv.as_int = 0;
1800
767k
  best_mode.rd = INT_MAX;
1801
767k
  best_mode.yrd = INT_MAX;
1802
767k
  best_mode.intra_rd = INT_MAX;
1803
767k
  memset(mode_mv_sb, 0, sizeof(mode_mv_sb));
1804
767k
  memset(&best_mode.mbmode, 0, sizeof(best_mode.mbmode));
1805
767k
  memset(&best_mode.bmodes, 0, sizeof(best_mode.bmodes));
1806
1807
  /* Setup search priorities */
1808
767k
  get_reference_search_order(cpi, ref_frame_map);
1809
1810
  /* Check to see if there is at least 1 valid reference frame that we need
1811
   * to calculate near_mvs.
1812
   */
1813
767k
  if (ref_frame_map[1] > 0) {
1814
767k
    sign_bias = vp8_find_near_mvs_bias(
1815
767k
        &x->e_mbd, x->e_mbd.mode_info_context, mode_mv_sb, best_ref_mv_sb,
1816
767k
        mdcounts, ref_frame_map[1], cpi->common.ref_frame_sign_bias);
1817
1818
767k
    mode_mv = mode_mv_sb[sign_bias];
1819
767k
    best_ref_mv.as_int = best_ref_mv_sb[sign_bias].as_int;
1820
767k
  }
1821
1822
767k
  get_predictor_pointers(cpi, plane, recon_yoffset, recon_uvoffset);
1823
1824
767k
  *returnintra = INT_MAX;
1825
  /* Count of the number of MBs tested so far this frame */
1826
767k
  x->mbs_tested_so_far++;
1827
1828
767k
  x->skip = 0;
1829
1830
16.1M
  for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
1831
15.3M
    int this_rd = INT_MAX;
1832
15.3M
    int disable_skip = 0;
1833
15.3M
    int other_cost = 0;
1834
15.3M
    int this_ref_frame = ref_frame_map[vp8_ref_frame_order[mode_index]];
1835
1836
    /* Test best rd so far against threshold for trying this mode. */
1837
15.3M
    if (best_mode.rd <= x->rd_threshes[mode_index]) continue;
1838
1839
13.7M
    if (this_ref_frame < 0) continue;
1840
1841
    /* These variables hold are rolling total cost and distortion for
1842
     * this mode
1843
     */
1844
8.67M
    rd.rate2 = 0;
1845
8.67M
    rd.distortion2 = 0;
1846
1847
8.67M
    this_mode = vp8_mode_order[mode_index];
1848
1849
8.67M
    x->e_mbd.mode_info_context->mbmi.mode = this_mode;
1850
8.67M
    x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame;
1851
1852
    /* Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
1853
     * unless ARNR filtering is enabled in which case we want
1854
     * an unfiltered alternative
1855
     */
1856
8.67M
    if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
1857
0
      if (this_mode != ZEROMV ||
1858
0
          x->e_mbd.mode_info_context->mbmi.ref_frame != ALTREF_FRAME) {
1859
0
        continue;
1860
0
      }
1861
0
    }
1862
1863
    /* everything but intra */
1864
8.67M
    if (x->e_mbd.mode_info_context->mbmi.ref_frame) {
1865
5.50M
      assert(plane[this_ref_frame][0] != NULL &&
1866
5.50M
             plane[this_ref_frame][1] != NULL &&
1867
5.50M
             plane[this_ref_frame][2] != NULL);
1868
5.50M
      x->e_mbd.pre.y_buffer = plane[this_ref_frame][0];
1869
5.50M
      x->e_mbd.pre.u_buffer = plane[this_ref_frame][1];
1870
5.50M
      x->e_mbd.pre.v_buffer = plane[this_ref_frame][2];
1871
1872
5.50M
      if (sign_bias != cpi->common.ref_frame_sign_bias[this_ref_frame]) {
1873
0
        sign_bias = cpi->common.ref_frame_sign_bias[this_ref_frame];
1874
0
        mode_mv = mode_mv_sb[sign_bias];
1875
0
        best_ref_mv.as_int = best_ref_mv_sb[sign_bias].as_int;
1876
0
      }
1877
5.50M
    }
1878
1879
    /* Check to see if the testing frequency for this mode is at its
1880
     * max If so then prevent it from being tested and increase the
1881
     * threshold for its testing
1882
     */
1883
8.67M
    if (x->mode_test_hit_counts[mode_index] &&
1884
7.81M
        (cpi->mode_check_freq[mode_index] > 1)) {
1885
235k
      if (x->mbs_tested_so_far <= cpi->mode_check_freq[mode_index] *
1886
235k
                                      x->mode_test_hit_counts[mode_index]) {
1887
        /* Increase the threshold for coding this mode to make it
1888
         * less likely to be chosen
1889
         */
1890
129k
        x->rd_thresh_mult[mode_index] += 4;
1891
1892
129k
        if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT) {
1893
26.4k
          x->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
1894
26.4k
        }
1895
1896
129k
        x->rd_threshes[mode_index] =
1897
129k
            (cpi->rd_baseline_thresh[mode_index] >> 7) *
1898
129k
            x->rd_thresh_mult[mode_index];
1899
1900
129k
        continue;
1901
129k
      }
1902
235k
    }
1903
1904
    /* We have now reached the point where we are going to test the
1905
     * current mode so increment the counter for the number of times
1906
     * it has been tested
1907
     */
1908
8.54M
    x->mode_test_hit_counts[mode_index]++;
1909
1910
    /* Experimental code. Special case for gf and arf zeromv modes.
1911
     * Increase zbin size to supress noise
1912
     */
1913
8.54M
    if (x->zbin_mode_boost_enabled) {
1914
0
      if (this_ref_frame == INTRA_FRAME) {
1915
0
        x->zbin_mode_boost = 0;
1916
0
      } else {
1917
0
        if (vp8_mode_order[mode_index] == ZEROMV) {
1918
0
          if (this_ref_frame != LAST_FRAME) {
1919
0
            x->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
1920
0
          } else {
1921
0
            x->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
1922
0
          }
1923
0
        } else if (vp8_mode_order[mode_index] == SPLITMV) {
1924
0
          x->zbin_mode_boost = 0;
1925
0
        } else {
1926
0
          x->zbin_mode_boost = MV_ZBIN_BOOST;
1927
0
        }
1928
0
      }
1929
1930
0
      vp8_update_zbin_extra(cpi, x);
1931
0
    }
1932
1933
8.54M
    if (!uv_intra_done && this_ref_frame == INTRA_FRAME) {
1934
767k
      rd_pick_intra_mbuv_mode(x, &uv_intra_rate, &uv_intra_rate_tokenonly,
1935
767k
                              &uv_intra_distortion);
1936
767k
      uv_intra_mode = x->e_mbd.mode_info_context->mbmi.uv_mode;
1937
1938
      /*
1939
       * Total of the eobs is used later to further adjust rate2. Since uv
1940
       * block's intra eobs will be overwritten when we check inter modes,
1941
       * we need to save uv_intra_tteob here.
1942
       */
1943
6.90M
      for (i = 16; i < 24; ++i) uv_intra_tteob += x->e_mbd.eobs[i];
1944
1945
767k
      uv_intra_done = 1;
1946
767k
    }
1947
1948
8.54M
    switch (this_mode) {
1949
532k
      case B_PRED: {
1950
532k
        int tmp_rd;
1951
1952
        /* Note the rate value returned here includes the cost of
1953
         * coding the BPRED mode: x->mbmode_cost[x->e_mbd.frame_type][BPRED]
1954
         */
1955
532k
        int distortion;
1956
532k
        tmp_rd = rd_pick_intra4x4mby_modes(x, &rate, &rd.rate_y, &distortion,
1957
532k
                                           best_mode.yrd);
1958
532k
        rd.rate2 += rate;
1959
532k
        rd.distortion2 += distortion;
1960
1961
532k
        if (tmp_rd < best_mode.yrd) {
1962
206k
          assert(uv_intra_done);
1963
206k
          rd.rate2 += uv_intra_rate;
1964
206k
          rd.rate_uv = uv_intra_rate_tokenonly;
1965
206k
          rd.distortion2 += uv_intra_distortion;
1966
206k
          rd.distortion_uv = uv_intra_distortion;
1967
326k
        } else {
1968
326k
          this_rd = INT_MAX;
1969
326k
          disable_skip = 1;
1970
326k
        }
1971
532k
        break;
1972
0
      }
1973
1974
732k
      case SPLITMV: {
1975
732k
        int tmp_rd;
1976
732k
        int this_rd_thresh;
1977
732k
        int distortion;
1978
1979
732k
        this_rd_thresh = (vp8_ref_frame_order[mode_index] == 1)
1980
732k
                             ? x->rd_threshes[THR_NEW1]
1981
732k
                             : x->rd_threshes[THR_NEW3];
1982
732k
        this_rd_thresh = (vp8_ref_frame_order[mode_index] == 2)
1983
732k
                             ? x->rd_threshes[THR_NEW2]
1984
732k
                             : this_rd_thresh;
1985
1986
732k
        tmp_rd = vp8_rd_pick_best_mbsegmentation(
1987
732k
            cpi, x, &best_ref_mv, best_mode.yrd, mdcounts, &rate, &rd.rate_y,
1988
732k
            &distortion, this_rd_thresh);
1989
1990
732k
        rd.rate2 += rate;
1991
732k
        rd.distortion2 += distortion;
1992
1993
        /* If even the 'Y' rd value of split is higher than best so far
1994
         * then don't bother looking at UV
1995
         */
1996
732k
        if (tmp_rd < best_mode.yrd) {
1997
          /* Now work out UV cost and add it in */
1998
385k
          rd_inter4x4_uv(cpi, x, &rd.rate_uv, &rd.distortion_uv,
1999
385k
                         cpi->common.full_pixel);
2000
385k
          rd.rate2 += rd.rate_uv;
2001
385k
          rd.distortion2 += rd.distortion_uv;
2002
385k
        } else {
2003
347k
          this_rd = INT_MAX;
2004
347k
          disable_skip = 1;
2005
347k
        }
2006
732k
        break;
2007
0
      }
2008
767k
      case DC_PRED:
2009
1.39M
      case V_PRED:
2010
2.02M
      case H_PRED:
2011
2.63M
      case TM_PRED: {
2012
2.63M
        int distortion;
2013
2.63M
        x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
2014
2015
2.63M
        vp8_build_intra_predictors_mby_s(
2016
2.63M
            xd, xd->dst.y_buffer - xd->dst.y_stride, xd->dst.y_buffer - 1,
2017
2.63M
            xd->dst.y_stride, xd->predictor, 16);
2018
2.63M
        macro_block_yrd(x, &rd.rate_y, &distortion);
2019
2.63M
        rd.rate2 += rd.rate_y;
2020
2.63M
        rd.distortion2 += distortion;
2021
2.63M
        rd.rate2 += x->mbmode_cost[x->e_mbd.frame_type]
2022
2.63M
                                  [x->e_mbd.mode_info_context->mbmi.mode];
2023
2.63M
        assert(uv_intra_done);
2024
2.63M
        rd.rate2 += uv_intra_rate;
2025
2.63M
        rd.rate_uv = uv_intra_rate_tokenonly;
2026
2.63M
        rd.distortion2 += uv_intra_distortion;
2027
2.63M
        rd.distortion_uv = uv_intra_distortion;
2028
2.63M
        break;
2029
2.02M
      }
2030
2031
994k
      case NEWMV: {
2032
994k
        int thissme;
2033
994k
        int bestsme = INT_MAX;
2034
994k
        int step_param = cpi->sf.first_step;
2035
994k
        int further_steps;
2036
994k
        int n;
2037
        /* If last step (1-away) of n-step search doesn't pick the center point
2038
           as the best match, we will do a final 1-away diamond refining search
2039
        */
2040
994k
        int do_refine = 1;
2041
2042
994k
        int sadpb = x->sadperbit16;
2043
994k
        int_mv mvp_full;
2044
2045
994k
        int col_min = ((best_ref_mv.as_mv.col + 7) >> 3) - MAX_FULL_PEL_VAL;
2046
994k
        int row_min = ((best_ref_mv.as_mv.row + 7) >> 3) - MAX_FULL_PEL_VAL;
2047
994k
        int col_max = (best_ref_mv.as_mv.col >> 3) + MAX_FULL_PEL_VAL;
2048
994k
        int row_max = (best_ref_mv.as_mv.row >> 3) + MAX_FULL_PEL_VAL;
2049
2050
994k
        int tmp_col_min = x->mv_col_min;
2051
994k
        int tmp_col_max = x->mv_col_max;
2052
994k
        int tmp_row_min = x->mv_row_min;
2053
994k
        int tmp_row_max = x->mv_row_max;
2054
2055
994k
        if (!saddone) {
2056
652k
          vp8_cal_sad(cpi, xd, x, recon_yoffset, &near_sadidx[0]);
2057
652k
          saddone = 1;
2058
652k
        }
2059
2060
994k
        vp8_mv_pred(cpi, &x->e_mbd, x->e_mbd.mode_info_context, &mvp,
2061
994k
                    x->e_mbd.mode_info_context->mbmi.ref_frame,
2062
994k
                    cpi->common.ref_frame_sign_bias, &sr, &near_sadidx[0]);
2063
2064
994k
        mvp_full.as_mv.col = mvp.as_mv.col >> 3;
2065
994k
        mvp_full.as_mv.row = mvp.as_mv.row >> 3;
2066
2067
        /* Get intersection of UMV window and valid MV window to
2068
         * reduce # of checks in diamond search.
2069
         */
2070
994k
        if (x->mv_col_min < col_min) x->mv_col_min = col_min;
2071
994k
        if (x->mv_col_max > col_max) x->mv_col_max = col_max;
2072
994k
        if (x->mv_row_min < row_min) x->mv_row_min = row_min;
2073
994k
        if (x->mv_row_max > row_max) x->mv_row_max = row_max;
2074
2075
        /* adjust search range according to sr from mv prediction */
2076
994k
        if (sr > step_param) step_param = sr;
2077
2078
        /* Initial step/diamond search */
2079
994k
        {
2080
994k
          bestsme = cpi->diamond_search_sad(
2081
994k
              x, b, d, &mvp_full, &d->bmi.mv, step_param, sadpb, &num00,
2082
994k
              &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv);
2083
994k
          mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
2084
2085
          /* Further step/diamond searches as necessary */
2086
994k
          further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
2087
2088
994k
          n = num00;
2089
994k
          num00 = 0;
2090
2091
          /* If there won't be more n-step search, check to see if refining
2092
           * search is needed. */
2093
994k
          if (n > further_steps) do_refine = 0;
2094
2095
4.25M
          while (n < further_steps) {
2096
3.25M
            n++;
2097
2098
3.25M
            if (num00) {
2099
288k
              num00--;
2100
2.96M
            } else {
2101
2.96M
              thissme = cpi->diamond_search_sad(
2102
2.96M
                  x, b, d, &mvp_full, &d->bmi.mv, step_param + n, sadpb, &num00,
2103
2.96M
                  &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv);
2104
2105
              /* check to see if refining search is needed. */
2106
2.96M
              if (num00 > (further_steps - n)) do_refine = 0;
2107
2108
2.96M
              if (thissme < bestsme) {
2109
456k
                bestsme = thissme;
2110
456k
                mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
2111
2.51M
              } else {
2112
2.51M
                d->bmi.mv.as_int = mode_mv[NEWMV].as_int;
2113
2.51M
              }
2114
2.96M
            }
2115
3.25M
          }
2116
994k
        }
2117
2118
        /* final 1-away diamond refining search */
2119
994k
        if (do_refine == 1) {
2120
647k
          int search_range;
2121
2122
647k
          search_range = 8;
2123
2124
647k
          thissme = cpi->refining_search_sad(
2125
647k
              x, b, d, &d->bmi.mv, sadpb, search_range,
2126
647k
              &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv);
2127
2128
647k
          if (thissme < bestsme) {
2129
29.0k
            bestsme = thissme;
2130
29.0k
            mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
2131
618k
          } else {
2132
618k
            d->bmi.mv.as_int = mode_mv[NEWMV].as_int;
2133
618k
          }
2134
647k
        }
2135
2136
994k
        x->mv_col_min = tmp_col_min;
2137
994k
        x->mv_col_max = tmp_col_max;
2138
994k
        x->mv_row_min = tmp_row_min;
2139
994k
        x->mv_row_max = tmp_row_max;
2140
2141
994k
        if (bestsme < INT_MAX) {
2142
994k
          int dis; /* TODO: use dis in distortion calculation later. */
2143
994k
          unsigned int sse;
2144
994k
          cpi->find_fractional_mv_step(
2145
994k
              x, b, d, &d->bmi.mv, &best_ref_mv, x->errorperbit,
2146
994k
              &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &dis, &sse);
2147
994k
        }
2148
2149
994k
        mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
2150
2151
        /* Add the new motion vector cost to our rolling cost variable */
2152
994k
        rd.rate2 +=
2153
994k
            vp8_mv_bit_cost(&mode_mv[NEWMV], &best_ref_mv, x->mvcost, 96);
2154
994k
      }
2155
        // fall through
2156
2157
2.21M
      case NEARESTMV:
2158
3.42M
      case NEARMV:
2159
        /* Clip "next_nearest" so that it does not extend to far out
2160
         * of image
2161
         */
2162
3.42M
        vp8_clamp_mv2(&mode_mv[this_mode], xd);
2163
2164
        /* Do not bother proceeding if the vector (from newmv, nearest
2165
         * or near) is 0,0 as this should then be coded using the zeromv
2166
         * mode.
2167
         */
2168
3.42M
        if (((this_mode == NEARMV) || (this_mode == NEARESTMV)) &&
2169
2.43M
            (mode_mv[this_mode].as_int == 0)) {
2170
1.70M
          continue;
2171
1.70M
        }
2172
        // fall through
2173
2174
2.93M
      case ZEROMV:
2175
2176
        /* Trap vectors that reach beyond the UMV borders
2177
         * Note that ALL New MV, Nearest MV Near MV and Zero MV code
2178
         * drops through to this point because of the lack of break
2179
         * statements in the previous two cases.
2180
         */
2181
2.93M
        if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) ||
2182
2.93M
            ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) ||
2183
2.93M
            ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) ||
2184
2.93M
            ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max)) {
2185
0
          continue;
2186
0
        }
2187
2188
2.93M
        vp8_set_mbmode_and_mvs(x, this_mode, &mode_mv[this_mode]);
2189
2.93M
        this_rd = evaluate_inter_mode_rd(mdcounts, &rd, &disable_skip, cpi, x);
2190
2.93M
        break;
2191
2192
0
      default: break;
2193
8.54M
    }
2194
2195
6.83M
    this_rd =
2196
6.83M
        calculate_final_rd_costs(this_rd, &rd, &other_cost, disable_skip,
2197
6.83M
                                 uv_intra_tteob, intra_rd_penalty, cpi, x);
2198
2199
    /* Keep record of best intra distortion */
2200
6.83M
    if ((x->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME) &&
2201
3.17M
        (this_rd < best_mode.intra_rd)) {
2202
1.12M
      best_mode.intra_rd = this_rd;
2203
1.12M
      *returnintra = rd.distortion2;
2204
1.12M
    }
2205
6.83M
#if CONFIG_TEMPORAL_DENOISING
2206
6.83M
    if (cpi->oxcf.noise_sensitivity) {
2207
0
      unsigned int sse;
2208
0
      vp8_get_inter_mbpred_error(x, &cpi->fn_ptr[BLOCK_16X16], &sse,
2209
0
                                 mode_mv[this_mode]);
2210
2211
0
      if (sse < best_rd_sse) best_rd_sse = sse;
2212
2213
      /* Store for later use by denoiser. */
2214
0
      if (this_mode == ZEROMV && sse < zero_mv_sse) {
2215
0
        zero_mv_sse = sse;
2216
0
        x->best_zeromv_reference_frame =
2217
0
            x->e_mbd.mode_info_context->mbmi.ref_frame;
2218
0
      }
2219
2220
      /* Store the best NEWMV in x for later use in the denoiser. */
2221
0
      if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV && sse < best_sse) {
2222
0
        best_sse = sse;
2223
0
        vp8_get_inter_mbpred_error(x, &cpi->fn_ptr[BLOCK_16X16], &best_sse,
2224
0
                                   mode_mv[this_mode]);
2225
0
        x->best_sse_inter_mode = NEWMV;
2226
0
        x->best_sse_mv = x->e_mbd.mode_info_context->mbmi.mv;
2227
0
        x->need_to_clamp_best_mvs =
2228
0
            x->e_mbd.mode_info_context->mbmi.need_to_clamp_mvs;
2229
0
        x->best_reference_frame = x->e_mbd.mode_info_context->mbmi.ref_frame;
2230
0
      }
2231
0
    }
2232
6.83M
#endif
2233
2234
    /* Did this mode help.. i.i is it the new best mode */
2235
6.83M
    if (this_rd < best_mode.rd || x->skip) {
2236
      /* Note index of best mode so far */
2237
2.45M
      best_mode_index = mode_index;
2238
2.45M
      *returnrate = rd.rate2;
2239
2.45M
      *returndistortion = rd.distortion2;
2240
2.45M
      if (this_mode <= B_PRED) {
2241
987k
        x->e_mbd.mode_info_context->mbmi.uv_mode = uv_intra_mode;
2242
        /* required for left and above block mv */
2243
987k
        x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
2244
987k
      }
2245
2.45M
      update_best_mode(&best_mode, this_rd, &rd, other_cost, x);
2246
2247
      /* Testing this mode gave rise to an improvement in best error
2248
       * score. Lower threshold a bit for next time
2249
       */
2250
2.45M
      x->rd_thresh_mult[mode_index] =
2251
2.45M
          (x->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2))
2252
2.45M
              ? x->rd_thresh_mult[mode_index] - 2
2253
2.45M
              : MIN_THRESHMULT;
2254
2.45M
    }
2255
2256
    /* If the mode did not help improve the best error case then raise
2257
     * the threshold for testing that mode next time around.
2258
     */
2259
4.38M
    else {
2260
4.38M
      x->rd_thresh_mult[mode_index] += 4;
2261
2262
4.38M
      if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT) {
2263
2.27M
        x->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
2264
2.27M
      }
2265
4.38M
    }
2266
6.83M
    x->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) *
2267
6.83M
                                 x->rd_thresh_mult[mode_index];
2268
2269
6.83M
    if (x->skip) break;
2270
6.83M
  }
2271
2272
  /* Reduce the activation RD thresholds for the best choice mode */
2273
767k
  if ((cpi->rd_baseline_thresh[best_mode_index] > 0) &&
2274
482k
      (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2))) {
2275
482k
    int best_adjustment = (x->rd_thresh_mult[best_mode_index] >> 2);
2276
2277
482k
    x->rd_thresh_mult[best_mode_index] =
2278
482k
        (x->rd_thresh_mult[best_mode_index] >=
2279
482k
         (MIN_THRESHMULT + best_adjustment))
2280
482k
            ? x->rd_thresh_mult[best_mode_index] - best_adjustment
2281
482k
            : MIN_THRESHMULT;
2282
482k
    x->rd_threshes[best_mode_index] =
2283
482k
        (cpi->rd_baseline_thresh[best_mode_index] >> 7) *
2284
482k
        x->rd_thresh_mult[best_mode_index];
2285
482k
  }
2286
2287
767k
#if CONFIG_TEMPORAL_DENOISING
2288
767k
  if (cpi->oxcf.noise_sensitivity) {
2289
0
    int block_index = mb_row * cpi->common.mb_cols + mb_col;
2290
0
    if (x->best_sse_inter_mode == DC_PRED) {
2291
      /* No best MV found. */
2292
0
      x->best_sse_inter_mode = best_mode.mbmode.mode;
2293
0
      x->best_sse_mv = best_mode.mbmode.mv;
2294
0
      x->need_to_clamp_best_mvs = best_mode.mbmode.need_to_clamp_mvs;
2295
0
      x->best_reference_frame = best_mode.mbmode.ref_frame;
2296
0
      best_sse = best_rd_sse;
2297
0
    }
2298
0
    vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse,
2299
0
                            recon_yoffset, recon_uvoffset, &cpi->common.lf_info,
2300
0
                            mb_row, mb_col, block_index, 0);
2301
2302
    /* Reevaluate ZEROMV after denoising. */
2303
0
    if (best_mode.mbmode.ref_frame == INTRA_FRAME &&
2304
0
        x->best_zeromv_reference_frame != INTRA_FRAME) {
2305
0
      int this_rd = INT_MAX;
2306
0
      int disable_skip = 0;
2307
0
      int other_cost = 0;
2308
0
      int this_ref_frame = x->best_zeromv_reference_frame;
2309
0
      rd.rate2 =
2310
0
          x->ref_frame_cost[this_ref_frame] + vp8_cost_mv_ref(ZEROMV, mdcounts);
2311
0
      rd.distortion2 = 0;
2312
2313
      /* set up the proper prediction buffers for the frame */
2314
0
      x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame;
2315
0
      x->e_mbd.pre.y_buffer = plane[this_ref_frame][0];
2316
0
      x->e_mbd.pre.u_buffer = plane[this_ref_frame][1];
2317
0
      x->e_mbd.pre.v_buffer = plane[this_ref_frame][2];
2318
2319
0
      x->e_mbd.mode_info_context->mbmi.mode = ZEROMV;
2320
0
      x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
2321
0
      x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
2322
2323
0
      this_rd = evaluate_inter_mode_rd(mdcounts, &rd, &disable_skip, cpi, x);
2324
0
      this_rd =
2325
0
          calculate_final_rd_costs(this_rd, &rd, &other_cost, disable_skip,
2326
0
                                   uv_intra_tteob, intra_rd_penalty, cpi, x);
2327
0
      if (this_rd < best_mode.rd || x->skip) {
2328
0
        *returnrate = rd.rate2;
2329
0
        *returndistortion = rd.distortion2;
2330
0
        update_best_mode(&best_mode, this_rd, &rd, other_cost, x);
2331
0
      }
2332
0
    }
2333
0
  }
2334
767k
#endif
2335
2336
767k
  if (cpi->is_src_frame_alt_ref &&
2337
0
      (best_mode.mbmode.mode != ZEROMV ||
2338
0
       best_mode.mbmode.ref_frame != ALTREF_FRAME)) {
2339
0
    x->e_mbd.mode_info_context->mbmi.mode = ZEROMV;
2340
0
    x->e_mbd.mode_info_context->mbmi.ref_frame = ALTREF_FRAME;
2341
0
    x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
2342
0
    x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
2343
0
    x->e_mbd.mode_info_context->mbmi.mb_skip_coeff =
2344
0
        (cpi->common.mb_no_coeff_skip);
2345
0
    x->e_mbd.mode_info_context->mbmi.partitioning = 0;
2346
0
    return;
2347
0
  }
2348
2349
  /* macroblock modes */
2350
767k
  x->e_mbd.mode_info_context->mbmi = best_mode.mbmode;
2351
2352
767k
  if (best_mode.mbmode.mode == B_PRED) {
2353
3.38M
    for (i = 0; i < 16; ++i) {
2354
3.18M
      xd->mode_info_context->bmi[i].as_mode = best_mode.bmodes[i].as_mode;
2355
3.18M
    }
2356
199k
  }
2357
2358
767k
  if (best_mode.mbmode.mode == SPLITMV) {
2359
2.99M
    for (i = 0; i < 16; ++i) {
2360
2.81M
      xd->mode_info_context->bmi[i].mv.as_int = best_mode.bmodes[i].mv.as_int;
2361
2.81M
    }
2362
2363
176k
    *x->partition_info = best_mode.partition;
2364
2365
176k
    x->e_mbd.mode_info_context->mbmi.mv.as_int =
2366
176k
        x->partition_info->bmi[15].mv.as_int;
2367
176k
  }
2368
2369
767k
  if (sign_bias !=
2370
767k
      cpi->common.ref_frame_sign_bias[xd->mode_info_context->mbmi.ref_frame]) {
2371
0
    best_ref_mv.as_int = best_ref_mv_sb[!sign_bias].as_int;
2372
0
  }
2373
2374
767k
  rd_update_mvcount(x, &best_ref_mv);
2375
767k
}
2376
2377
652k
void vp8_rd_pick_intra_mode(MACROBLOCK *x, int *rate) {
2378
652k
  int error4x4, error16x16;
2379
652k
  int rate4x4, rate16x16 = 0, rateuv;
2380
652k
  int dist4x4, dist16x16, distuv;
2381
652k
  int rate_;
2382
652k
  int rate4x4_tokenonly = 0;
2383
652k
  int rate16x16_tokenonly = 0;
2384
652k
  int rateuv_tokenonly = 0;
2385
2386
652k
  x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
2387
2388
652k
  rd_pick_intra_mbuv_mode(x, &rateuv, &rateuv_tokenonly, &distuv);
2389
652k
  rate_ = rateuv;
2390
2391
652k
  error16x16 = rd_pick_intra16x16mby_mode(x, &rate16x16, &rate16x16_tokenonly,
2392
652k
                                          &dist16x16);
2393
2394
652k
  error4x4 = rd_pick_intra4x4mby_modes(x, &rate4x4, &rate4x4_tokenonly,
2395
652k
                                       &dist4x4, error16x16);
2396
2397
652k
  if (error4x4 < error16x16) {
2398
298k
    x->e_mbd.mode_info_context->mbmi.mode = B_PRED;
2399
298k
    rate_ += rate4x4;
2400
354k
  } else {
2401
354k
    rate_ += rate16x16;
2402
354k
  }
2403
2404
652k
  *rate = rate_;
2405
652k
}