Coverage Report

Created: 2024-09-06 07:53

/src/libvpx/vp8/encoder/encodemb.c
 Line|  Count|Source
    1|       |/*
    2|       | *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
    3|       | *
    4|       | *  Use of this source code is governed by a BSD-style license
    5|       | *  that can be found in the LICENSE file in the root of the source
    6|       | *  tree. An additional intellectual property rights grant can be found
    7|       | *  in the file PATENTS.  All contributing project authors may
    8|       | *  be found in the AUTHORS file in the root of the source tree.
    9|       | */
   10|       |
   11|       |#include "./vpx_dsp_rtcd.h"
   12|       |
   13|       |#include "vpx_config.h"
   14|       |#include "vp8_rtcd.h"
   15|       |#include "encodemb.h"
   16|       |#include "vp8/common/reconinter.h"
   17|       |#include "vp8/encoder/quantize.h"
   18|       |#include "tokenize.h"
   19|       |#include "vp8/common/invtrans.h"
   20|       |#include "vpx_mem/vpx_mem.h"
   21|       |#include "rdopt.h"
   22|       |
   23|   172M|void vp8_subtract_b(BLOCK *be, BLOCKD *bd, int pitch) {
   24|   172M|  unsigned char *src_ptr = (*(be->base_src) + be->src);
   25|   172M|  short *diff_ptr = be->src_diff;
   26|   172M|  unsigned char *pred_ptr = bd->predictor;
   27|   172M|  int src_stride = be->src_stride;
   28|       |
   29|   172M|  vpx_subtract_block(4, 4, diff_ptr, pitch, src_ptr, src_stride, pred_ptr,
   30|   172M|                     pitch);
   31|   172M|}
   32|       |
   33|       |void vp8_subtract_mbuv(short *diff, unsigned char *usrc, unsigned char *vsrc,
   34|       |                       int src_stride, unsigned char *upred,
   35|  8.26M|                       unsigned char *vpred, int pred_stride) {
   36|  8.26M|  short *udiff = diff + 256;
   37|  8.26M|  short *vdiff = diff + 320;
   38|       |
   39|  8.26M|  vpx_subtract_block(8, 8, udiff, 8, usrc, src_stride, upred, pred_stride);
   40|  8.26M|  vpx_subtract_block(8, 8, vdiff, 8, vsrc, src_stride, vpred, pred_stride);
   41|  8.26M|}
   42|       |
   43|       |void vp8_subtract_mby(short *diff, unsigned char *src, int src_stride,
   44|  6.96M|                      unsigned char *pred, int pred_stride) {
   45|  6.96M|  vpx_subtract_block(16, 16, diff, 16, src, src_stride, pred, pred_stride);
   46|  6.96M|}
   47|       |
   48|   504k|static void vp8_subtract_mb(MACROBLOCK *x) {
   49|   504k|  BLOCK *b = &x->block[0];
   50|       |
   51|   504k|  vp8_subtract_mby(x->src_diff, *(b->base_src), b->src_stride,
   52|   504k|                   x->e_mbd.dst.y_buffer, x->e_mbd.dst.y_stride);
   53|   504k|  vp8_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
   54|   504k|                    x->src.uv_stride, x->e_mbd.dst.u_buffer,
   55|   504k|                    x->e_mbd.dst.v_buffer, x->e_mbd.dst.uv_stride);
   56|   504k|}
   57|       |
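All of the subtract helpers above funnel into vpx_subtract_block, which stores the element-wise prediction residual src - pred for a rows x cols region as 16-bit values. A minimal sketch of that behaviour, inferred from the call sites above rather than taken from the vpx_dsp implementation:

    static void subtract_block_sketch(int rows, int cols, short *diff,
                                      int diff_stride, const unsigned char *src,
                                      int src_stride, const unsigned char *pred,
                                      int pred_stride) {
      int r, c;
      for (r = 0; r < rows; ++r) {
        /* Residual of one row; strides are in elements of each buffer. */
        for (c = 0; c < cols; ++c) diff[c] = (short)(src[c] - pred[c]);
        diff += diff_stride;
        src += src_stride;
        pred += pred_stride;
      }
    }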
   58|  1.25M|static void build_dcblock(MACROBLOCK *x) {
   59|  1.25M|  short *src_diff_ptr = &x->src_diff[384];
   60|  1.25M|  int i;
   61|       |
   62|  21.2M|  for (i = 0; i < 16; ++i) {
   63|  20.0M|    src_diff_ptr[i] = x->coeff[i * 16];
   64|  20.0M|  }
   65|  1.25M|}
   66|       |
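build_dcblock gathers the DC (index 0) coefficient of each of the 16 transformed luma 4x4 blocks into the residual area that feeds the second-order (Y2) transform. Together with the offsets 256 and 320 used by vp8_subtract_mbuv above, this suggests the following layout of the src_diff buffer; the names below are hypothetical and the layout is an inference from those constants:

    enum {
      DIFF_Y_OFFSET  = 0,   /* blocks 0-15: 16x16 luma residual, 256 shorts */
      DIFF_U_OFFSET  = 256, /* blocks 16-19: 8x8 U residual, 64 shorts      */
      DIFF_V_OFFSET  = 320, /* blocks 20-23: 8x8 V residual, 64 shorts      */
      DIFF_Y2_OFFSET = 384  /* block 24: the 16 gathered luma DC values     */
    };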
   67|  7.76M|void vp8_transform_mbuv(MACROBLOCK *x) {
   68|  7.76M|  int i;
   69|       |
   70|  38.8M|  for (i = 16; i < 24; i += 2) {
   71|  31.0M|    x->short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 16);
   72|  31.0M|  }
   73|  7.76M|}
   74|       |
   75|   860k|void vp8_transform_intra_mby(MACROBLOCK *x) {
   76|   860k|  int i;
   77|       |
   78|  7.74M|  for (i = 0; i < 16; i += 2) {
   79|  6.88M|    x->short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32);
   80|  6.88M|  }
   81|       |
   82|       |  /* build dc block from 16 y dc values */
   83|   860k|  build_dcblock(x);
   84|       |
   85|       |  /* do 2nd order transform on the dc block */
   86|   860k|  x->short_walsh4x4(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8);
   87|   860k|}
   88|       |
   89|   504k|static void transform_mb(MACROBLOCK *x) {
   90|   504k|  int i;
   91|       |
   92|  4.54M|  for (i = 0; i < 16; i += 2) {
   93|  4.03M|    x->short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32);
   94|  4.03M|  }
   95|       |
   96|       |  /* build dc block from 16 y dc values */
   97|   504k|  if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) build_dcblock(x);
   98|       |
   99|  2.52M|  for (i = 16; i < 24; i += 2) {
  100|  2.01M|    x->short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 16);
  101|  2.01M|  }
  102|       |
  103|       |  /* do 2nd order transform on the dc block */
  104|   504k|  if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) {
  105|   391k|    x->short_walsh4x4(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8);
  106|   391k|  }
  107|   504k|}
  108|       |
  109|      0|static void transform_mby(MACROBLOCK *x) {
  110|      0|  int i;
  111|       |
  112|      0|  for (i = 0; i < 16; i += 2) {
  113|      0|    x->short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32);
  114|      0|  }
  115|       |
  116|       |  /* build dc block from 16 y dc values */
  117|      0|  if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) {
  118|      0|    build_dcblock(x);
  119|      0|    x->short_walsh4x4(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8);
  120|      0|  }
  121|      0|}
  122|       |
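In every transform call above, the last argument is the spacing between rows of the 16-bit residual expressed in bytes: 32 for the 16-sample-wide luma residual, 16 for the 8-sample-wide chroma residual, and 8 for the 4-sample-wide Y2 block. That reading is an assumption (the transform implementations live outside this file); a sketch of the constants involved:

    /* Illustrative only: the pitch-in-bytes convention assumed above,
     * taking sizeof(short) == 2. */
    enum {
      Y_PITCH  = 16 * sizeof(short), /* 32: row stride of the 16x16 luma residual    */
      UV_PITCH = 8 * sizeof(short),  /* 16: row stride of the 8x8 chroma residual    */
      Y2_PITCH = 4 * sizeof(short)   /*  8: row stride of the 4x4 second-order block */
    };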
  123|      0|#define RDTRUNC(RM, DM, R, D) ((128 + (R) * (RM)) & 0xFF)
  124|       |
  125|       |typedef struct vp8_token_state vp8_token_state;
  126|       |
  127|       |struct vp8_token_state {
  128|       |  int rate;
  129|       |  int error;
  130|       |  signed char next;
  131|       |  signed char token;
  132|       |  short qc;
  133|       |};
  134|       |
  135|       |/* TODO: experiments to find optimal multiple numbers */
  136|       |#define Y1_RD_MULT 4
  137|       |#define UV_RD_MULT 2
  138|       |#define Y2_RD_MULT 16
  139|       |
  140|       |static const int plane_rd_mult[4] = { Y1_RD_MULT, Y2_RD_MULT, UV_RD_MULT,
  141|       |                                      Y1_RD_MULT };
  142|       |
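optimize_b below scores each candidate rounding with RDCOST and, whenever two candidates produce exactly the same cost, re-compares them with the RDTRUNC macro defined above. RDTRUNC keeps precisely the low 8 bits that RDCOST's >> 8 drops from the scaled rate term (its DM and D arguments are unused), so ties are broken on that discarded fraction alone. A sketch of the pair, assuming RDCOST has the conventional vp8 form from rdopt.h (that definition is outside this file and is an assumption here):

    /* Assumed shape of RDCOST; RDTRUNC is quoted from the macro above.
     * RM scales the rate term, DM scales the distortion term. */
    #define SKETCH_RDCOST(RM, DM, R, D) (((128 + (R) * (RM)) >> 8) + (DM) * (D))
    #define SKETCH_RDTRUNC(RM, DM, R, D) ((128 + (R) * (RM)) & 0xFF)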
  143|       |static void optimize_b(MACROBLOCK *mb, int ib, int type, ENTROPY_CONTEXT *a,
  144|      0|                       ENTROPY_CONTEXT *l) {
  145|      0|  BLOCK *b;
  146|      0|  BLOCKD *d;
  147|      0|  vp8_token_state tokens[17][2];
  148|      0|  unsigned best_mask[2];
  149|      0|  const short *dequant_ptr;
  150|      0|  const short *coeff_ptr;
  151|      0|  short *qcoeff_ptr;
  152|      0|  short *dqcoeff_ptr;
  153|      0|  int eob;
  154|      0|  int i0;
  155|      0|  int rc;
  156|      0|  int x;
  157|      0|  int sz = 0;
  158|      0|  int next;
  159|      0|  int rdmult;
  160|      0|  int rddiv;
  161|      0|  int final_eob;
  162|      0|  int rd_cost0;
  163|      0|  int rd_cost1;
  164|      0|  int rate0;
  165|      0|  int rate1;
  166|      0|  int error0;
  167|      0|  int error1;
  168|      0|  int t0;
  169|      0|  int t1;
  170|      0|  int best;
  171|      0|  int band;
  172|      0|  int pt;
  173|      0|  int i;
  174|      0|  int err_mult = plane_rd_mult[type];
  175|       |
  176|      0|  b = &mb->block[ib];
  177|      0|  d = &mb->e_mbd.block[ib];
  178|       |
  179|      0|  dequant_ptr = d->dequant;
  180|      0|  coeff_ptr = b->coeff;
  181|      0|  qcoeff_ptr = d->qcoeff;
  182|      0|  dqcoeff_ptr = d->dqcoeff;
  183|      0|  i0 = !type;
  184|      0|  eob = *d->eob;
  185|       |
  186|       |  /* Now set up a Viterbi trellis to evaluate alternative roundings. */
  187|      0|  rdmult = mb->rdmult * err_mult;
  188|      0|  if (mb->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
  189|      0|    rdmult = (rdmult * 9) >> 4;
  190|      0|  }
  191|       |
  192|      0|  rddiv = mb->rddiv;
  193|      0|  best_mask[0] = best_mask[1] = 0;
  194|       |  /* Initialize the sentinel node of the trellis. */
  195|      0|  tokens[eob][0].rate = 0;
  196|      0|  tokens[eob][0].error = 0;
  197|      0|  tokens[eob][0].next = 16;
  198|      0|  tokens[eob][0].token = DCT_EOB_TOKEN;
  199|      0|  tokens[eob][0].qc = 0;
  200|      0|  *(tokens[eob] + 1) = *(tokens[eob] + 0);
  201|      0|  next = eob;
  202|      0|  for (i = eob; i-- > i0;) {
  203|      0|    int base_bits;
  204|      0|    int d2;
  205|      0|    int dx;
  206|       |
  207|      0|    rc = vp8_default_zig_zag1d[i];
  208|      0|    x = qcoeff_ptr[rc];
  209|       |    /* Only add a trellis state for non-zero coefficients. */
  210|      0|    if (x) {
  211|      0|      int shortcut = 0;
  212|      0|      error0 = tokens[next][0].error;
  213|      0|      error1 = tokens[next][1].error;
  214|       |      /* Evaluate the first possibility for this state. */
  215|      0|      rate0 = tokens[next][0].rate;
  216|      0|      rate1 = tokens[next][1].rate;
  217|      0|      t0 = (vp8_dct_value_tokens_ptr + x)->Token;
  218|       |      /* Consider both possible successor states. */
  219|      0|      if (next < 16) {
  220|      0|        band = vp8_coef_bands[i + 1];
  221|      0|        pt = vp8_prev_token_class[t0];
  222|      0|        rate0 += mb->token_costs[type][band][pt][tokens[next][0].token];
  223|      0|        rate1 += mb->token_costs[type][band][pt][tokens[next][1].token];
  224|      0|      }
  225|      0|      rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);
  226|      0|      rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);
  227|      0|      if (rd_cost0 == rd_cost1) {
  228|      0|        rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);
  229|      0|        rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);
  230|      0|      }
  231|       |      /* And pick the best. */
  232|      0|      best = rd_cost1 < rd_cost0;
  233|      0|      base_bits = *(vp8_dct_value_cost_ptr + x);
  234|      0|      dx = dqcoeff_ptr[rc] - coeff_ptr[rc];
  235|      0|      d2 = dx * dx;
  236|      0|      tokens[i][0].rate = base_bits + (best ? rate1 : rate0);
  237|      0|      tokens[i][0].error = d2 + (best ? error1 : error0);
  238|      0|      tokens[i][0].next = next;
  239|      0|      tokens[i][0].token = t0;
  240|      0|      tokens[i][0].qc = x;
  241|      0|      best_mask[0] |= best << i;
  242|       |      /* Evaluate the second possibility for this state. */
  243|      0|      rate0 = tokens[next][0].rate;
  244|      0|      rate1 = tokens[next][1].rate;
  245|       |
  246|      0|      if ((abs(x) * dequant_ptr[rc] > abs(coeff_ptr[rc])) &&
  247|      0|          (abs(x) * dequant_ptr[rc] < abs(coeff_ptr[rc]) + dequant_ptr[rc])) {
  248|      0|        shortcut = 1;
  249|      0|      } else {
  250|      0|        shortcut = 0;
  251|      0|      }
  252|       |
  253|      0|      if (shortcut) {
  254|      0|        sz = -(x < 0);
  255|      0|        x -= 2 * sz + 1;
  256|      0|      }
  257|       |
  258|       |      /* Consider both possible successor states. */
  259|      0|      if (!x) {
  260|       |        /* If we reduced this coefficient to zero, check to see if
  261|       |         *  we need to move the EOB back here.
  262|       |         */
  263|      0|        t0 =
  264|      0|            tokens[next][0].token == DCT_EOB_TOKEN ? DCT_EOB_TOKEN : ZERO_TOKEN;
  265|      0|        t1 =
  266|      0|            tokens[next][1].token == DCT_EOB_TOKEN ? DCT_EOB_TOKEN : ZERO_TOKEN;
  267|      0|      } else {
  268|      0|        t0 = t1 = (vp8_dct_value_tokens_ptr + x)->Token;
  269|      0|      }
  270|      0|      if (next < 16) {
  271|      0|        band = vp8_coef_bands[i + 1];
  272|      0|        if (t0 != DCT_EOB_TOKEN) {
  273|      0|          pt = vp8_prev_token_class[t0];
  274|      0|          rate0 += mb->token_costs[type][band][pt][tokens[next][0].token];
  275|      0|        }
  276|      0|        if (t1 != DCT_EOB_TOKEN) {
  277|      0|          pt = vp8_prev_token_class[t1];
  278|      0|          rate1 += mb->token_costs[type][band][pt][tokens[next][1].token];
  279|      0|        }
  280|      0|      }
  281|       |
  282|      0|      rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);
  283|      0|      rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);
  284|      0|      if (rd_cost0 == rd_cost1) {
  285|      0|        rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);
  286|      0|        rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);
  287|      0|      }
  288|       |      /* And pick the best. */
  289|      0|      best = rd_cost1 < rd_cost0;
  290|      0|      base_bits = *(vp8_dct_value_cost_ptr + x);
  291|       |
  292|      0|      if (shortcut) {
  293|      0|        dx -= (dequant_ptr[rc] + sz) ^ sz;
  294|      0|        d2 = dx * dx;
  295|      0|      }
  296|      0|      tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
  297|      0|      tokens[i][1].error = d2 + (best ? error1 : error0);
  298|      0|      tokens[i][1].next = next;
  299|      0|      tokens[i][1].token = best ? t1 : t0;
  300|      0|      tokens[i][1].qc = x;
  301|      0|      best_mask[1] |= best << i;
  302|       |      /* Finally, make this the new head of the trellis. */
  303|      0|      next = i;
  304|      0|    }
  305|       |    /* There's no choice to make for a zero coefficient, so we don't
  306|       |     *  add a new trellis node, but we do need to update the costs.
  307|       |     */
  308|      0|    else {
  309|      0|      band = vp8_coef_bands[i + 1];
  310|      0|      t0 = tokens[next][0].token;
  311|      0|      t1 = tokens[next][1].token;
  312|       |      /* Update the cost of each path if we're past the EOB token. */
  313|      0|      if (t0 != DCT_EOB_TOKEN) {
  314|      0|        tokens[next][0].rate += mb->token_costs[type][band][0][t0];
  315|      0|        tokens[next][0].token = ZERO_TOKEN;
  316|      0|      }
  317|      0|      if (t1 != DCT_EOB_TOKEN) {
  318|      0|        tokens[next][1].rate += mb->token_costs[type][band][0][t1];
  319|      0|        tokens[next][1].token = ZERO_TOKEN;
  320|      0|      }
  321|       |      /* Don't update next, because we didn't add a new node. */
  322|      0|    }
  323|      0|  }
  324|       |
  325|       |  /* Now pick the best path through the whole trellis. */
  326|      0|  band = vp8_coef_bands[i + 1];
  327|      0|  VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
  328|      0|  rate0 = tokens[next][0].rate;
  329|      0|  rate1 = tokens[next][1].rate;
  330|      0|  error0 = tokens[next][0].error;
  331|      0|  error1 = tokens[next][1].error;
  332|      0|  t0 = tokens[next][0].token;
  333|      0|  t1 = tokens[next][1].token;
  334|      0|  rate0 += mb->token_costs[type][band][pt][t0];
  335|      0|  rate1 += mb->token_costs[type][band][pt][t1];
  336|      0|  rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);
  337|      0|  rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);
  338|      0|  if (rd_cost0 == rd_cost1) {
  339|      0|    rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);
  340|      0|    rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);
  341|      0|  }
  342|      0|  best = rd_cost1 < rd_cost0;
  343|      0|  final_eob = i0 - 1;
  344|      0|  for (i = next; i < eob; i = next) {
  345|      0|    x = tokens[i][best].qc;
  346|      0|    if (x) final_eob = i;
  347|      0|    rc = vp8_default_zig_zag1d[i];
  348|      0|    qcoeff_ptr[rc] = x;
  349|      0|    dqcoeff_ptr[rc] = x * dequant_ptr[rc];
  350|      0|    next = tokens[i][best].next;
  351|      0|    best = (best_mask[best] >> i) & 1;
  352|      0|  }
  353|      0|  final_eob++;
  354|       |
  355|      0|  *a = *l = (final_eob != !type);
  356|      0|  *d->eob = (char)final_eob;
  357|      0|}
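The "shortcut" test inside the trellis loop asks whether the quantizer's chosen level overshoots the original coefficient by less than one quantization step; when it does, the level one step closer to zero is also a legitimate rounding, and it becomes the second possibility evaluated for the state. A worked example with made-up numbers (original coefficient 27, dequant step 10, chosen level 3):

    #include <assert.h>
    #include <stdlib.h>

    int main(void) {
      const int coeff = 27, dequant = 10, x = 3; /* level 3 reconstructs to 30 */

      /* Same condition as in optimize_b: 30 > 27 and 30 < 37, so the lower
       * level is worth a look. */
      const int shortcut = (abs(x) * dequant > abs(coeff)) &&
                           (abs(x) * dequant < abs(coeff) + dequant);
      assert(shortcut == 1);

      /* Level 3: distortion (30 - 27)^2 = 9.  Level 2: distortion
       * (20 - 27)^2 = 49 but a cheaper token; RDCOST arbitrates. */
      assert((x * dequant - coeff) * (x * dequant - coeff) == 9);
      assert(((x - 1) * dequant - coeff) * ((x - 1) * dequant - coeff) == 49);
      return 0;
    }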
  358|       |static void check_reset_2nd_coeffs(MACROBLOCKD *x, int type, ENTROPY_CONTEXT *a,
  359|      0|                                   ENTROPY_CONTEXT *l) {
  360|      0|  int sum = 0;
  361|      0|  int i;
  362|      0|  BLOCKD *bd = &x->block[24];
  363|       |
  364|      0|  if (bd->dequant[0] >= 35 && bd->dequant[1] >= 35) return;
  365|       |
  366|      0|  for (i = 0; i < (*bd->eob); ++i) {
  367|      0|    int coef = bd->dqcoeff[vp8_default_zig_zag1d[i]];
  368|      0|    sum += (coef >= 0) ? coef : -coef;
  369|      0|    if (sum >= 35) return;
  370|      0|  }
  371|       |  /**************************************************************************
  372|       |  our inverse hadamard transform effectively is weighted sum of all 16 inputs
  373|       |  with weight either 1 or -1. It has a last stage scaling of (sum+3)>>3. And
  374|       |  dc only idct is (dc+4)>>3. So if all the sums are between -35 and 29, the
  375|       |  output after inverse wht and idct will be all zero. A sum of absolute value
  376|       |  smaller than 35 guarantees all 16 different (+1/-1) weighted sums in wht
  377|       |  fall between -35 and +35.
  378|       |  **************************************************************************/
  379|      0|  if (sum < 35) {
  380|      0|    for (i = 0; i < (*bd->eob); ++i) {
  381|      0|      int rc = vp8_default_zig_zag1d[i];
  382|      0|      bd->qcoeff[rc] = 0;
  383|      0|      bd->dqcoeff[rc] = 0;
  384|      0|    }
  385|      0|    *bd->eob = 0;
  386|      0|    *a = *l = (*bd->eob != !type);
  387|      0|  }
  388|      0|}
  389|       |
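The zeroing decision above leans on the rounding chain described in the comment: each inverse-WHT output is a (+1/-1)-weighted sum s of the second-order coefficients scaled as (s + 3) >> 3, and the DC-only inverse DCT then applies (dc + 4) >> 3, so small sums vanish entirely in the reconstruction. A small numeric illustration (non-negative sums only, to stay clear of right-shifting negative values):

    #include <assert.h>

    /* Chains the two roundings named in the comment above. */
    static int wht_then_dc_idct(int s) {
      int dc = (s + 3) >> 3; /* last stage of the inverse Walsh-Hadamard */
      return (dc + 4) >> 3;  /* DC-only inverse DCT */
    }

    int main(void) {
      assert(wht_then_dc_idct(0) == 0);
      assert(wht_then_dc_idct(6) == 0);
      assert(wht_then_dc_idct(28) == 0); /* still collapses to zero */
      assert(wht_then_dc_idct(40) == 1); /* a larger sum survives */
      return 0;
    }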
  390|      0|static void optimize_mb(MACROBLOCK *x) {
  391|      0|  int b;
  392|      0|  int type;
  393|      0|  int has_2nd_order;
  394|       |
  395|      0|  ENTROPY_CONTEXT_PLANES t_above, t_left;
  396|      0|  ENTROPY_CONTEXT *ta;
  397|      0|  ENTROPY_CONTEXT *tl;
  398|       |
  399|      0|  memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
  400|      0|  memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
  401|       |
  402|      0|  ta = (ENTROPY_CONTEXT *)&t_above;
  403|      0|  tl = (ENTROPY_CONTEXT *)&t_left;
  404|       |
  405|      0|  has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED &&
  406|      0|                   x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
  407|      0|  type = has_2nd_order ? PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC;
  408|       |
  409|      0|  for (b = 0; b < 16; ++b) {
  410|      0|    optimize_b(x, b, type, ta + vp8_block2above[b], tl + vp8_block2left[b]);
  411|      0|  }
  412|       |
  413|      0|  for (b = 16; b < 24; ++b) {
  414|      0|    optimize_b(x, b, PLANE_TYPE_UV, ta + vp8_block2above[b],
  415|      0|               tl + vp8_block2left[b]);
  416|      0|  }
  417|       |
  418|      0|  if (has_2nd_order) {
  419|      0|    b = 24;
  420|      0|    optimize_b(x, b, PLANE_TYPE_Y2, ta + vp8_block2above[b],
  421|      0|               tl + vp8_block2left[b]);
  422|      0|    check_reset_2nd_coeffs(&x->e_mbd, PLANE_TYPE_Y2, ta + vp8_block2above[b],
  423|      0|                           tl + vp8_block2left[b]);
  424|      0|  }
  425|      0|}
  426|       |
  427|      0|void vp8_optimize_mby(MACROBLOCK *x) {
  428|      0|  int b;
  429|      0|  int type;
  430|      0|  int has_2nd_order;
  431|       |
  432|      0|  ENTROPY_CONTEXT_PLANES t_above, t_left;
  433|      0|  ENTROPY_CONTEXT *ta;
  434|      0|  ENTROPY_CONTEXT *tl;
  435|       |
  436|      0|  if (!x->e_mbd.above_context) return;
  437|       |
  438|      0|  if (!x->e_mbd.left_context) return;
  439|       |
  440|      0|  memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
  441|      0|  memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
  442|       |
  443|      0|  ta = (ENTROPY_CONTEXT *)&t_above;
  444|      0|  tl = (ENTROPY_CONTEXT *)&t_left;
  445|       |
  446|      0|  has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED &&
  447|      0|                   x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
  448|      0|  type = has_2nd_order ? PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC;
  449|       |
  450|      0|  for (b = 0; b < 16; ++b) {
  451|      0|    optimize_b(x, b, type, ta + vp8_block2above[b], tl + vp8_block2left[b]);
  452|      0|  }
  453|       |
  454|      0|  if (has_2nd_order) {
  455|      0|    b = 24;
  456|      0|    optimize_b(x, b, PLANE_TYPE_Y2, ta + vp8_block2above[b],
  457|      0|               tl + vp8_block2left[b]);
  458|      0|    check_reset_2nd_coeffs(&x->e_mbd, PLANE_TYPE_Y2, ta + vp8_block2above[b],
  459|      0|                           tl + vp8_block2left[b]);
  460|      0|  }
  461|      0|}
  462|       |
  463|      0|void vp8_optimize_mbuv(MACROBLOCK *x) {
  464|      0|  int b;
  465|      0|  ENTROPY_CONTEXT_PLANES t_above, t_left;
  466|      0|  ENTROPY_CONTEXT *ta;
  467|      0|  ENTROPY_CONTEXT *tl;
  468|       |
  469|      0|  if (!x->e_mbd.above_context) return;
  470|       |
  471|      0|  if (!x->e_mbd.left_context) return;
  472|       |
  473|      0|  memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
  474|      0|  memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
  475|       |
  476|      0|  ta = (ENTROPY_CONTEXT *)&t_above;
  477|      0|  tl = (ENTROPY_CONTEXT *)&t_left;
  478|       |
  479|      0|  for (b = 16; b < 24; ++b) {
  480|      0|    optimize_b(x, b, PLANE_TYPE_UV, ta + vp8_block2above[b],
  481|      0|               tl + vp8_block2left[b]);
  482|      0|  }
  483|      0|}
  484|       |
  485|   504k|void vp8_encode_inter16x16(MACROBLOCK *x) {
  486|   504k|  vp8_build_inter_predictors_mb(&x->e_mbd);
  487|       |
  488|   504k|  vp8_subtract_mb(x);
  489|       |
  490|   504k|  transform_mb(x);
  491|       |
  492|   504k|  vp8_quantize_mb(x);
  493|       |
  494|   504k|  if (x->optimize) optimize_mb(x);
  495|   504k|}
  496|       |
  497|       |/* this function is used by first pass only */
  498|      0|void vp8_encode_inter16x16y(MACROBLOCK *x) {
  499|      0|  BLOCK *b = &x->block[0];
  500|       |
  501|      0|  vp8_build_inter16x16_predictors_mby(&x->e_mbd, x->e_mbd.dst.y_buffer,
  502|      0|                                      x->e_mbd.dst.y_stride);
  503|       |
  504|      0|  vp8_subtract_mby(x->src_diff, *(b->base_src), b->src_stride,
  505|      0|                   x->e_mbd.dst.y_buffer, x->e_mbd.dst.y_stride);
  506|       |
  507|      0|  transform_mby(x);
  508|       |
  509|      0|  vp8_quantize_mby(x);
  510|       |
  511|      0|  vp8_inverse_transform_mby(&x->e_mbd);
  512|      0|}