Coverage Report

Created: 2024-09-06 07:53

/src/libvpx/vp9/encoder/vp9_encodemb.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5
 *  that can be found in the LICENSE file in the root of the source
6
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
9
 */
10
11
#include "./vp9_rtcd.h"
12
#include "./vpx_config.h"
13
#include "./vpx_dsp_rtcd.h"
14
15
#include "vpx_dsp/quantize.h"
16
#include "vpx_mem/vpx_mem.h"
17
#include "vpx_ports/mem.h"
18
19
#if CONFIG_MISMATCH_DEBUG
20
#include "vpx_util/vpx_debug_util.h"
21
#endif
22
23
#include "vp9/common/vp9_idct.h"
24
#include "vp9/common/vp9_reconinter.h"
25
#include "vp9/common/vp9_reconintra.h"
26
#include "vp9/common/vp9_scan.h"
27
28
#include "vp9/encoder/vp9_encodemb.h"
29
#include "vp9/encoder/vp9_encoder.h"
30
#include "vp9/encoder/vp9_rd.h"
31
#include "vp9/encoder/vp9_tokenize.h"
32
33
struct optimize_ctx {
34
  ENTROPY_CONTEXT ta[MAX_MB_PLANE][16];
35
  ENTROPY_CONTEXT tl[MAX_MB_PLANE][16];
36
};
37
38
41.7M
// Fills x->plane[plane].src_diff for the block 'bsize' by calling
// vpx_subtract_block() (or vpx_highbd_subtract_block() when the current frame
// buffer is flagged high bitdepth) on the plane's source buffer and the
// plane's destination buffer.
//
//   x      - macroblock whose plane data is read and whose src_diff is written.
//   bsize  - block size; converted to the plane block size via
//            get_plane_block_size().
//   plane  - index of the plane to process.
void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
  struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &x->e_mbd.plane[plane];
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
  // Block dimensions in samples; the lookups are in units of 4x4 blocks.
  const int bw = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int bh = 4 * num_4x4_blocks_high_lookup[plane_bsize];

#if CONFIG_VP9_HIGHBITDEPTH
  if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    vpx_highbd_subtract_block(bh, bw, p->src_diff, bw, p->src.buf,
                              p->src.stride, pd->dst.buf, pd->dst.stride,
                              x->e_mbd.bd);
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH
  // The residual buffer is packed: its stride equals the block width.
  vpx_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, p->src.stride,
                     pd->dst.buf, pd->dst.stride);
}
56
57
// Scale factors applied to mb->rdmult in vp9_optimize_b(). The first index is
// the result of is_inter_block() for the current block, the second index is
// the plane type.
static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = {
  { 10, 6 },
  { 8, 5 },
};
61
62
// Right-shifts the magnitude of 'num' and re-applies its sign, i.e. the result
// is rounded toward zero instead of relying on '>>' of a negative value.
// 'num' can be negative, but 'shift' must be non-negative.
// Both arguments are evaluated more than once, so they must be free of side
// effects.
#define RIGHT_SHIFT_POSSIBLY_NEGATIVE(num, shift) \
  (((num) >= 0) ? (num) >> (shift) : -((-(num)) >> (shift)))
65
66
// Greedy rate-distortion optimization of the quantized coefficients of one
// transform block.
//
// Walking the coefficients in scan order up to the current end-of-block (eob),
// each non-zero quantized value is either kept or has its magnitude reduced by
// one, whichever gives the lower RD cost, and the cheapest position for the
// end of block is tracked along the way. qcoeff / dqcoeff are updated in
// place and mb->plane[plane].eobs[block] is overwritten with the new eob.
//
//   mb       - macroblock holding coefficients, token costs and RD parameters.
//   plane    - plane index.
//   block    - block index used with BLOCK_OFFSET() and p->eobs[].
//   tx_size  - transform size of the block.
//   ctx      - entropy context used for the first coefficient.
//
// Returns the final eob (0 means every coefficient was zeroed).
int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
                   int ctx) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  struct macroblock_plane *const p = &mb->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const int ref = is_inter_block(xd->mi[0]);
  uint8_t token_cache[1024];
  const tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  const int eob = p->eobs[block];
  const PLANE_TYPE plane_type = get_plane_type(plane);
  // Number of coefficients in a full block: 16, 64, 256 or 1024.
  const int default_eob = 16 << (tx_size << 1);
  // Extra scaling of one bit is applied only for 32x32 transforms.
  const int shift = (tx_size == TX_32X32);
  const int16_t *const dequant_ptr = pd->dequant;
  const uint8_t *const band_translate = get_band_translate(tx_size);
  const ScanOrder *const so = get_scan(xd, tx_size, plane_type, block);
  const int16_t *const scan = so->scan;
  const int16_t *const nb = so->neighbors;
  const MODE_INFO *mbmi = xd->mi[0];
  const int sharpness = mb->sharpness;
  const int64_t rdadj = (int64_t)mb->rdmult * plane_rd_mult[ref][plane_type];
  // With sharpness enabled the multiplier also depends on the segment id.
  const int64_t rdmult =
      (sharpness == 0 ? rdadj >> 1
                      : (rdadj * (8 - sharpness + mbmi->segment_id)) >> 4);

  const int64_t rddiv = mb->rddiv;
  int64_t rd_cost0, rd_cost1;
  int64_t rate0, rate1;
  int16_t t0, t1;
  int i, final_eob;
  int count_high_values_after_eob = 0;
#if CONFIG_VP9_HIGHBITDEPTH
  const uint16_t *cat6_high_cost = vp9_get_high_cost_table(xd->bd);
#else
  const uint16_t *cat6_high_cost = vp9_get_high_cost_table(8);
#endif
  unsigned int(*const token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
      mb->token_costs[tx_size][plane_type][ref];
  unsigned int(*token_costs_cur)[2][COEFF_CONTEXTS][ENTROPY_TOKENS];
  int64_t eob_cost0, eob_cost1;
  const int ctx0 = ctx;
  int64_t accu_rate = 0;
  // Initialized to the worst possible error for the largest transform size.
  // This ensures that it never goes negative.
  int64_t accu_error = ((int64_t)1) << 50;
  int64_t best_block_rd_cost = INT64_MAX;
  // Previous selected quantized value; starts non-zero so the first token
  // uses token tree 0.
  int x_prev = 1;
  // Quantized / de-quantized value to store at position final_eob - 1.
  tran_low_t before_best_eob_qc = 0;
  tran_low_t before_best_eob_dqc = 0;

  assert((!plane_type && !plane) || (plane_type && plane));
  assert(eob <= default_eob);

  // Seed the token cache with the energy class of every coded coefficient so
  // that get_coef_context() can be evaluated for any position below.
  for (i = 0; i < eob; i++) {
    const int rc = scan[i];
    token_cache[rc] = vp9_pt_energy_class[vp9_get_token(qcoeff[rc])];
  }
  final_eob = 0;

  // Initial RD cost.
  token_costs_cur = token_costs + band_translate[0];
  rate0 = (*token_costs_cur)[0][ctx0][EOB_TOKEN];
  best_block_rd_cost = RDCOST(rdmult, rddiv, rate0, accu_error);

  // For each token, pick one of two choices greedily:
  // (i) First candidate: Keep current quantized value, OR
  // (ii) Second candidate: Reduce quantized value by 1.
  for (i = 0; i < eob; i++) {
    const int rc = scan[i];
    const int x = qcoeff[rc];
    const int band_cur = band_translate[i];
    const int ctx_cur = (i == 0) ? ctx : get_coef_context(nb, token_cache, i);
    const int token_tree_sel_cur = (x_prev == 0);
    token_costs_cur = token_costs + band_cur;
    if (x == 0) {  // No need to search
      const int token = vp9_get_token(x);
      rate0 = (*token_costs_cur)[token_tree_sel_cur][ctx_cur][token];
      accu_rate += rate0;
      x_prev = 0;
      // Note: accu_error does not change.
    } else {
      // dequant[0] is used for scan position rc == 0, dequant[1] otherwise.
      const int dqv = dequant_ptr[rc != 0];
      // Compute the distortion for quantizing to 0.
      const int diff_for_zero_raw = (0 - coeff[rc]) * (1 << shift);
      const int diff_for_zero =
#if CONFIG_VP9_HIGHBITDEPTH
          (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
              ? RIGHT_SHIFT_POSSIBLY_NEGATIVE(diff_for_zero_raw, xd->bd - 8)
              :
#endif
              diff_for_zero_raw;
      const int64_t distortion_for_zero =
          (int64_t)diff_for_zero * diff_for_zero;

      // Compute the distortion for the first candidate
      const int diff0_raw = (dqcoeff[rc] - coeff[rc]) * (1 << shift);
      const int diff0 =
#if CONFIG_VP9_HIGHBITDEPTH
          (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
              ? RIGHT_SHIFT_POSSIBLY_NEGATIVE(diff0_raw, xd->bd - 8)
              :
#endif  // CONFIG_VP9_HIGHBITDEPTH
              diff0_raw;
      const int64_t distortion0 = (int64_t)diff0 * diff0;

      // Compute the distortion for the second candidate
      const int sign = -(x < 0);        // -1 if x is negative and 0 otherwise.
      const int x1 = x - 2 * sign - 1;  // abs(x1) = abs(x) - 1.
      int64_t distortion1;
      if (x1 != 0) {
        const int dqv_step =
#if CONFIG_VP9_HIGHBITDEPTH
            (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? dqv >> (xd->bd - 8)
                                                          :
#endif  // CONFIG_VP9_HIGHBITDEPTH
                                                          dqv;
        // (v + sign) ^ sign negates v when sign == -1 and is v when sign == 0.
        const int diff_step = (dqv_step + sign) ^ sign;
        const int diff1 = diff0 - diff_step;
        assert(dqv > 0);  // We aren't right shifting a negative number above.
        distortion1 = (int64_t)diff1 * diff1;
      } else {
        distortion1 = distortion_for_zero;
      }
      {
        // Calculate RDCost for current coeff for the two candidates.
        const int64_t base_bits0 = vp9_get_token_cost(x, &t0, cat6_high_cost);
        const int64_t base_bits1 = vp9_get_token_cost(x1, &t1, cat6_high_cost);
        rate0 =
            base_bits0 + (*token_costs_cur)[token_tree_sel_cur][ctx_cur][t0];
        rate1 =
            base_bits1 + (*token_costs_cur)[token_tree_sel_cur][ctx_cur][t1];
      }
      {
        int rdcost_better_for_x1, eob_rdcost_better_for_x1;
        int dqc0, dqc1;
        int64_t best_eob_cost_cur;
        int use_x1;

        // Calculate RD Cost effect on the next coeff for the two candidates.
        int64_t next_bits0 = 0;
        int64_t next_bits1 = 0;
        int64_t next_eob_bits0 = 0;
        int64_t next_eob_bits1 = 0;
        if (i < default_eob - 1) {
          int ctx_next, token_tree_sel_next;
          const int band_next = band_translate[i + 1];
          const int token_next =
              (i + 1 != eob) ? vp9_get_token(qcoeff[scan[i + 1]]) : EOB_TOKEN;
          unsigned int(*const token_costs_next)[2][COEFF_CONTEXTS]
                                               [ENTROPY_TOKENS] =
                                                   token_costs + band_next;
          // Temporarily record candidate 0 in the cache to derive the context
          // the next position would see.
          token_cache[rc] = vp9_pt_energy_class[t0];
          ctx_next = get_coef_context(nb, token_cache, i + 1);
          token_tree_sel_next = (x == 0);
          next_bits0 =
              (*token_costs_next)[token_tree_sel_next][ctx_next][token_next];
          next_eob_bits0 =
              (*token_costs_next)[token_tree_sel_next][ctx_next][EOB_TOKEN];
          // Same again for candidate 1.
          token_cache[rc] = vp9_pt_energy_class[t1];
          ctx_next = get_coef_context(nb, token_cache, i + 1);
          token_tree_sel_next = (x1 == 0);
          next_bits1 =
              (*token_costs_next)[token_tree_sel_next][ctx_next][token_next];
          if (x1 != 0) {
            next_eob_bits1 =
                (*token_costs_next)[token_tree_sel_next][ctx_next][EOB_TOKEN];
          }
        }

        // Compare the total RD costs for two candidates.
        rd_cost0 = RDCOST(rdmult, rddiv, (rate0 + next_bits0), distortion0);
        rd_cost1 = RDCOST(rdmult, rddiv, (rate1 + next_bits1), distortion1);
        rdcost_better_for_x1 = (rd_cost1 < rd_cost0);
        eob_cost0 = RDCOST(rdmult, rddiv, (accu_rate + rate0 + next_eob_bits0),
                           (accu_error + distortion0 - distortion_for_zero));
        eob_cost1 = eob_cost0;
        if (x1 != 0) {
          eob_cost1 =
              RDCOST(rdmult, rddiv, (accu_rate + rate1 + next_eob_bits1),
                     (accu_error + distortion1 - distortion_for_zero));
          eob_rdcost_better_for_x1 = (eob_cost1 < eob_cost0);
        } else {
          // The block cannot end on a zero coefficient.
          eob_rdcost_better_for_x1 = 0;
        }

        // Calculate the two candidate de-quantized values.
        dqc0 = dqcoeff[rc];
        dqc1 = 0;
        if (rdcost_better_for_x1 + eob_rdcost_better_for_x1) {
          if (x1 != 0) {
            dqc1 = RIGHT_SHIFT_POSSIBLY_NEGATIVE(x1 * dqv, shift);
          } else {
            dqc1 = 0;
          }
        }

        // Pick and record the better quantized and de-quantized values.
        if (rdcost_better_for_x1) {
          qcoeff[rc] = x1;
          dqcoeff[rc] = dqc1;
          accu_rate += rate1;
          accu_error += distortion1 - distortion_for_zero;
          assert(distortion1 <= distortion_for_zero);
          token_cache[rc] = vp9_pt_energy_class[t1];
        } else {
          accu_rate += rate0;
          accu_error += distortion0 - distortion_for_zero;
          assert(distortion0 <= distortion_for_zero);
          token_cache[rc] = vp9_pt_energy_class[t0];
        }
        // Counts coefficients with magnitude > 1 seen since the last eob
        // update; only tracked when sharpness is enabled.
        if (sharpness > 0 && abs(qcoeff[rc]) > 1) count_high_values_after_eob++;
        assert(accu_error >= 0);
        x_prev = qcoeff[rc];  // Update based on selected quantized value.

        use_x1 = (x1 != 0) && eob_rdcost_better_for_x1;
        best_eob_cost_cur = use_x1 ? eob_cost1 : eob_cost0;

        // Determine whether to move the eob position to i+1
        if (best_eob_cost_cur < best_block_rd_cost) {
          best_block_rd_cost = best_eob_cost_cur;
          final_eob = i + 1;
          count_high_values_after_eob = 0;
          if (use_x1) {
            before_best_eob_qc = x1;
            before_best_eob_dqc = dqc1;
          } else {
            before_best_eob_qc = x;
            before_best_eob_dqc = dqc0;
          }
        }
      }
    }
  }
  if (count_high_values_after_eob > 0) {
    // Coefficients with magnitude > 1 remain past the best eob: keep them and
    // place the eob just after the last non-zero coefficient instead.
    final_eob = eob - 1;
    for (; final_eob >= 0; final_eob--) {
      const int rc = scan[final_eob];
      const int x = qcoeff[rc];
      if (x) {
        break;
      }
    }
    final_eob++;
  } else {
    assert(final_eob <= eob);
    if (final_eob > 0) {
      int rc;
      assert(before_best_eob_qc != 0);
      // The last kept coefficient takes the value chosen for the best eob,
      // which may differ from the value chosen by the per-coefficient test.
      i = final_eob - 1;
      rc = scan[i];
      qcoeff[rc] = before_best_eob_qc;
      dqcoeff[rc] = before_best_eob_dqc;
    }
    // Clear everything from the new eob up to the old one.
    for (i = final_eob; i < eob; i++) {
      int rc = scan[i];
      qcoeff[rc] = 0;
      dqcoeff[rc] = 0;
    }
  }
  mb->plane[plane].eobs[block] = final_eob;
  return final_eob;
}
#undef RIGHT_SHIFT_POSSIBLY_NEGATIVE
330
331
static INLINE void fdct32x32(int rd_transform, const int16_t *src,
332
3.70M
                             tran_low_t *dst, int src_stride) {
333
3.70M
  if (rd_transform)
334
3.58M
    vpx_fdct32x32_rd(src, dst, src_stride);
335
119k
  else
336
119k
    vpx_fdct32x32(src, dst, src_stride);
337
3.70M
}
338
339
#if CONFIG_VP9_HIGHBITDEPTH
// High-bitdepth counterpart of fdct32x32(): a non-zero 'rd_transform' selects
// vpx_highbd_fdct32x32_rd(), otherwise vpx_highbd_fdct32x32() is used. 'src'
// is read with row stride 'src_stride' and the result is stored in 'dst'.
static INLINE void highbd_fdct32x32(int rd_transform, const int16_t *src,
                                    tran_low_t *dst, int src_stride) {
  if (rd_transform)
    vpx_highbd_fdct32x32_rd(src, dst, src_stride);
  else
    vpx_highbd_fdct32x32(src, dst, src_stride);
}
#endif  // CONFIG_VP9_HIGHBITDEPTH
348
349
void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, int row, int col,
350
0
                        BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
351
0
  MACROBLOCKD *const xd = &x->e_mbd;
352
0
  const struct macroblock_plane *const p = &x->plane[plane];
353
0
  const struct macroblockd_plane *const pd = &xd->plane[plane];
354
0
  const ScanOrder *const scan_order = &vp9_default_scan_orders[tx_size];
355
0
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
356
0
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
357
0
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
358
0
  uint16_t *const eob = &p->eobs[block];
359
0
  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
360
0
  const int16_t *src_diff;
361
0
  src_diff = &p->src_diff[4 * (row * diff_stride + col)];
362
  // skip block condition should be handled before this is called.
363
0
  assert(!x->skip_block);
364
365
0
#if CONFIG_VP9_HIGHBITDEPTH
366
0
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
367
0
    switch (tx_size) {
368
0
      case TX_32X32:
369
0
        highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
370
0
        vp9_highbd_quantize_fp_32x32(coeff, 1024, p, qcoeff, dqcoeff,
371
0
                                     pd->dequant, eob, scan_order);
372
0
        break;
373
0
      case TX_16X16:
374
0
        vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
375
0
        vp9_highbd_quantize_fp(coeff, 256, p, qcoeff, dqcoeff, pd->dequant, eob,
376
0
                               scan_order);
377
0
        break;
378
0
      case TX_8X8:
379
0
        vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
380
0
        vp9_highbd_quantize_fp(coeff, 64, p, qcoeff, dqcoeff, pd->dequant, eob,
381
0
                               scan_order);
382
0
        break;
383
0
      default:
384
0
        assert(tx_size == TX_4X4);
385
0
        x->fwd_txfm4x4(src_diff, coeff, diff_stride);
386
0
        vp9_highbd_quantize_fp(coeff, 16, p, qcoeff, dqcoeff, pd->dequant, eob,
387
0
                               scan_order);
388
0
        break;
389
0
    }
390
0
    return;
391
0
  }
392
0
#endif  // CONFIG_VP9_HIGHBITDEPTH
393
394
0
  switch (tx_size) {
395
0
    case TX_32X32:
396
0
      fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
397
0
      vp9_quantize_fp_32x32(coeff, 1024, p, qcoeff, dqcoeff, pd->dequant, eob,
398
0
                            scan_order);
399
0
      break;
400
0
    case TX_16X16:
401
0
      vpx_fdct16x16(src_diff, coeff, diff_stride);
402
0
      vp9_quantize_fp(coeff, 256, p, qcoeff, dqcoeff, pd->dequant, eob,
403
0
                      scan_order);
404
0
      break;
405
0
    case TX_8X8:
406
0
      vpx_fdct8x8(src_diff, coeff, diff_stride);
407
0
      vp9_quantize_fp(coeff, 64, p, qcoeff, dqcoeff, pd->dequant, eob,
408
0
                      scan_order);
409
410
0
      break;
411
0
    default:
412
0
      assert(tx_size == TX_4X4);
413
0
      x->fwd_txfm4x4(src_diff, coeff, diff_stride);
414
0
      vp9_quantize_fp(coeff, 16, p, qcoeff, dqcoeff, pd->dequant, eob,
415
0
                      scan_order);
416
0
      break;
417
0
  }
418
0
}
419
420
void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block, int row, int col,
421
301k
                        BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
422
301k
  MACROBLOCKD *const xd = &x->e_mbd;
423
301k
  const struct macroblock_plane *const p = &x->plane[plane];
424
301k
  const struct macroblockd_plane *const pd = &xd->plane[plane];
425
301k
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
426
301k
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
427
301k
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
428
301k
  uint16_t *const eob = &p->eobs[block];
429
301k
  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
430
301k
  const int16_t *src_diff;
431
301k
  src_diff = &p->src_diff[4 * (row * diff_stride + col)];
432
  // skip block condition should be handled before this is called.
433
301k
  assert(!x->skip_block);
434
435
301k
#if CONFIG_VP9_HIGHBITDEPTH
436
301k
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
437
0
    switch (tx_size) {
438
0
      case TX_32X32:
439
0
        vpx_highbd_fdct32x32_1(src_diff, coeff, diff_stride);
440
0
        vpx_highbd_quantize_dc_32x32(coeff, p->round, p->quant_fp[0], qcoeff,
441
0
                                     dqcoeff, pd->dequant[0], eob);
442
0
        break;
443
0
      case TX_16X16:
444
0
        vpx_highbd_fdct16x16_1(src_diff, coeff, diff_stride);
445
0
        vpx_highbd_quantize_dc(coeff, 256, p->round, p->quant_fp[0], qcoeff,
446
0
                               dqcoeff, pd->dequant[0], eob);
447
0
        break;
448
0
      case TX_8X8:
449
0
        vpx_highbd_fdct8x8_1(src_diff, coeff, diff_stride);
450
0
        vpx_highbd_quantize_dc(coeff, 64, p->round, p->quant_fp[0], qcoeff,
451
0
                               dqcoeff, pd->dequant[0], eob);
452
0
        break;
453
0
      default:
454
0
        assert(tx_size == TX_4X4);
455
0
        x->fwd_txfm4x4(src_diff, coeff, diff_stride);
456
0
        vpx_highbd_quantize_dc(coeff, 16, p->round, p->quant_fp[0], qcoeff,
457
0
                               dqcoeff, pd->dequant[0], eob);
458
0
        break;
459
0
    }
460
0
    return;
461
0
  }
462
301k
#endif  // CONFIG_VP9_HIGHBITDEPTH
463
464
301k
  switch (tx_size) {
465
2.09k
    case TX_32X32:
466
2.09k
      vpx_fdct32x32_1(src_diff, coeff, diff_stride);
467
2.09k
      vpx_quantize_dc_32x32(coeff, p->round, p->quant_fp[0], qcoeff, dqcoeff,
468
2.09k
                            pd->dequant[0], eob);
469
2.09k
      break;
470
9.27k
    case TX_16X16:
471
9.27k
      vpx_fdct16x16_1(src_diff, coeff, diff_stride);
472
9.27k
      vpx_quantize_dc(coeff, 256, p->round, p->quant_fp[0], qcoeff, dqcoeff,
473
9.27k
                      pd->dequant[0], eob);
474
9.27k
      break;
475
50.1k
    case TX_8X8:
476
50.1k
      vpx_fdct8x8_1(src_diff, coeff, diff_stride);
477
50.1k
      vpx_quantize_dc(coeff, 64, p->round, p->quant_fp[0], qcoeff, dqcoeff,
478
50.1k
                      pd->dequant[0], eob);
479
50.1k
      break;
480
240k
    default:
481
240k
      assert(tx_size == TX_4X4);
482
240k
      x->fwd_txfm4x4(src_diff, coeff, diff_stride);
483
240k
      vpx_quantize_dc(coeff, 16, p->round, p->quant_fp[0], qcoeff, dqcoeff,
484
240k
                      pd->dequant[0], eob);
485
240k
      break;
486
301k
  }
487
301k
}
488
489
void vp9_xform_quant(MACROBLOCK *x, int plane, int block, int row, int col,
490
68.2M
                     BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
491
68.2M
  MACROBLOCKD *const xd = &x->e_mbd;
492
68.2M
  const struct macroblock_plane *const p = &x->plane[plane];
493
68.2M
  const struct macroblockd_plane *const pd = &xd->plane[plane];
494
68.2M
  const ScanOrder *const scan_order = &vp9_default_scan_orders[tx_size];
495
68.2M
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
496
68.2M
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
497
68.2M
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
498
68.2M
  uint16_t *const eob = &p->eobs[block];
499
68.2M
  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
500
68.2M
  const int16_t *src_diff;
501
68.2M
  src_diff = &p->src_diff[4 * (row * diff_stride + col)];
502
  // skip block condition should be handled before this is called.
503
68.2M
  assert(!x->skip_block);
504
505
68.2M
#if CONFIG_VP9_HIGHBITDEPTH
506
68.2M
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
507
0
    switch (tx_size) {
508
0
      case TX_32X32:
509
0
        highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
510
0
        vpx_highbd_quantize_b_32x32(coeff, p, qcoeff, dqcoeff, pd->dequant, eob,
511
0
                                    scan_order);
512
0
        break;
513
0
      case TX_16X16:
514
0
        vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
515
0
        vpx_highbd_quantize_b(coeff, 256, p, qcoeff, dqcoeff, pd->dequant, eob,
516
0
                              scan_order);
517
0
        break;
518
0
      case TX_8X8:
519
0
        vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
520
0
        vpx_highbd_quantize_b(coeff, 64, p, qcoeff, dqcoeff, pd->dequant, eob,
521
0
                              scan_order);
522
0
        break;
523
0
      default:
524
0
        assert(tx_size == TX_4X4);
525
0
        x->fwd_txfm4x4(src_diff, coeff, diff_stride);
526
0
        vpx_highbd_quantize_b(coeff, 16, p, qcoeff, dqcoeff, pd->dequant, eob,
527
0
                              scan_order);
528
0
        break;
529
0
    }
530
0
    return;
531
0
  }
532
68.2M
#endif  // CONFIG_VP9_HIGHBITDEPTH
533
534
68.2M
  switch (tx_size) {
535
1.03M
    case TX_32X32:
536
1.03M
      fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
537
1.03M
      vpx_quantize_b_32x32(coeff, p, qcoeff, dqcoeff, pd->dequant, eob,
538
1.03M
                           scan_order);
539
1.03M
      break;
540
4.25M
    case TX_16X16:
541
4.25M
      vpx_fdct16x16(src_diff, coeff, diff_stride);
542
4.25M
      vpx_quantize_b(coeff, 256, p, qcoeff, dqcoeff, pd->dequant, eob,
543
4.25M
                     scan_order);
544
4.25M
      break;
545
17.6M
    case TX_8X8:
546
17.6M
      vpx_fdct8x8(src_diff, coeff, diff_stride);
547
17.6M
      vpx_quantize_b(coeff, 64, p, qcoeff, dqcoeff, pd->dequant, eob,
548
17.6M
                     scan_order);
549
17.6M
      break;
550
45.2M
    default:
551
45.2M
      assert(tx_size == TX_4X4);
552
45.2M
      x->fwd_txfm4x4(src_diff, coeff, diff_stride);
553
45.2M
      vpx_quantize_b(coeff, 16, p, qcoeff, dqcoeff, pd->dequant, eob,
554
45.2M
                     scan_order);
555
45.2M
      break;
556
68.2M
  }
557
68.2M
}
558
559
static void encode_block(int plane, int block, int row, int col,
560
10.9M
                         BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
561
10.9M
  struct encode_b_args *const args = arg;
562
#if CONFIG_MISMATCH_DEBUG
563
  int mi_row = args->mi_row;
564
  int mi_col = args->mi_col;
565
  int output_enabled = args->output_enabled;
566
#endif
567
10.9M
  MACROBLOCK *const x = args->x;
568
10.9M
  MACROBLOCKD *const xd = &x->e_mbd;
569
10.9M
  struct macroblock_plane *const p = &x->plane[plane];
570
10.9M
  struct macroblockd_plane *const pd = &xd->plane[plane];
571
10.9M
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
572
10.9M
  uint8_t *dst;
573
10.9M
  ENTROPY_CONTEXT *a, *l;
574
10.9M
  dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col];
575
10.9M
  a = &args->ta[col];
576
10.9M
  l = &args->tl[row];
577
578
  // TODO(jingning): per transformed block zero forcing only enabled for
579
  // luma component. will integrate chroma components as well.
580
10.9M
  if (x->zcoeff_blk[tx_size][block] && plane == 0) {
581
3.59M
    p->eobs[block] = 0;
582
3.59M
    *a = *l = 0;
583
#if CONFIG_MISMATCH_DEBUG
584
    goto encode_block_end;
585
#else
586
3.59M
    return;
587
3.59M
#endif
588
3.59M
  }
589
590
7.36M
  if (!x->skip_recode) {
591
7.36M
    if (x->quant_fp) {
592
      // Encoding process for rtc mode
593
0
      if (x->skip_txfm[0] == SKIP_TXFM_AC_DC && plane == 0) {
594
        // skip forward transform
595
0
        p->eobs[block] = 0;
596
0
        *a = *l = 0;
597
#if CONFIG_MISMATCH_DEBUG
598
        goto encode_block_end;
599
#else
600
0
        return;
601
0
#endif
602
0
      } else {
603
0
        vp9_xform_quant_fp(x, plane, block, row, col, plane_bsize, tx_size);
604
0
      }
605
7.36M
    } else {
606
7.36M
      if (max_txsize_lookup[plane_bsize] == tx_size) {
607
4.58M
        int txfm_blk_index = (plane << 2) + (block >> (tx_size << 1));
608
4.58M
        if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_NONE) {
609
          // full forward transform and quantization
610
4.58M
          vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);
611
4.58M
        } else if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_AC_ONLY) {
612
          // fast path forward transform and quantization
613
0
          vp9_xform_quant_dc(x, plane, block, row, col, plane_bsize, tx_size);
614
0
        } else {
615
          // skip forward transform
616
0
          p->eobs[block] = 0;
617
0
          *a = *l = 0;
618
#if CONFIG_MISMATCH_DEBUG
619
          goto encode_block_end;
620
#else
621
0
          return;
622
0
#endif
623
0
        }
624
4.58M
      } else {
625
2.77M
        vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);
626
2.77M
      }
627
7.36M
    }
628
7.36M
  }
629
630
7.36M
  if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
631
0
    const int ctx = combine_entropy_contexts(*a, *l);
632
0
    *a = *l = vp9_optimize_b(x, plane, block, tx_size, ctx) > 0;
633
7.36M
  } else {
634
7.36M
    *a = *l = p->eobs[block] > 0;
635
7.36M
  }
636
637
7.36M
  if (p->eobs[block]) *(args->skip) = 0;
638
639
7.36M
  if (x->skip_encode || p->eobs[block] == 0) {
640
#if CONFIG_MISMATCH_DEBUG
641
    goto encode_block_end;
642
#else
643
379k
    return;
644
379k
#endif
645
379k
  }
646
6.98M
#if CONFIG_VP9_HIGHBITDEPTH
647
6.98M
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
648
0
    uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst);
649
0
    switch (tx_size) {
650
0
      case TX_32X32:
651
0
        vp9_highbd_idct32x32_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
652
0
                                 xd->bd);
653
0
        break;
654
0
      case TX_16X16:
655
0
        vp9_highbd_idct16x16_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
656
0
                                 xd->bd);
657
0
        break;
658
0
      case TX_8X8:
659
0
        vp9_highbd_idct8x8_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
660
0
                               xd->bd);
661
0
        break;
662
0
      default:
663
0
        assert(tx_size == TX_4X4);
664
        // this is like vp9_short_idct4x4 but has a special case around eob<=1
665
        // which is significant (not just an optimization) for the lossless
666
        // case.
667
0
        x->highbd_inv_txfm_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
668
0
                               xd->bd);
669
0
        break;
670
0
    }
671
#if CONFIG_MISMATCH_DEBUG
672
    goto encode_block_end;
673
#else
674
0
    return;
675
0
#endif
676
0
  }
677
6.98M
#endif  // CONFIG_VP9_HIGHBITDEPTH
678
679
6.98M
  switch (tx_size) {
680
7.59k
    case TX_32X32:
681
7.59k
      vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
682
7.59k
      break;
683
52.4k
    case TX_16X16:
684
52.4k
      vp9_idct16x16_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
685
52.4k
      break;
686
362k
    case TX_8X8:
687
362k
      vp9_idct8x8_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
688
362k
      break;
689
6.55M
    default:
690
6.55M
      assert(tx_size == TX_4X4);
691
      // this is like vp9_short_idct4x4 but has a special case around eob<=1
692
      // which is significant (not just an optimization) for the lossless
693
      // case.
694
6.55M
      x->inv_txfm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
695
6.55M
      break;
696
6.98M
  }
697
#if CONFIG_MISMATCH_DEBUG
698
encode_block_end:
699
  if (output_enabled) {
700
    int pixel_c, pixel_r;
701
    int blk_w = 1 << (tx_size + TX_UNIT_SIZE_LOG2);
702
    int blk_h = 1 << (tx_size + TX_UNIT_SIZE_LOG2);
703
    mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, col, row,
704
                    pd->subsampling_x, pd->subsampling_y);
705
    mismatch_record_block_tx(dst, pd->dst.stride, plane, pixel_c, pixel_r,
706
                             blk_w, blk_h,
707
                             xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH);
708
  }
709
#endif
710
6.98M
}
711
712
static void encode_block_pass1(int plane, int block, int row, int col,
713
                               BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
714
0
                               void *arg) {
715
0
  MACROBLOCK *const x = (MACROBLOCK *)arg;
716
0
  MACROBLOCKD *const xd = &x->e_mbd;
717
0
  struct macroblock_plane *const p = &x->plane[plane];
718
0
  struct macroblockd_plane *const pd = &xd->plane[plane];
719
0
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
720
0
  uint8_t *dst;
721
0
  dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col];
722
723
0
  vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);
724
725
0
  if (p->eobs[block] > 0) {
726
0
#if CONFIG_VP9_HIGHBITDEPTH
727
0
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
728
0
      x->highbd_inv_txfm_add(dqcoeff, CONVERT_TO_SHORTPTR(dst), pd->dst.stride,
729
0
                             p->eobs[block], xd->bd);
730
0
      return;
731
0
    }
732
0
#endif  // CONFIG_VP9_HIGHBITDEPTH
733
0
    x->inv_txfm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
734
0
  }
735
0
}
736
737
0
// Encodes plane 0 of block `bsize`: computes the residual with
// vp9_subtract_plane() and then runs encode_block_pass1() on every transform
// block of that plane.
void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) {
  const int y_plane = 0;
  MACROBLOCKD *const xd = &x->e_mbd;

  vp9_subtract_plane(x, bsize, y_plane);
  vp9_foreach_transformed_block_in_plane(xd, bsize, y_plane,
                                         encode_block_pass1, x);
}
742
743
// Runs encode_block() over every transform block of every plane of a block
// of size `bsize`.
//
// The current mode info is marked as skipped up front, and nothing else
// happens when x->skip is set.  Otherwise, for each plane:
//   - the residual is recomputed unless x->skip_recode is set;
//   - trellis optimization is enabled (and the entropy contexts fetched) only
//     when x->optimize is set and recode/optimize are not both skipped.
//
// mi_row, mi_col and output_enabled are only forwarded to the callback in
// CONFIG_MISMATCH_DEBUG builds; otherwise they are unused.
void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, int mi_col,
                   int output_enabled) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const cur_mi = xd->mi[0];
  struct optimize_ctx opt_ctx;
  int pl;
#if CONFIG_MISMATCH_DEBUG
  struct encode_b_args arg = { x,
                               1,     // enable_trellis_opt
                               0.0,   // trellis_opt_thresh
                               NULL,  // &sse_calc_done
                               NULL,  // &sse
                               NULL,  // above entropy context
                               NULL,  // left entropy context
                               &cur_mi->skip,
                               mi_row,
                               mi_col,
                               output_enabled };
#else
  struct encode_b_args arg = { x,
                               1,     // enable_trellis_opt
                               0.0,   // trellis_opt_thresh
                               NULL,  // &sse_calc_done
                               NULL,  // &sse
                               NULL,  // above entropy context
                               NULL,  // left entropy context
                               &cur_mi->skip };
  (void)mi_row;
  (void)mi_col;
  (void)output_enabled;
#endif

  cur_mi->skip = 1;

  if (x->skip) {
    return;
  }

  for (pl = 0; pl < MAX_MB_PLANE; ++pl) {
    int use_trellis;

    if (!x->skip_recode) {
      vp9_subtract_plane(x, bsize, pl);
    }

    use_trellis = x->optimize && (!x->skip_recode || !x->skip_optimize);
    if (use_trellis) {
      const struct macroblockd_plane *const pd = &xd->plane[pl];
      const TX_SIZE tx_size =
          pl ? get_uv_tx_size(cur_mi, pd) : cur_mi->tx_size;
      vp9_get_entropy_contexts(bsize, tx_size, pd, opt_ctx.ta[pl],
                               opt_ctx.tl[pl]);
    }
    arg.enable_trellis_opt = use_trellis;

    // The context arrays are handed over regardless of whether they were
    // populated above.
    arg.ta = opt_ctx.ta[pl];
    arg.tl = opt_ctx.tl[pl];

    vp9_foreach_transformed_block_in_plane(xd, bsize, pl, encode_block, &arg);
  }
}
795
796
void vp9_encode_block_intra(int plane, int block, int row, int col,
797
                            BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
798
296M
                            void *arg) {
799
296M
  struct encode_b_args *const args = arg;
800
296M
  MACROBLOCK *const x = args->x;
801
296M
  MACROBLOCKD *const xd = &x->e_mbd;
802
296M
  MODE_INFO *mi = xd->mi[0];
803
296M
  struct macroblock_plane *const p = &x->plane[plane];
804
296M
  struct macroblockd_plane *const pd = &xd->plane[plane];
805
296M
  tran_low_t *coeff = BLOCK_OFFSET(p->coeff, block);
806
296M
  tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
807
296M
  tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
808
296M
  const ScanOrder *scan_order;
809
296M
  TX_TYPE tx_type = DCT_DCT;
810
296M
  PREDICTION_MODE mode;
811
296M
  const int bwl = b_width_log2_lookup[plane_bsize];
812
296M
  const int diff_stride = 4 * (1 << bwl);
813
296M
  uint8_t *src, *dst;
814
296M
  int16_t *src_diff;
815
296M
  uint16_t *eob = &p->eobs[block];
816
296M
  const int src_stride = p->src.stride;
817
296M
  const int dst_stride = pd->dst.stride;
818
296M
  int enable_trellis_opt = !x->skip_recode;
819
296M
  ENTROPY_CONTEXT *a = NULL;
820
296M
  ENTROPY_CONTEXT *l = NULL;
821
296M
  int entropy_ctx = 0;
822
296M
  dst = &pd->dst.buf[4 * (row * dst_stride + col)];
823
296M
  src = &p->src.buf[4 * (row * src_stride + col)];
824
296M
  src_diff = &p->src_diff[4 * (row * diff_stride + col)];
825
826
296M
  if (tx_size == TX_4X4) {
827
226M
    tx_type = get_tx_type_4x4(get_plane_type(plane), xd, block);
828
226M
    scan_order = &vp9_scan_orders[TX_4X4][tx_type];
829
226M
    mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mi->uv_mode;
830
226M
  } else {
831
69.9M
    mode = plane == 0 ? mi->mode : mi->uv_mode;
832
69.9M
    if (tx_size == TX_32X32) {
833
2.67M
      scan_order = &vp9_default_scan_orders[TX_32X32];
834
67.3M
    } else {
835
67.3M
      tx_type = get_tx_type(get_plane_type(plane), xd);
836
67.3M
      scan_order = &vp9_scan_orders[tx_size][tx_type];
837
67.3M
    }
838
69.9M
  }
839
840
296M
  vp9_predict_intra_block(
841
296M
      xd, bwl, tx_size, mode, (x->skip_encode || x->fp_src_pred) ? src : dst,
842
296M
      (x->skip_encode || x->fp_src_pred) ? src_stride : dst_stride, dst,
843
296M
      dst_stride, col, row, plane);
844
845
  // skip block condition should be handled before this is called.
846
296M
  assert(!x->skip_block);
847
848
296M
  if (!x->skip_recode) {
849
296M
    const int tx_size_in_pixels = (1 << tx_size) << 2;
850
296M
#if CONFIG_VP9_HIGHBITDEPTH
851
296M
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
852
0
      vpx_highbd_subtract_block(tx_size_in_pixels, tx_size_in_pixels, src_diff,
853
0
                                diff_stride, src, src_stride, dst, dst_stride,
854
0
                                xd->bd);
855
296M
    } else {
856
296M
      vpx_subtract_block(tx_size_in_pixels, tx_size_in_pixels, src_diff,
857
296M
                         diff_stride, src, src_stride, dst, dst_stride);
858
296M
    }
859
#else
860
    vpx_subtract_block(tx_size_in_pixels, tx_size_in_pixels, src_diff,
861
                       diff_stride, src, src_stride, dst, dst_stride);
862
#endif
863
296M
    enable_trellis_opt = do_trellis_opt(pd, src_diff, diff_stride, row, col,
864
296M
                                        plane_bsize, tx_size, args);
865
296M
  }
866
867
296M
  if (enable_trellis_opt) {
868
12.6M
    a = &args->ta[col];
869
12.6M
    l = &args->tl[row];
870
12.6M
    entropy_ctx = combine_entropy_contexts(*a, *l);
871
12.6M
  }
872
873
296M
#if CONFIG_VP9_HIGHBITDEPTH
874
296M
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
875
0
    uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst);
876
0
    switch (tx_size) {
877
0
      case TX_32X32:
878
0
        if (!x->skip_recode) {
879
0
          highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
880
0
          vpx_highbd_quantize_b_32x32(coeff, p, qcoeff, dqcoeff, pd->dequant,
881
0
                                      eob, scan_order);
882
0
        }
883
0
        if (enable_trellis_opt) {
884
0
          *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
885
0
        }
886
0
        if (!x->skip_encode && *eob) {
887
0
          vp9_highbd_idct32x32_add(dqcoeff, dst16, dst_stride, *eob, xd->bd);
888
0
        }
889
0
        break;
890
0
      case TX_16X16:
891
0
        if (!x->skip_recode) {
892
0
          if (tx_type == DCT_DCT)
893
0
            vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
894
0
          else
895
0
            vp9_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type);
896
0
          vpx_highbd_quantize_b(coeff, 256, p, qcoeff, dqcoeff, pd->dequant,
897
0
                                eob, scan_order);
898
0
        }
899
0
        if (enable_trellis_opt) {
900
0
          *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
901
0
        }
902
0
        if (!x->skip_encode && *eob) {
903
0
          vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst16, dst_stride, *eob,
904
0
                                  xd->bd);
905
0
        }
906
0
        break;
907
0
      case TX_8X8:
908
0
        if (!x->skip_recode) {
909
0
          if (tx_type == DCT_DCT)
910
0
            vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
911
0
          else
912
0
            vp9_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type);
913
0
          vpx_highbd_quantize_b(coeff, 64, p, qcoeff, dqcoeff, pd->dequant, eob,
914
0
                                scan_order);
915
0
        }
916
0
        if (enable_trellis_opt) {
917
0
          *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
918
0
        }
919
0
        if (!x->skip_encode && *eob) {
920
0
          vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst16, dst_stride, *eob,
921
0
                                xd->bd);
922
0
        }
923
0
        break;
924
0
      default:
925
0
        assert(tx_size == TX_4X4);
926
0
        if (!x->skip_recode) {
927
0
          if (tx_type != DCT_DCT)
928
0
            vp9_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type);
929
0
          else
930
0
            x->fwd_txfm4x4(src_diff, coeff, diff_stride);
931
0
          vpx_highbd_quantize_b(coeff, 16, p, qcoeff, dqcoeff, pd->dequant, eob,
932
0
                                scan_order);
933
0
        }
934
0
        if (enable_trellis_opt) {
935
0
          *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
936
0
        }
937
0
        if (!x->skip_encode && *eob) {
938
0
          if (tx_type == DCT_DCT) {
939
            // this is like vp9_short_idct4x4 but has a special case around
940
            // eob<=1 which is significant (not just an optimization) for the
941
            // lossless case.
942
0
            x->highbd_inv_txfm_add(dqcoeff, dst16, dst_stride, *eob, xd->bd);
943
0
          } else {
944
0
            vp9_highbd_iht4x4_16_add(dqcoeff, dst16, dst_stride, tx_type,
945
0
                                     xd->bd);
946
0
          }
947
0
        }
948
0
        break;
949
0
    }
950
0
    if (*eob) *(args->skip) = 0;
951
0
    return;
952
0
  }
953
296M
#endif  // CONFIG_VP9_HIGHBITDEPTH
954
955
296M
  switch (tx_size) {
956
2.67M
    case TX_32X32:
957
2.67M
      if (!x->skip_recode) {
958
2.67M
        fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
959
2.67M
        vpx_quantize_b_32x32(coeff, p, qcoeff, dqcoeff, pd->dequant, eob,
960
2.67M
                             scan_order);
961
2.67M
      }
962
2.67M
      if (enable_trellis_opt) {
963
206k
        *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
964
206k
      }
965
2.67M
      if (!x->skip_encode && *eob)
966
2.10M
        vp9_idct32x32_add(dqcoeff, dst, dst_stride, *eob);
967
2.67M
      break;
968
11.3M
    case TX_16X16:
969
11.3M
      if (!x->skip_recode) {
970
11.3M
        vp9_fht16x16(src_diff, coeff, diff_stride, tx_type);
971
11.3M
        vpx_quantize_b(coeff, 256, p, qcoeff, dqcoeff, pd->dequant, eob,
972
11.3M
                       scan_order);
973
11.3M
      }
974
11.3M
      if (enable_trellis_opt) {
975
447k
        *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
976
447k
      }
977
11.3M
      if (!x->skip_encode && *eob)
978
9.93M
        vp9_iht16x16_add(tx_type, dqcoeff, dst, dst_stride, *eob);
979
11.3M
      break;
980
55.9M
    case TX_8X8:
981
55.9M
      if (!x->skip_recode) {
982
55.9M
        vp9_fht8x8(src_diff, coeff, diff_stride, tx_type);
983
55.9M
        vpx_quantize_b(coeff, 64, p, qcoeff, dqcoeff, pd->dequant, eob,
984
55.9M
                       scan_order);
985
55.9M
      }
986
55.9M
      if (enable_trellis_opt) {
987
2.17M
        *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
988
2.17M
      }
989
55.9M
      if (!x->skip_encode && *eob)
990
48.1M
        vp9_iht8x8_add(tx_type, dqcoeff, dst, dst_stride, *eob);
991
55.9M
      break;
992
226M
    default:
993
226M
      assert(tx_size == TX_4X4);
994
226M
      if (!x->skip_recode) {
995
226M
        if (tx_type != DCT_DCT)
996
22.3M
          vp9_fht4x4(src_diff, coeff, diff_stride, tx_type);
997
203M
        else
998
203M
          x->fwd_txfm4x4(src_diff, coeff, diff_stride);
999
226M
        vpx_quantize_b(coeff, 16, p, qcoeff, dqcoeff, pd->dequant, eob,
1000
226M
                       scan_order);
1001
226M
      }
1002
226M
      if (enable_trellis_opt) {
1003
9.83M
        *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
1004
9.83M
      }
1005
226M
      if (!x->skip_encode && *eob) {
1006
179M
        if (tx_type == DCT_DCT)
1007
          // this is like vp9_short_idct4x4 but has a special case around eob<=1
1008
          // which is significant (not just an optimization) for the lossless
1009
          // case.
1010
160M
          x->inv_txfm_add(dqcoeff, dst, dst_stride, *eob);
1011
19.0M
        else
1012
19.0M
          vp9_iht4x4_16_add(dqcoeff, dst, dst_stride, tx_type);
1013
179M
      }
1014
226M
      break;
1015
296M
  }
1016
296M
  if (*eob) *(args->skip) = 0;
1017
296M
}
1018
1019
// Intra-encodes every transform block of one plane by running
// vp9_encode_block_intra() over it.
//
// The caller's enable_trellis_opt request is honoured only when x->optimize
// is set and recode/optimize are not both being skipped; in that case the
// plane's above/left entropy contexts are fetched first.  Otherwise trellis
// optimization is switched off for the whole plane.
void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane,
                                  int enable_trellis_opt) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  struct optimize_ctx ctx;
#if CONFIG_MISMATCH_DEBUG
  // TODO(angiebird): make mismatch_debug support intra mode
  struct encode_b_args arg = { x,
                               enable_trellis_opt,
                               0.0,   // trellis_opt_thresh
                               NULL,  // &sse_calc_done
                               NULL,  // &sse
                               ctx.ta[plane],
                               ctx.tl[plane],
                               &xd->mi[0]->skip,
                               0,    // mi_row
                               0,    // mi_col
                               0 };  // output_enabled
#else
  struct encode_b_args arg = { x,
                               enable_trellis_opt,
                               0.0,   // trellis_opt_thresh
                               NULL,  // &sse_calc_done
                               NULL,  // &sse
                               ctx.ta[plane],
                               ctx.tl[plane],
                               &xd->mi[0]->skip };
#endif

  if (!enable_trellis_opt || !x->optimize ||
      (x->skip_recode && x->skip_optimize)) {
    arg.enable_trellis_opt = 0;
  } else {
    const struct macroblockd_plane *const pd = &xd->plane[plane];
    const TX_SIZE tx_size =
        plane ? get_uv_tx_size(xd->mi[0], pd) : xd->mi[0]->tx_size;
    vp9_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane], ctx.tl[plane]);
  }

  vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
                                         vp9_encode_block_intra, &arg);
}