Coverage Report

Created: 2025-12-31 07:57

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libvpx/vp9/encoder/vp9_encodemb.c
Line
Count
Source
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5
 *  that can be found in the LICENSE file in the root of the source
6
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
9
 */
10
11
#include "./vp9_rtcd.h"
12
#include "./vpx_config.h"
13
#include "./vpx_dsp_rtcd.h"
14
15
#include "vpx_dsp/quantize.h"
16
#include "vpx_mem/vpx_mem.h"
17
#include "vpx_ports/mem.h"
18
19
#if CONFIG_MISMATCH_DEBUG
20
#include "vpx_util/vpx_debug_util.h"
21
#endif
22
23
#include "vp9/common/vp9_idct.h"
24
#include "vp9/common/vp9_reconinter.h"
25
#include "vp9/common/vp9_reconintra.h"
26
#include "vp9/common/vp9_scan.h"
27
28
#include "vp9/encoder/vp9_encodemb.h"
29
#include "vp9/encoder/vp9_encoder.h"
30
#include "vp9/encoder/vp9_rd.h"
31
#include "vp9/encoder/vp9_tokenize.h"
32
33
struct optimize_ctx {
34
  ENTROPY_CONTEXT ta[MAX_MB_PLANE][16];
35
  ENTROPY_CONTEXT tl[MAX_MB_PLANE][16];
36
};
37
38
42.8M
// Computes the residual for one plane of a block: the plane's source pixels
// minus the pixels currently held in the plane's dst buffer. The result is
// written to the plane's src_diff buffer using a stride equal to the plane
// block width.
//
//   x      macroblock whose plane[plane].src_diff receives the residual
//   bsize  block size; converted to the plane's own block size before use
//   plane  index of the plane to process
void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const mb_plane = &x->plane[plane];
  const struct macroblockd_plane *const xd_plane = &xd->plane[plane];
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, xd_plane);
  // The lookup tables count 4x4 units, so scale by 4 to get pixels.
  const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize];

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    vpx_highbd_subtract_block(height, width, mb_plane->src_diff, width,
                              mb_plane->src.buf, mb_plane->src.stride,
                              xd_plane->dst.buf, xd_plane->dst.stride, xd->bd);
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH
  vpx_subtract_block(height, width, mb_plane->src_diff, width,
                     mb_plane->src.buf, mb_plane->src.stride,
                     xd_plane->dst.buf, xd_plane->dst.stride);
}
56
57
// Per-plane scale factors applied to the macroblock's rdmult in
// vp9_optimize_b(). Indexed as [ref][plane_type], where ref is the result of
// is_inter_block() and plane_type is 0 for plane 0 and non-zero otherwise.
static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = {
  { 10, 6 },  // ref == 0
  { 8, 5 },   // ref == 1
};
61
62
// Right shift that rounds toward zero: a negative 'num' is negated, shifted,
// and negated again, so no negative value is ever right-shifted.
// 'num' can be negative, but 'shift' must be non-negative.
// Both arguments are expanded more than once, so they must not have side
// effects.
#define RIGHT_SHIFT_POSSIBLY_NEGATIVE(num, shift) \
  (((num) >= 0) ? (num) >> (shift) : -((-(num)) >> (shift)))
65
66
// Greedy rate-distortion refinement of the quantized coefficients of one
// transform block.
//
// Walking the coefficients in scan order, each non-zero quantized value is
// either kept or has its magnitude reduced by one, whichever gives the lower
// RD cost. In parallel the function tracks the cheapest position at which the
// block could be terminated (end of block). On exit qcoeff/dqcoeff hold the
// chosen values, everything from the chosen end of block up to the old one is
// zeroed, and the plane's eobs[block] is updated.
//
//   mb       macroblock being encoded
//   plane    plane index of the block
//   block    block index within the plane (used with BLOCK_OFFSET)
//   tx_size  transform size of the block
//   ctx      entropy context used for the first coefficient
//
// Returns the new end-of-block position.
int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
                   int ctx) {
  // Token cost table of one band: [token tree selector][context][token].
  typedef unsigned int band_token_costs[2][COEFF_CONTEXTS][ENTROPY_TOKENS];

  MACROBLOCKD *const xd = &mb->e_mbd;
  struct macroblock_plane *const p = &mb->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const MODE_INFO *mi = xd->mi[0];
  const int ref = is_inter_block(xd->mi[0]);
  uint8_t token_cache[1024];
  const tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  const int eob = p->eobs[block];
  const PLANE_TYPE plane_type = get_plane_type(plane);
  const int default_eob = 16 << (tx_size << 1);
  // Distortion differences are scaled up by one bit for 32x32 transforms.
  const int shift = (tx_size == TX_32X32);
  const int16_t *const dequant_ptr = pd->dequant;
  const uint8_t *const band_translate = get_band_translate(tx_size);
  const ScanOrder *const scan_order = get_scan(xd, tx_size, plane_type, block);
  const int16_t *const scan = scan_order->scan;
  const int16_t *const neighbors = scan_order->neighbors;
  const int sharpness = mb->sharpness;
  const int64_t rdadj = (int64_t)mb->rdmult * plane_rd_mult[ref][plane_type];
  const int64_t rdmult =
      (sharpness == 0 ? rdadj >> 1
                      : (rdadj * (8 - sharpness + mi->segment_id)) >> 4);
  const int64_t rddiv = mb->rddiv;
#if CONFIG_VP9_HIGHBITDEPTH
  const uint16_t *cat6_high_cost = vp9_get_high_cost_table(xd->bd);
#else
  const uint16_t *cat6_high_cost = vp9_get_high_cost_table(8);
#endif
  band_token_costs *const token_costs =
      mb->token_costs[tx_size][plane_type][ref];
  band_token_costs *token_costs_cur;
  int64_t rate0, rate1;
  int16_t t0, t1;
  int i;
  int final_eob = 0;
  int count_high_values_after_eob = 0;
  int64_t accu_rate = 0;
  // Initialized to the worst possible error for the largest transform size.
  // This ensures that it never goes negative.
  int64_t accu_error = ((int64_t)1) << 50;
  int64_t best_block_rd_cost;
  // Previous quantized value in scan order; selects the token tree.
  int x_prev = 1;
  // Quantized / de-quantized value to store just before the best eob.
  tran_low_t before_best_eob_qc = 0;
  tran_low_t before_best_eob_dqc = 0;

  assert((!plane_type && !plane) || (plane_type && plane));
  assert(eob <= default_eob);

  // Seed the token cache with the energy class of every current coefficient.
  for (i = 0; i < eob; i++) {
    const int rc = scan[i];
    token_cache[rc] = vp9_pt_energy_class[vp9_get_token(qcoeff[rc])];
  }

  // Initial RD cost: the block is terminated immediately (eob == 0).
  token_costs_cur = token_costs + band_translate[0];
  rate0 = (*token_costs_cur)[0][ctx][EOB_TOKEN];
  best_block_rd_cost = RDCOST(rdmult, rddiv, rate0, accu_error);

  // For each token, pick one of two choices greedily:
  // (i) First candidate: Keep current quantized value, OR
  // (ii) Second candidate: Reduce quantized value by 1.
  for (i = 0; i < eob; i++) {
    const int rc = scan[i];
    const int qc = qcoeff[rc];
    const int band_cur = band_translate[i];
    const int ctx_cur =
        (i == 0) ? ctx : get_coef_context(neighbors, token_cache, i);
    const int token_tree_sel_cur = (x_prev == 0);
    token_costs_cur = token_costs + band_cur;

    if (qc == 0) {
      // Nothing to search: only the rate changes, accu_error stays as is.
      const int token = vp9_get_token(qc);
      rate0 = (*token_costs_cur)[token_tree_sel_cur][ctx_cur][token];
      accu_rate += rate0;
      x_prev = 0;
    } else {
      const int dqv = dequant_ptr[rc != 0];
      const int negative = (qc < 0);
      // Second candidate: same sign, magnitude smaller by one.
      const int qc_lower = negative ? qc + 1 : qc - 1;
#if CONFIG_VP9_HIGHBITDEPTH
      const int highbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) != 0;
#endif

      // Distortion if the coefficient were quantized to 0.
      const int diff_for_zero_raw = (0 - coeff[rc]) * (1 << shift);
#if CONFIG_VP9_HIGHBITDEPTH
      const int diff_for_zero =
          highbd ? RIGHT_SHIFT_POSSIBLY_NEGATIVE(diff_for_zero_raw, xd->bd - 8)
                 : diff_for_zero_raw;
#else
      const int diff_for_zero = diff_for_zero_raw;
#endif
      const int64_t distortion_for_zero =
          (int64_t)diff_for_zero * diff_for_zero;

      // Distortion of the first candidate (value kept).
      const int diff0_raw = (dqcoeff[rc] - coeff[rc]) * (1 << shift);
#if CONFIG_VP9_HIGHBITDEPTH
      const int diff0 =
          highbd ? RIGHT_SHIFT_POSSIBLY_NEGATIVE(diff0_raw, xd->bd - 8)
                 : diff0_raw;
#else
      const int diff0 = diff0_raw;
#endif  // CONFIG_VP9_HIGHBITDEPTH
      const int64_t distortion0 = (int64_t)diff0 * diff0;

      // Distortion of the second candidate (magnitude reduced).
      int64_t distortion1;
      if (qc_lower != 0) {
#if CONFIG_VP9_HIGHBITDEPTH
        const int dqv_step = highbd ? dqv >> (xd->bd - 8) : dqv;
#else
        const int dqv_step = dqv;
#endif  // CONFIG_VP9_HIGHBITDEPTH
        // One quantizer step carrying the sign of the coefficient.
        const int diff_step = negative ? -dqv_step : dqv_step;
        const int diff1 = diff0 - diff_step;
        assert(dqv > 0);  // We aren't right shifting a negative number above.
        distortion1 = (int64_t)diff1 * diff1;
      } else {
        distortion1 = distortion_for_zero;
      }

      {
        // Token costs of the two candidates; also yields their tokens t0/t1.
        const int64_t base_bits0 = vp9_get_token_cost(qc, &t0, cat6_high_cost);
        const int64_t base_bits1 =
            vp9_get_token_cost(qc_lower, &t1, cat6_high_cost);
        int64_t rd_cost0, rd_cost1;
        int64_t eob_cost0, eob_cost1;
        int64_t best_eob_cost_cur;
        // Effect of each candidate on the cost of the following position.
        int64_t next_bits0 = 0;
        int64_t next_bits1 = 0;
        int64_t next_eob_bits0 = 0;
        int64_t next_eob_bits1 = 0;
        int rdcost_better_for_x1, eob_rdcost_better_for_x1;
        int dqc0, dqc1;
        int use_x1;

        rate0 =
            base_bits0 + (*token_costs_cur)[token_tree_sel_cur][ctx_cur][t0];
        rate1 =
            base_bits1 + (*token_costs_cur)[token_tree_sel_cur][ctx_cur][t1];

        if (i < default_eob - 1) {
          const int band_next = band_translate[i + 1];
          const int token_next =
              (i + 1 != eob) ? vp9_get_token(qcoeff[scan[i + 1]]) : EOB_TOKEN;
          band_token_costs *const token_costs_next = token_costs + band_next;
          int ctx_next, token_tree_sel_next;

          // Next-position cost if the first candidate is chosen here.
          token_cache[rc] = vp9_pt_energy_class[t0];
          ctx_next = get_coef_context(neighbors, token_cache, i + 1);
          token_tree_sel_next = (qc == 0);
          next_bits0 =
              (*token_costs_next)[token_tree_sel_next][ctx_next][token_next];
          next_eob_bits0 =
              (*token_costs_next)[token_tree_sel_next][ctx_next][EOB_TOKEN];

          // Next-position cost if the second candidate is chosen here.
          token_cache[rc] = vp9_pt_energy_class[t1];
          ctx_next = get_coef_context(neighbors, token_cache, i + 1);
          token_tree_sel_next = (qc_lower == 0);
          next_bits1 =
              (*token_costs_next)[token_tree_sel_next][ctx_next][token_next];
          if (qc_lower != 0) {
            next_eob_bits1 =
                (*token_costs_next)[token_tree_sel_next][ctx_next][EOB_TOKEN];
          }
        }

        // Compare the total RD costs for two candidates.
        rd_cost0 = RDCOST(rdmult, rddiv, (rate0 + next_bits0), distortion0);
        rd_cost1 = RDCOST(rdmult, rddiv, (rate1 + next_bits1), distortion1);
        rdcost_better_for_x1 = (rd_cost1 < rd_cost0);

        // Cost of ending the block right after this position, per candidate.
        eob_cost0 = RDCOST(rdmult, rddiv, (accu_rate + rate0 + next_eob_bits0),
                           (accu_error + distortion0 - distortion_for_zero));
        eob_cost1 = eob_cost0;
        if (qc_lower != 0) {
          eob_cost1 =
              RDCOST(rdmult, rddiv, (accu_rate + rate1 + next_eob_bits1),
                     (accu_error + distortion1 - distortion_for_zero));
          eob_rdcost_better_for_x1 = (eob_cost1 < eob_cost0);
        } else {
          eob_rdcost_better_for_x1 = 0;
        }

        // Calculate the two candidate de-quantized values. The second one is
        // only needed when the reduced value may actually be used.
        dqc0 = dqcoeff[rc];
        dqc1 = 0;
        if ((rdcost_better_for_x1 || eob_rdcost_better_for_x1) &&
            qc_lower != 0) {
          dqc1 = RIGHT_SHIFT_POSSIBLY_NEGATIVE(qc_lower * dqv, shift);
        }

        // Pick and record the better quantized and de-quantized values.
        if (rdcost_better_for_x1) {
          qcoeff[rc] = qc_lower;
          dqcoeff[rc] = dqc1;
          accu_rate += rate1;
          accu_error += distortion1 - distortion_for_zero;
          assert(distortion1 <= distortion_for_zero);
          token_cache[rc] = vp9_pt_energy_class[t1];
        } else {
          accu_rate += rate0;
          accu_error += distortion0 - distortion_for_zero;
          assert(distortion0 <= distortion_for_zero);
          token_cache[rc] = vp9_pt_energy_class[t0];
        }
        if (sharpness > 0 && abs(qcoeff[rc]) > 1) count_high_values_after_eob++;
        assert(accu_error >= 0);
        x_prev = qcoeff[rc];  // Update based on selected quantized value.

        use_x1 = (qc_lower != 0) && eob_rdcost_better_for_x1;
        best_eob_cost_cur = use_x1 ? eob_cost1 : eob_cost0;

        // Determine whether to move the eob position to i+1
        if (best_eob_cost_cur < best_block_rd_cost) {
          best_block_rd_cost = best_eob_cost_cur;
          final_eob = i + 1;
          count_high_values_after_eob = 0;
          if (use_x1) {
            before_best_eob_qc = qc_lower;
            before_best_eob_dqc = dqc1;
          } else {
            before_best_eob_qc = qc;
            before_best_eob_dqc = dqc0;
          }
        }
      }
    }
  }

  if (count_high_values_after_eob > 0) {
    // Values with magnitude above 1 were seen after the best eob (only
    // counted when sharpness > 0): keep all coefficients and place the eob
    // right after the last non-zero one.
    final_eob = eob;
    while (final_eob > 0 && qcoeff[scan[final_eob - 1]] == 0) final_eob--;
  } else {
    assert(final_eob <= eob);
    if (final_eob > 0) {
      int rc;
      assert(before_best_eob_qc != 0);
      rc = scan[final_eob - 1];
      qcoeff[rc] = before_best_eob_qc;
      dqcoeff[rc] = before_best_eob_dqc;
    }
    // Clear everything between the new eob and the old one.
    for (i = final_eob; i < eob; i++) {
      const int rc = scan[i];
      qcoeff[rc] = 0;
      dqcoeff[rc] = 0;
    }
  }
  p->eobs[block] = final_eob;
  return final_eob;
}
329
#undef RIGHT_SHIFT_POSSIBLY_NEGATIVE
330
331
static INLINE void fdct32x32(int rd_transform, const int16_t *src,
332
4.79M
                             tran_low_t *dst, int src_stride) {
333
4.79M
  if (rd_transform)
334
4.60M
    vpx_fdct32x32_rd(src, dst, src_stride);
335
195k
  else
336
195k
    vpx_fdct32x32(src, dst, src_stride);
337
4.79M
}
338
339
#if CONFIG_VP9_HIGHBITDEPTH
340
static INLINE void highbd_fdct32x32(int rd_transform, const int16_t *src,
341
0
                                    tran_low_t *dst, int src_stride) {
342
0
  if (rd_transform)
343
0
    vpx_highbd_fdct32x32_rd(src, dst, src_stride);
344
0
  else
345
0
    vpx_highbd_fdct32x32(src, dst, src_stride);
346
0
}
347
#endif  // CONFIG_VP9_HIGHBITDEPTH
348
349
void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, int row, int col,
350
0
                        BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
351
0
  MACROBLOCKD *const xd = &x->e_mbd;
352
0
  const struct macroblock_plane *const p = &x->plane[plane];
353
0
  const struct macroblockd_plane *const pd = &xd->plane[plane];
354
0
  const ScanOrder *const scan_order = &vp9_default_scan_orders[tx_size];
355
0
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
356
0
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
357
0
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
358
0
  uint16_t *const eob = &p->eobs[block];
359
0
  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
360
0
  const int16_t *src_diff;
361
0
  src_diff = &p->src_diff[4 * (row * diff_stride + col)];
362
  // skip block condition should be handled before this is called.
363
0
  assert(!x->skip_block);
364
365
0
#if CONFIG_VP9_HIGHBITDEPTH
366
0
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
367
0
    switch (tx_size) {
368
0
      case TX_32X32:
369
0
        highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
370
0
        vp9_highbd_quantize_fp_32x32(coeff, 1024, p, qcoeff, dqcoeff,
371
0
                                     pd->dequant, eob, scan_order);
372
0
        break;
373
0
      case TX_16X16:
374
0
        vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
375
0
        vp9_highbd_quantize_fp(coeff, 256, p, qcoeff, dqcoeff, pd->dequant, eob,
376
0
                               scan_order);
377
0
        break;
378
0
      case TX_8X8:
379
0
        vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
380
0
        vp9_highbd_quantize_fp(coeff, 64, p, qcoeff, dqcoeff, pd->dequant, eob,
381
0
                               scan_order);
382
0
        break;
383
0
      default:
384
0
        assert(tx_size == TX_4X4);
385
0
        x->fwd_txfm4x4(src_diff, coeff, diff_stride);
386
0
        vp9_highbd_quantize_fp(coeff, 16, p, qcoeff, dqcoeff, pd->dequant, eob,
387
0
                               scan_order);
388
0
        break;
389
0
    }
390
0
    return;
391
0
  }
392
0
#endif  // CONFIG_VP9_HIGHBITDEPTH
393
394
0
  switch (tx_size) {
395
0
    case TX_32X32:
396
0
      fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
397
0
      vp9_quantize_fp_32x32(coeff, 1024, p, qcoeff, dqcoeff, pd->dequant, eob,
398
0
                            scan_order);
399
0
      break;
400
0
    case TX_16X16:
401
0
      vpx_fdct16x16(src_diff, coeff, diff_stride);
402
0
      vp9_quantize_fp(coeff, 256, p, qcoeff, dqcoeff, pd->dequant, eob,
403
0
                      scan_order);
404
0
      break;
405
0
    case TX_8X8:
406
0
      vpx_fdct8x8(src_diff, coeff, diff_stride);
407
0
      vp9_quantize_fp(coeff, 64, p, qcoeff, dqcoeff, pd->dequant, eob,
408
0
                      scan_order);
409
410
0
      break;
411
0
    default:
412
0
      assert(tx_size == TX_4X4);
413
0
      x->fwd_txfm4x4(src_diff, coeff, diff_stride);
414
0
      vp9_quantize_fp(coeff, 16, p, qcoeff, dqcoeff, pd->dequant, eob,
415
0
                      scan_order);
416
0
      break;
417
0
  }
418
0
}
419
420
void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block, int row, int col,
421
457k
                        BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
422
457k
  MACROBLOCKD *const xd = &x->e_mbd;
423
457k
  const struct macroblock_plane *const p = &x->plane[plane];
424
457k
  const struct macroblockd_plane *const pd = &xd->plane[plane];
425
457k
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
426
457k
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
427
457k
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
428
457k
  uint16_t *const eob = &p->eobs[block];
429
457k
  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
430
457k
  const int16_t *src_diff;
431
457k
  src_diff = &p->src_diff[4 * (row * diff_stride + col)];
432
  // skip block condition should be handled before this is called.
433
457k
  assert(!x->skip_block);
434
435
457k
#if CONFIG_VP9_HIGHBITDEPTH
436
457k
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
437
0
    switch (tx_size) {
438
0
      case TX_32X32:
439
0
        vpx_highbd_fdct32x32_1(src_diff, coeff, diff_stride);
440
0
        vpx_highbd_quantize_dc_32x32(coeff, p->round, p->quant_fp[0], qcoeff,
441
0
                                     dqcoeff, pd->dequant[0], eob);
442
0
        break;
443
0
      case TX_16X16:
444
0
        vpx_highbd_fdct16x16_1(src_diff, coeff, diff_stride);
445
0
        vpx_highbd_quantize_dc(coeff, 256, p->round, p->quant_fp[0], qcoeff,
446
0
                               dqcoeff, pd->dequant[0], eob);
447
0
        break;
448
0
      case TX_8X8:
449
0
        vpx_highbd_fdct8x8_1(src_diff, coeff, diff_stride);
450
0
        vpx_highbd_quantize_dc(coeff, 64, p->round, p->quant_fp[0], qcoeff,
451
0
                               dqcoeff, pd->dequant[0], eob);
452
0
        break;
453
0
      default:
454
0
        assert(tx_size == TX_4X4);
455
0
        x->fwd_txfm4x4(src_diff, coeff, diff_stride);
456
0
        vpx_highbd_quantize_dc(coeff, 16, p->round, p->quant_fp[0], qcoeff,
457
0
                               dqcoeff, pd->dequant[0], eob);
458
0
        break;
459
0
    }
460
0
    return;
461
0
  }
462
457k
#endif  // CONFIG_VP9_HIGHBITDEPTH
463
464
457k
  switch (tx_size) {
465
3.83k
    case TX_32X32:
466
3.83k
      vpx_fdct32x32_1(src_diff, coeff, diff_stride);
467
3.83k
      vpx_quantize_dc_32x32(coeff, p->round, p->quant_fp[0], qcoeff, dqcoeff,
468
3.83k
                            pd->dequant[0], eob);
469
3.83k
      break;
470
11.1k
    case TX_16X16:
471
11.1k
      vpx_fdct16x16_1(src_diff, coeff, diff_stride);
472
11.1k
      vpx_quantize_dc(coeff, 256, p->round, p->quant_fp[0], qcoeff, dqcoeff,
473
11.1k
                      pd->dequant[0], eob);
474
11.1k
      break;
475
72.7k
    case TX_8X8:
476
72.7k
      vpx_fdct8x8_1(src_diff, coeff, diff_stride);
477
72.7k
      vpx_quantize_dc(coeff, 64, p->round, p->quant_fp[0], qcoeff, dqcoeff,
478
72.7k
                      pd->dequant[0], eob);
479
72.7k
      break;
480
369k
    default:
481
369k
      assert(tx_size == TX_4X4);
482
369k
      x->fwd_txfm4x4(src_diff, coeff, diff_stride);
483
369k
      vpx_quantize_dc(coeff, 16, p->round, p->quant_fp[0], qcoeff, dqcoeff,
484
369k
                      pd->dequant[0], eob);
485
369k
      break;
486
457k
  }
487
457k
}
488
489
void vp9_xform_quant(MACROBLOCK *x, int plane, int block, int row, int col,
490
66.5M
                     BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
491
66.5M
  MACROBLOCKD *const xd = &x->e_mbd;
492
66.5M
  const struct macroblock_plane *const p = &x->plane[plane];
493
66.5M
  const struct macroblockd_plane *const pd = &xd->plane[plane];
494
66.5M
  const ScanOrder *const scan_order = &vp9_default_scan_orders[tx_size];
495
66.5M
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
496
66.5M
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
497
66.5M
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
498
66.5M
  uint16_t *const eob = &p->eobs[block];
499
66.5M
  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
500
66.5M
  const int16_t *src_diff;
501
66.5M
  src_diff = &p->src_diff[4 * (row * diff_stride + col)];
502
  // skip block condition should be handled before this is called.
503
66.5M
  assert(!x->skip_block);
504
505
66.5M
#if CONFIG_VP9_HIGHBITDEPTH
506
66.5M
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
507
0
    switch (tx_size) {
508
0
      case TX_32X32:
509
0
        highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
510
0
        vpx_highbd_quantize_b_32x32(coeff, p, qcoeff, dqcoeff, pd->dequant, eob,
511
0
                                    scan_order);
512
0
        break;
513
0
      case TX_16X16:
514
0
        vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
515
0
        vpx_highbd_quantize_b(coeff, 256, p, qcoeff, dqcoeff, pd->dequant, eob,
516
0
                              scan_order);
517
0
        break;
518
0
      case TX_8X8:
519
0
        vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
520
0
        vpx_highbd_quantize_b(coeff, 64, p, qcoeff, dqcoeff, pd->dequant, eob,
521
0
                              scan_order);
522
0
        break;
523
0
      default:
524
0
        assert(tx_size == TX_4X4);
525
0
        x->fwd_txfm4x4(src_diff, coeff, diff_stride);
526
0
        vpx_highbd_quantize_b(coeff, 16, p, qcoeff, dqcoeff, pd->dequant, eob,
527
0
                              scan_order);
528
0
        break;
529
0
    }
530
0
    return;
531
0
  }
532
66.5M
#endif  // CONFIG_VP9_HIGHBITDEPTH
533
534
66.5M
  switch (tx_size) {
535
1.07M
    case TX_32X32:
536
1.07M
      fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
537
1.07M
      vpx_quantize_b_32x32(coeff, p, qcoeff, dqcoeff, pd->dequant, eob,
538
1.07M
                           scan_order);
539
1.07M
      break;
540
4.49M
    case TX_16X16:
541
4.49M
      vpx_fdct16x16(src_diff, coeff, diff_stride);
542
4.49M
      vpx_quantize_b(coeff, 256, p, qcoeff, dqcoeff, pd->dequant, eob,
543
4.49M
                     scan_order);
544
4.49M
      break;
545
18.6M
    case TX_8X8:
546
18.6M
      vpx_fdct8x8(src_diff, coeff, diff_stride);
547
18.6M
      vpx_quantize_b(coeff, 64, p, qcoeff, dqcoeff, pd->dequant, eob,
548
18.6M
                     scan_order);
549
18.6M
      break;
550
42.3M
    default:
551
42.3M
      assert(tx_size == TX_4X4);
552
42.3M
      x->fwd_txfm4x4(src_diff, coeff, diff_stride);
553
42.3M
      vpx_quantize_b(coeff, 16, p, qcoeff, dqcoeff, pd->dequant, eob,
554
42.3M
                     scan_order);
555
42.3M
      break;
556
66.5M
  }
557
66.5M
}
558
559
static void encode_block(int plane, int block, int row, int col,
560
9.99M
                         BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
561
9.99M
  struct encode_b_args *const args = arg;
562
#if CONFIG_MISMATCH_DEBUG
563
  int mi_row = args->mi_row;
564
  int mi_col = args->mi_col;
565
  int output_enabled = args->output_enabled;
566
#endif
567
9.99M
  MACROBLOCK *const x = args->x;
568
9.99M
  MACROBLOCKD *const xd = &x->e_mbd;
569
9.99M
  struct macroblock_plane *const p = &x->plane[plane];
570
9.99M
  struct macroblockd_plane *const pd = &xd->plane[plane];
571
9.99M
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
572
9.99M
  uint8_t *dst;
573
9.99M
  ENTROPY_CONTEXT *a, *l;
574
9.99M
  dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col];
575
9.99M
  a = &args->ta[col];
576
9.99M
  l = &args->tl[row];
577
578
  // TODO(jingning): per transformed block zero forcing only enabled for
579
  // luma component. will integrate chroma components as well.
580
9.99M
  if (x->zcoeff_blk[tx_size][block] && plane == 0) {
581
3.27M
    p->eobs[block] = 0;
582
3.27M
    *a = *l = 0;
583
#if CONFIG_MISMATCH_DEBUG
584
    goto encode_block_end;
585
#else
586
3.27M
    return;
587
3.27M
#endif
588
3.27M
  }
589
590
6.72M
  if (!x->skip_recode) {
591
6.72M
    if (x->quant_fp) {
592
      // Encoding process for rtc mode
593
0
      if (x->skip_txfm[0] == SKIP_TXFM_AC_DC && plane == 0) {
594
        // skip forward transform
595
0
        p->eobs[block] = 0;
596
0
        *a = *l = 0;
597
#if CONFIG_MISMATCH_DEBUG
598
        goto encode_block_end;
599
#else
600
0
        return;
601
0
#endif
602
0
      } else {
603
0
        vp9_xform_quant_fp(x, plane, block, row, col, plane_bsize, tx_size);
604
0
      }
605
6.72M
    } else {
606
6.72M
      if (max_txsize_lookup[plane_bsize] == tx_size) {
607
4.37M
        int txfm_blk_index = (plane << 2) + (block >> (tx_size << 1));
608
4.37M
        if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_NONE) {
609
          // full forward transform and quantization
610
4.37M
          vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);
611
4.37M
        } else if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_AC_ONLY) {
612
          // fast path forward transform and quantization
613
0
          vp9_xform_quant_dc(x, plane, block, row, col, plane_bsize, tx_size);
614
0
        } else {
615
          // skip forward transform
616
0
          p->eobs[block] = 0;
617
0
          *a = *l = 0;
618
#if CONFIG_MISMATCH_DEBUG
619
          goto encode_block_end;
620
#else
621
0
          return;
622
0
#endif
623
0
        }
624
4.37M
      } else {
625
2.34M
        vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);
626
2.34M
      }
627
6.72M
    }
628
6.72M
  }
629
630
6.72M
  if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
631
0
    const int ctx = combine_entropy_contexts(*a, *l);
632
0
    *a = *l = vp9_optimize_b(x, plane, block, tx_size, ctx) > 0;
633
6.72M
  } else {
634
6.72M
    *a = *l = p->eobs[block] > 0;
635
6.72M
  }
636
637
6.72M
  if (p->eobs[block]) *(args->skip) = 0;
638
639
6.72M
  if (x->skip_encode || p->eobs[block] == 0) {
640
#if CONFIG_MISMATCH_DEBUG
641
    goto encode_block_end;
642
#else
643
587k
    return;
644
587k
#endif
645
587k
  }
646
6.13M
#if CONFIG_VP9_HIGHBITDEPTH
647
6.13M
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
648
0
    uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst);
649
0
    switch (tx_size) {
650
0
      case TX_32X32:
651
0
        vp9_highbd_idct32x32_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
652
0
                                 xd->bd);
653
0
        break;
654
0
      case TX_16X16:
655
0
        vp9_highbd_idct16x16_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
656
0
                                 xd->bd);
657
0
        break;
658
0
      case TX_8X8:
659
0
        vp9_highbd_idct8x8_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
660
0
                               xd->bd);
661
0
        break;
662
0
      default:
663
0
        assert(tx_size == TX_4X4);
664
        // this is like vp9_short_idct4x4 but has a special case around eob<=1
665
        // which is significant (not just an optimization) for the lossless
666
        // case.
667
0
        x->highbd_inv_txfm_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
668
0
                               xd->bd);
669
0
        break;
670
0
    }
671
#if CONFIG_MISMATCH_DEBUG
672
    goto encode_block_end;
673
#else
674
0
    return;
675
0
#endif
676
0
  }
677
6.13M
#endif  // CONFIG_VP9_HIGHBITDEPTH
678
679
6.13M
  switch (tx_size) {
680
8.37k
    case TX_32X32:
681
8.37k
      vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
682
8.37k
      break;
683
62.0k
    case TX_16X16:
684
62.0k
      vp9_idct16x16_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
685
62.0k
      break;
686
424k
    case TX_8X8:
687
424k
      vp9_idct8x8_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
688
424k
      break;
689
5.63M
    default:
690
5.63M
      assert(tx_size == TX_4X4);
691
      // this is like vp9_short_idct4x4 but has a special case around eob<=1
692
      // which is significant (not just an optimization) for the lossless
693
      // case.
694
5.63M
      x->inv_txfm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
695
5.63M
      break;
696
6.13M
  }
697
#if CONFIG_MISMATCH_DEBUG
698
encode_block_end:
699
  if (output_enabled) {
700
    int pixel_c, pixel_r;
701
    int blk_w = 1 << (tx_size + TX_UNIT_SIZE_LOG2);
702
    int blk_h = 1 << (tx_size + TX_UNIT_SIZE_LOG2);
703
    mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, col, row,
704
                    pd->subsampling_x, pd->subsampling_y);
705
    mismatch_record_block_tx(dst, pd->dst.stride, plane, pixel_c, pixel_r,
706
                             blk_w, blk_h,
707
                             xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH);
708
  }
709
#endif
710
6.13M
}
711
712
static void encode_block_pass1(int plane, int block, int row, int col,
713
                               BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
714
0
                               void *arg) {
715
0
  MACROBLOCK *const x = (MACROBLOCK *)arg;
716
0
  MACROBLOCKD *const xd = &x->e_mbd;
717
0
  struct macroblock_plane *const p = &x->plane[plane];
718
0
  struct macroblockd_plane *const pd = &xd->plane[plane];
719
0
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
720
0
  uint8_t *dst;
721
0
  dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col];
722
723
0
  vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);
724
725
0
  if (p->eobs[block] > 0) {
726
0
#if CONFIG_VP9_HIGHBITDEPTH
727
0
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
728
0
      x->highbd_inv_txfm_add(dqcoeff, CONVERT_TO_SHORTPTR(dst), pd->dst.stride,
729
0
                             p->eobs[block], xd->bd);
730
0
      return;
731
0
    }
732
0
#endif  // CONFIG_VP9_HIGHBITDEPTH
733
0
    x->inv_txfm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
734
0
  }
735
0
}
736
737
0
// Encodes plane 0 of the block of size `bsize`: computes the residual for the
// plane, then runs encode_block_pass1() on each of its transform blocks.
void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const int plane = 0;

  vp9_subtract_plane(x, bsize, plane);
  vp9_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block_pass1,
                                         x);
}
742
743
// Encodes every plane of the block of size `bsize` through encode_block().
//
// mi->skip is set to 1 up front; nothing else is done when x->skip is set.
// For each plane the residual is recomputed unless x->skip_recode is set, and
// trellis optimization is enabled only when x->optimize is set and at least
// one of x->skip_recode / x->skip_optimize is clear.
//
// mi_row, mi_col and output_enabled are only consumed when
// CONFIG_MISMATCH_DEBUG is enabled.
void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, int mi_col,
                   int output_enabled) {
  MACROBLOCKD *const xd = &x->e_mbd;
  struct optimize_ctx ctx;
  MODE_INFO *mi = xd->mi[0];
  int plane;
  struct encode_b_args arg = { x,
                               1,     // enable_trellis_opt
                               0.0,   // trellis_opt_thresh
                               NULL,  // &sse_calc_done
                               NULL,  // &sse
                               NULL,  // above entropy context
                               NULL,  // left entropy context
                               &mi->skip
#if CONFIG_MISMATCH_DEBUG
                               ,
                               mi_row,
                               mi_col,
                               output_enabled
#endif
  };
#if !CONFIG_MISMATCH_DEBUG
  (void)mi_row;
  (void)mi_col;
  (void)output_enabled;
#endif

  mi->skip = 1;

  if (x->skip) return;

  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
    ENTROPY_CONTEXT *const above = ctx.ta[plane];
    ENTROPY_CONTEXT *const left = ctx.tl[plane];

    if (!x->skip_recode) vp9_subtract_plane(x, bsize, plane);

    arg.enable_trellis_opt =
        x->optimize && (!x->skip_recode || !x->skip_optimize);
    if (arg.enable_trellis_opt) {
      // The context arrays are only filled in when trellis will use them.
      const struct macroblockd_plane *const pd = &xd->plane[plane];
      const TX_SIZE tx_size = plane ? get_uv_tx_size(mi, pd) : mi->tx_size;
      vp9_get_entropy_contexts(bsize, tx_size, pd, above, left);
    }
    arg.ta = above;
    arg.tl = left;

    vp9_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block,
                                           &arg);
  }
}
795
796
void vp9_encode_block_intra(int plane, int block, int row, int col,
797
                            BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
798
334M
                            void *arg) {
799
334M
  struct encode_b_args *const args = arg;
800
334M
  MACROBLOCK *const x = args->x;
801
334M
  MACROBLOCKD *const xd = &x->e_mbd;
802
334M
  MODE_INFO *mi = xd->mi[0];
803
334M
  struct macroblock_plane *const p = &x->plane[plane];
804
334M
  struct macroblockd_plane *const pd = &xd->plane[plane];
805
334M
  tran_low_t *coeff = BLOCK_OFFSET(p->coeff, block);
806
334M
  tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
807
334M
  tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
808
334M
  const ScanOrder *scan_order;
809
334M
  TX_TYPE tx_type = DCT_DCT;
810
334M
  PREDICTION_MODE mode;
811
334M
  const int bwl = b_width_log2_lookup[plane_bsize];
812
334M
  const int diff_stride = 4 * (1 << bwl);
813
334M
  uint8_t *src, *dst;
814
334M
  int16_t *src_diff;
815
334M
  uint16_t *eob = &p->eobs[block];
816
334M
  const int src_stride = p->src.stride;
817
334M
  const int dst_stride = pd->dst.stride;
818
334M
  int enable_trellis_opt = !x->skip_recode;
819
334M
  ENTROPY_CONTEXT *a = NULL;
820
334M
  ENTROPY_CONTEXT *l = NULL;
821
334M
  int entropy_ctx = 0;
822
334M
  dst = &pd->dst.buf[4 * (row * dst_stride + col)];
823
334M
  src = &p->src.buf[4 * (row * src_stride + col)];
824
334M
  src_diff = &p->src_diff[4 * (row * diff_stride + col)];
825
826
334M
  if (tx_size == TX_4X4) {
827
248M
    tx_type = get_tx_type_4x4(get_plane_type(plane), xd, block);
828
248M
    scan_order = &vp9_scan_orders[TX_4X4][tx_type];
829
248M
    mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mi->uv_mode;
830
248M
  } else {
831
85.8M
    mode = plane == 0 ? mi->mode : mi->uv_mode;
832
85.8M
    if (tx_size == TX_32X32) {
833
3.72M
      scan_order = &vp9_default_scan_orders[TX_32X32];
834
82.0M
    } else {
835
82.0M
      tx_type = get_tx_type(get_plane_type(plane), xd);
836
82.0M
      scan_order = &vp9_scan_orders[tx_size][tx_type];
837
82.0M
    }
838
85.8M
  }
839
840
334M
  vp9_predict_intra_block(
841
334M
      xd, bwl, tx_size, mode, (x->skip_encode || x->fp_src_pred) ? src : dst,
842
334M
      (x->skip_encode || x->fp_src_pred) ? src_stride : dst_stride, dst,
843
334M
      dst_stride, col, row, plane);
844
845
  // skip block condition should be handled before this is called.
846
334M
  assert(!x->skip_block);
847
848
334M
  if (!x->skip_recode) {
849
334M
    const int tx_size_in_pixels = (1 << tx_size) << 2;
850
334M
#if CONFIG_VP9_HIGHBITDEPTH
851
334M
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
852
0
      vpx_highbd_subtract_block(tx_size_in_pixels, tx_size_in_pixels, src_diff,
853
0
                                diff_stride, src, src_stride, dst, dst_stride,
854
0
                                xd->bd);
855
334M
    } else {
856
334M
      vpx_subtract_block(tx_size_in_pixels, tx_size_in_pixels, src_diff,
857
334M
                         diff_stride, src, src_stride, dst, dst_stride);
858
334M
    }
859
#else
860
    vpx_subtract_block(tx_size_in_pixels, tx_size_in_pixels, src_diff,
861
                       diff_stride, src, src_stride, dst, dst_stride);
862
#endif
863
334M
    enable_trellis_opt = do_trellis_opt(pd, src_diff, diff_stride, row, col,
864
334M
                                        plane_bsize, tx_size, args);
865
334M
  }
866
867
334M
  if (enable_trellis_opt) {
868
22.8M
    a = &args->ta[col];
869
22.8M
    l = &args->tl[row];
870
22.8M
    entropy_ctx = combine_entropy_contexts(*a, *l);
871
22.8M
  }
872
873
334M
#if CONFIG_VP9_HIGHBITDEPTH
874
334M
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
875
0
    uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst);
876
0
    switch (tx_size) {
877
0
      case TX_32X32:
878
0
        if (!x->skip_recode) {
879
0
          highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
880
0
          vpx_highbd_quantize_b_32x32(coeff, p, qcoeff, dqcoeff, pd->dequant,
881
0
                                      eob, scan_order);
882
0
        }
883
0
        if (enable_trellis_opt) {
884
0
          *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
885
0
        }
886
0
        if (!x->skip_encode && *eob) {
887
0
          vp9_highbd_idct32x32_add(dqcoeff, dst16, dst_stride, *eob, xd->bd);
888
0
        }
889
0
        break;
890
0
      case TX_16X16:
891
0
        if (!x->skip_recode) {
892
0
          if (tx_type == DCT_DCT)
893
0
            vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
894
0
          else
895
0
            vp9_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type);
896
0
          vpx_highbd_quantize_b(coeff, 256, p, qcoeff, dqcoeff, pd->dequant,
897
0
                                eob, scan_order);
898
0
        }
899
0
        if (enable_trellis_opt) {
900
0
          *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
901
0
        }
902
0
        if (!x->skip_encode && *eob) {
903
0
          vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst16, dst_stride, *eob,
904
0
                                  xd->bd);
905
0
        }
906
0
        break;
907
0
      case TX_8X8:
908
0
        if (!x->skip_recode) {
909
0
          if (tx_type == DCT_DCT)
910
0
            vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
911
0
          else
912
0
            vp9_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type);
913
0
          vpx_highbd_quantize_b(coeff, 64, p, qcoeff, dqcoeff, pd->dequant, eob,
914
0
                                scan_order);
915
0
        }
916
0
        if (enable_trellis_opt) {
917
0
          *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
918
0
        }
919
0
        if (!x->skip_encode && *eob) {
920
0
          vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst16, dst_stride, *eob,
921
0
                                xd->bd);
922
0
        }
923
0
        break;
924
0
      default:
925
0
        assert(tx_size == TX_4X4);
926
0
        if (!x->skip_recode) {
927
0
          if (tx_type != DCT_DCT)
928
0
            vp9_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type);
929
0
          else
930
0
            x->fwd_txfm4x4(src_diff, coeff, diff_stride);
931
0
          vpx_highbd_quantize_b(coeff, 16, p, qcoeff, dqcoeff, pd->dequant, eob,
932
0
                                scan_order);
933
0
        }
934
0
        if (enable_trellis_opt) {
935
0
          *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
936
0
        }
937
0
        if (!x->skip_encode && *eob) {
938
0
          if (tx_type == DCT_DCT) {
939
            // this is like vp9_short_idct4x4 but has a special case around
940
            // eob<=1 which is significant (not just an optimization) for the
941
            // lossless case.
942
0
            x->highbd_inv_txfm_add(dqcoeff, dst16, dst_stride, *eob, xd->bd);
943
0
          } else {
944
0
            vp9_highbd_iht4x4_16_add(dqcoeff, dst16, dst_stride, tx_type,
945
0
                                     xd->bd);
946
0
          }
947
0
        }
948
0
        break;
949
0
    }
950
0
    if (*eob) *(args->skip) = 0;
951
0
    return;
952
0
  }
953
334M
#endif  // CONFIG_VP9_HIGHBITDEPTH
954
955
334M
  switch (tx_size) {
956
3.72M
    case TX_32X32:
957
3.72M
      if (!x->skip_recode) {
958
3.72M
        fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
959
3.72M
        vpx_quantize_b_32x32(coeff, p, qcoeff, dqcoeff, pd->dequant, eob,
960
3.72M
                             scan_order);
961
3.72M
      }
962
3.72M
      if (enable_trellis_opt) {
963
490k
        *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
964
490k
      }
965
3.72M
      if (!x->skip_encode && *eob)
966
2.70M
        vp9_idct32x32_add(dqcoeff, dst, dst_stride, *eob);
967
3.72M
      break;
968
14.3M
    case TX_16X16:
969
14.3M
      if (!x->skip_recode) {
970
14.3M
        vp9_fht16x16(src_diff, coeff, diff_stride, tx_type);
971
14.3M
        vpx_quantize_b(coeff, 256, p, qcoeff, dqcoeff, pd->dequant, eob,
972
14.3M
                       scan_order);
973
14.3M
      }
974
14.3M
      if (enable_trellis_opt) {
975
972k
        *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
976
972k
      }
977
14.3M
      if (!x->skip_encode && *eob)
978
11.9M
        vp9_iht16x16_add(tx_type, dqcoeff, dst, dst_stride, *eob);
979
14.3M
      break;
980
67.7M
    case TX_8X8:
981
67.7M
      if (!x->skip_recode) {
982
67.7M
        vp9_fht8x8(src_diff, coeff, diff_stride, tx_type);
983
67.7M
        vpx_quantize_b(coeff, 64, p, qcoeff, dqcoeff, pd->dequant, eob,
984
67.7M
                       scan_order);
985
67.7M
      }
986
67.7M
      if (enable_trellis_opt) {
987
4.24M
        *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
988
4.24M
      }
989
67.7M
      if (!x->skip_encode && *eob)
990
55.2M
        vp9_iht8x8_add(tx_type, dqcoeff, dst, dst_stride, *eob);
991
67.7M
      break;
992
248M
    default:
993
248M
      assert(tx_size == TX_4X4);
994
248M
      if (!x->skip_recode) {
995
248M
        if (tx_type != DCT_DCT)
996
22.0M
          vp9_fht4x4(src_diff, coeff, diff_stride, tx_type);
997
226M
        else
998
226M
          x->fwd_txfm4x4(src_diff, coeff, diff_stride);
999
248M
        vpx_quantize_b(coeff, 16, p, qcoeff, dqcoeff, pd->dequant, eob,
1000
248M
                       scan_order);
1001
248M
      }
1002
248M
      if (enable_trellis_opt) {
1003
17.1M
        *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
1004
17.1M
      }
1005
248M
      if (!x->skip_encode && *eob) {
1006
191M
        if (tx_type == DCT_DCT)
1007
          // this is like vp9_short_idct4x4 but has a special case around eob<=1
1008
          // which is significant (not just an optimization) for the lossless
1009
          // case.
1010
173M
          x->inv_txfm_add(dqcoeff, dst, dst_stride, *eob);
1011
17.7M
        else
1012
17.7M
          vp9_iht4x4_16_add(dqcoeff, dst, dst_stride, tx_type);
1013
191M
      }
1014
248M
      break;
1015
334M
  }
1016
334M
  if (*eob) *(args->skip) = 0;
1017
334M
}
1018
1019
// Runs vp9_encode_block_intra() on every transform block of `plane` within
// the block of size `bsize`.
//
// Trellis optimization is used only when the caller requests it
// (enable_trellis_opt), x->optimize is set, and at least one of
// x->skip_recode / x->skip_optimize is clear; in that case the entropy
// contexts for the plane are fetched first. Otherwise the flag passed down to
// the per-block callback is forced to 0.
void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane,
                                  int enable_trellis_opt) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  struct optimize_ctx ctx;
  // TODO(angiebird): make mismatch_debug support intra mode
  struct encode_b_args arg = { x,
                               enable_trellis_opt,
                               0.0,   // trellis_opt_thresh
                               NULL,  // &sse_calc_done
                               NULL,  // &sse
                               ctx.ta[plane],
                               ctx.tl[plane],
                               &xd->mi[0]->skip
#if CONFIG_MISMATCH_DEBUG
                               ,
                               0,  // mi_row
                               0,  // mi_col
                               0   // output_enabled
#endif
  };

  if (!enable_trellis_opt || !x->optimize ||
      (x->skip_recode && x->skip_optimize)) {
    arg.enable_trellis_opt = 0;
  } else {
    const struct macroblockd_plane *const pd = &xd->plane[plane];
    const TX_SIZE tx_size =
        plane ? get_uv_tx_size(xd->mi[0], pd) : xd->mi[0]->tx_size;
    vp9_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane], ctx.tl[plane]);
  }

  vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
                                         vp9_encode_block_intra, &arg);
}