Coverage Report

Created: 2025-08-28 07:12

/src/libvpx/vp9/encoder/vp9_encodemb.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5
 *  that can be found in the LICENSE file in the root of the source
6
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
9
 */
10
11
#include "./vp9_rtcd.h"
12
#include "./vpx_config.h"
13
#include "./vpx_dsp_rtcd.h"
14
15
#include "vpx_dsp/quantize.h"
16
#include "vpx_mem/vpx_mem.h"
17
#include "vpx_ports/mem.h"
18
19
#if CONFIG_MISMATCH_DEBUG
20
#include "vpx_util/vpx_debug_util.h"
21
#endif
22
23
#include "vp9/common/vp9_idct.h"
24
#include "vp9/common/vp9_reconinter.h"
25
#include "vp9/common/vp9_reconintra.h"
26
#include "vp9/common/vp9_scan.h"
27
28
#include "vp9/encoder/vp9_encodemb.h"
29
#include "vp9/encoder/vp9_encoder.h"
30
#include "vp9/encoder/vp9_rd.h"
31
#include "vp9/encoder/vp9_tokenize.h"
32
33
// Pair of per-plane ENTROPY_CONTEXT arrays, 16 entries per plane.
// NOTE(review): ta/tl presumably hold the "above" and "left" entropy contexts
// used while encoding a superblock - confirm against the users of this struct.
struct optimize_ctx {
34
  ENTROPY_CONTEXT ta[MAX_MB_PLANE][16];
35
  ENTROPY_CONTEXT tl[MAX_MB_PLANE][16];
36
};
37
38
45.3M
// Writes the residual for one plane of the current block into
// x->plane[plane].src_diff by calling the subtract kernel on the plane's
// source buffer and its dst buffer. The residual is stored with a stride
// equal to the plane block width.
//   x     - macroblock state holding the source, dst and src_diff buffers.
//   bsize - block size before plane subsampling is applied.
//   plane - index of the plane to process.
void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const enc_plane = &x->plane[plane];
  const struct macroblockd_plane *const dec_plane = &xd->plane[plane];
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, dec_plane);
  const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize];

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    // High bit-depth frames use the highbd kernel, which also takes the bit
    // depth.
    vpx_highbd_subtract_block(height, width, enc_plane->src_diff, width,
                              enc_plane->src.buf, enc_plane->src.stride,
                              dec_plane->dst.buf, dec_plane->dst.stride,
                              xd->bd);
  } else
#endif  // CONFIG_VP9_HIGHBITDEPTH
  {
    vpx_subtract_block(height, width, enc_plane->src_diff, width,
                       enc_plane->src.buf, enc_plane->src.stride,
                       dec_plane->dst.buf, dec_plane->dst.stride);
  }
}
56
57
// Multipliers applied to mb->rdmult in vp9_optimize_b(). Indexed as
// [ref][plane_type], where ref is the result of is_inter_block() (row 0:
// not inter, row 1: inter) and plane_type is the result of get_plane_type().
static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = {
58
  { 10, 6 },
59
  { 8, 5 },
60
};
61
62
// 'num' can be negative, but 'shift' must be non-negative.
// A negative 'num' is negated, shifted, and negated back, so the result is
// rounded toward zero and a negative value is never right-shifted directly.
// Both arguments are evaluated more than once; pass side-effect-free
// expressions only.
63
#define RIGHT_SHIFT_POSSIBLY_NEGATIVE(num, shift) \
64
1.42M
  (((num) >= 0) ? (num) >> (shift) : -((-(num)) >> (shift)))
65
66
// Greedy rate-distortion optimization of the quantized coefficients of one
// transform block.
//
// Walks the coefficients in scan order up to the current eob. For every
// non-zero quantized value it compares two candidates - keep the value, or
// reduce its magnitude by one - using RDCOST on the token rate (including the
// effect on the next token's cost) and the squared reconstruction error. In
// parallel it tracks the cheapest position at which to end the block.
// Coefficients at and after the chosen end position are cleared in qcoeff
// and dqcoeff.
//
//   mb      - macroblock state; coeff/qcoeff/eobs are read from
//             mb->plane[plane], dqcoeff from mb->e_mbd.plane[plane].
//   plane   - plane index.
//   block   - block index used with BLOCK_OFFSET and eobs[].
//   tx_size - transform size; selects scan order, band table, token costs.
//   ctx     - coefficient context used for the first scan position.
//
// Returns the new end-of-block position, which is also stored in
// mb->plane[plane].eobs[block]. qcoeff and dqcoeff are modified in place.
int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
67
20.8M
                   int ctx) {
68
20.8M
  MACROBLOCKD *const xd = &mb->e_mbd;
69
20.8M
  struct macroblock_plane *const p = &mb->plane[plane];
70
20.8M
  struct macroblockd_plane *const pd = &xd->plane[plane];
71
20.8M
  const int ref = is_inter_block(xd->mi[0]);
72
20.8M
  uint8_t token_cache[1024];
73
20.8M
  const tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
74
20.8M
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
75
20.8M
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
76
20.8M
  const int eob = p->eobs[block];
77
20.8M
  const PLANE_TYPE plane_type = get_plane_type(plane);
78
20.8M
  const int default_eob = 16 << (tx_size << 1);
79
20.8M
  const int shift = (tx_size == TX_32X32);
80
20.8M
  const int16_t *const dequant_ptr = pd->dequant;
81
20.8M
  const uint8_t *const band_translate = get_band_translate(tx_size);
82
20.8M
  const ScanOrder *const so = get_scan(xd, tx_size, plane_type, block);
83
20.8M
  const int16_t *const scan = so->scan;
84
20.8M
  const int16_t *const nb = so->neighbors;
85
20.8M
  const MODE_INFO *mbmi = xd->mi[0];
86
20.8M
  const int sharpness = mb->sharpness;
87
20.8M
  const int64_t rdadj = (int64_t)mb->rdmult * plane_rd_mult[ref][plane_type];
88
20.8M
  const int64_t rdmult =
89
20.8M
      (sharpness == 0 ? rdadj >> 1
90
20.8M
                      : (rdadj * (8 - sharpness + mbmi->segment_id)) >> 4);
91
92
20.8M
  const int64_t rddiv = mb->rddiv;
93
20.8M
  int64_t rd_cost0, rd_cost1;
94
20.8M
  int64_t rate0, rate1;
95
20.8M
  int16_t t0, t1;
96
20.8M
  int i, final_eob;
97
20.8M
  // Counts coefficients with magnitude > 1 seen since the best eob position
  // was last moved; only incremented when sharpness > 0.
  int count_high_values_after_eob = 0;
98
20.8M
#if CONFIG_VP9_HIGHBITDEPTH
99
20.8M
  const uint16_t *cat6_high_cost = vp9_get_high_cost_table(xd->bd);
100
#else
101
  const uint16_t *cat6_high_cost = vp9_get_high_cost_table(8);
102
#endif
103
20.8M
  unsigned int(*const token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
104
20.8M
      mb->token_costs[tx_size][plane_type][ref];
105
20.8M
  unsigned int(*token_costs_cur)[2][COEFF_CONTEXTS][ENTROPY_TOKENS];
106
20.8M
  int64_t eob_cost0, eob_cost1;
107
20.8M
  const int ctx0 = ctx;
108
20.8M
  int64_t accu_rate = 0;
109
  // Initialized to the worst possible error for the largest transform size.
110
  // This ensures that it never goes negative.
111
20.8M
  int64_t accu_error = ((int64_t)1) << 50;
112
20.8M
  int64_t best_block_rd_cost = INT64_MAX;
113
20.8M
  // Starts non-zero so that the first coefficient uses token tree 0
  // (token_tree_sel_cur is (x_prev == 0)).
  int x_prev = 1;
114
20.8M
  // Quantized / de-quantized value of the coefficient just before the best
  // eob position, as chosen at the time that position was recorded.
  tran_low_t before_best_eob_qc = 0;
115
20.8M
  tran_low_t before_best_eob_dqc = 0;
116
117
20.8M
  assert((!plane_type && !plane) || (plane_type && plane));
118
20.8M
  assert(eob <= default_eob);
119
120
421M
  // Seed token_cache with the energy class of every currently quantized
  // coefficient so get_coef_context() can be evaluated below.
  for (i = 0; i < eob; i++) {
121
400M
    const int rc = scan[i];
122
400M
    token_cache[rc] = vp9_pt_energy_class[vp9_get_token(qcoeff[rc])];
123
400M
  }
124
20.8M
  final_eob = 0;
125
126
  // Initial RD cost.
  // This is the cost of signalling EOB at position 0, i.e. an empty block.
127
20.8M
  token_costs_cur = token_costs + band_translate[0];
128
20.8M
  rate0 = (*token_costs_cur)[0][ctx0][EOB_TOKEN];
129
20.8M
  best_block_rd_cost = RDCOST(rdmult, rddiv, rate0, accu_error);
130
131
  // For each token, pick one of two choices greedily:
132
  // (i) First candidate: Keep current quantized value, OR
133
  // (ii) Second candidate: Reduce quantized value by 1.
134
421M
  for (i = 0; i < eob; i++) {
135
400M
    const int rc = scan[i];
136
400M
    const int x = qcoeff[rc];
137
400M
    const int band_cur = band_translate[i];
138
400M
    const int ctx_cur = (i == 0) ? ctx : get_coef_context(nb, token_cache, i);
139
400M
    const int token_tree_sel_cur = (x_prev == 0);
140
400M
    token_costs_cur = token_costs + band_cur;
141
400M
    if (x == 0) {  // No need to search
142
160M
      const int token = vp9_get_token(x);
143
160M
      rate0 = (*token_costs_cur)[token_tree_sel_cur][ctx_cur][token];
144
160M
      accu_rate += rate0;
145
160M
      x_prev = 0;
146
      // Note: accu_error does not change.
147
239M
    } else {
148
239M
      // dequant_ptr[0] is used for raster position 0, dequant_ptr[1] for all
      // other positions.
      const int dqv = dequant_ptr[rc != 0];
149
      // Compute the distortion for quantizing to 0.
150
239M
      const int diff_for_zero_raw = (0 - coeff[rc]) * (1 << shift);
151
239M
      const int diff_for_zero =
152
239M
#if CONFIG_VP9_HIGHBITDEPTH
153
239M
          (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
154
239M
              ? RIGHT_SHIFT_POSSIBLY_NEGATIVE(diff_for_zero_raw, xd->bd - 8)
155
239M
              :
156
239M
#endif
157
239M
              diff_for_zero_raw;
158
239M
      const int64_t distortion_for_zero =
159
239M
          (int64_t)diff_for_zero * diff_for_zero;
160
161
      // Compute the distortion for the first candidate
162
239M
      const int diff0_raw = (dqcoeff[rc] - coeff[rc]) * (1 << shift);
163
239M
      const int diff0 =
164
239M
#if CONFIG_VP9_HIGHBITDEPTH
165
239M
          (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
166
239M
              ? RIGHT_SHIFT_POSSIBLY_NEGATIVE(diff0_raw, xd->bd - 8)
167
239M
              :
168
239M
#endif  // CONFIG_VP9_HIGHBITDEPTH
169
239M
              diff0_raw;
170
239M
      const int64_t distortion0 = (int64_t)diff0 * diff0;
171
172
      // Compute the distortion for the second candidate
173
239M
      const int sign = -(x < 0);        // -1 if x is negative and 0 otherwise.
174
239M
      const int x1 = x - 2 * sign - 1;  // abs(x1) = abs(x) - 1.
175
239M
      int64_t distortion1;
176
239M
      if (x1 != 0) {
177
160M
        const int dqv_step =
178
160M
#if CONFIG_VP9_HIGHBITDEPTH
179
160M
            (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? dqv >> (xd->bd - 8)
180
160M
                                                          :
181
160M
#endif  // CONFIG_VP9_HIGHBITDEPTH
182
160M
                                                          dqv;
183
160M
        // (v + sign) ^ sign yields v when sign is 0 and -v when sign is -1,
        // so diff_step carries the sign of x.
        const int diff_step = (dqv_step + sign) ^ sign;
184
160M
        const int diff1 = diff0 - diff_step;
185
160M
        assert(dqv > 0);  // We aren't right shifting a negative number above.
186
160M
        distortion1 = (int64_t)diff1 * diff1;
187
160M
      } else {
188
79.3M
        distortion1 = distortion_for_zero;
189
79.3M
      }
190
239M
      {
191
        // Calculate RDCost for current coeff for the two candidates.
192
239M
        const int64_t base_bits0 = vp9_get_token_cost(x, &t0, cat6_high_cost);
193
239M
        const int64_t base_bits1 = vp9_get_token_cost(x1, &t1, cat6_high_cost);
194
239M
        rate0 =
195
239M
            base_bits0 + (*token_costs_cur)[token_tree_sel_cur][ctx_cur][t0];
196
239M
        rate1 =
197
239M
            base_bits1 + (*token_costs_cur)[token_tree_sel_cur][ctx_cur][t1];
198
239M
      }
199
239M
      {
200
239M
        int rdcost_better_for_x1, eob_rdcost_better_for_x1;
201
239M
        int dqc0, dqc1;
202
239M
        int64_t best_eob_cost_cur;
203
239M
        int use_x1;
204
205
        // Calculate RD Cost effect on the next coeff for the two candidates.
206
239M
        int64_t next_bits0 = 0;
207
239M
        int64_t next_bits1 = 0;
208
239M
        int64_t next_eob_bits0 = 0;
209
239M
        int64_t next_eob_bits1 = 0;
210
239M
        if (i < default_eob - 1) {
211
234M
          int ctx_next, token_tree_sel_next;
212
234M
          const int band_next = band_translate[i + 1];
213
234M
          const int token_next =
214
234M
              (i + 1 != eob) ? vp9_get_token(qcoeff[scan[i + 1]]) : EOB_TOKEN;
215
234M
          unsigned int(*const token_costs_next)[2][COEFF_CONTEXTS]
216
234M
                                               [ENTROPY_TOKENS] =
217
234M
                                                   token_costs + band_next;
218
234M
          // token_cache[rc] is temporarily set to each candidate's energy
          // class so the next position's context reflects that candidate.
          token_cache[rc] = vp9_pt_energy_class[t0];
219
234M
          ctx_next = get_coef_context(nb, token_cache, i + 1);
220
234M
          token_tree_sel_next = (x == 0);
221
234M
          next_bits0 =
222
234M
              (*token_costs_next)[token_tree_sel_next][ctx_next][token_next];
223
234M
          next_eob_bits0 =
224
234M
              (*token_costs_next)[token_tree_sel_next][ctx_next][EOB_TOKEN];
225
234M
          token_cache[rc] = vp9_pt_energy_class[t1];
226
234M
          ctx_next = get_coef_context(nb, token_cache, i + 1);
227
234M
          token_tree_sel_next = (x1 == 0);
228
234M
          next_bits1 =
229
234M
              (*token_costs_next)[token_tree_sel_next][ctx_next][token_next];
230
234M
          if (x1 != 0) {
231
156M
            next_eob_bits1 =
232
156M
                (*token_costs_next)[token_tree_sel_next][ctx_next][EOB_TOKEN];
233
156M
          }
234
234M
        }
235
236
        // Compare the total RD costs for two candidates.
237
239M
        rd_cost0 = RDCOST(rdmult, rddiv, (rate0 + next_bits0), distortion0);
238
239M
        rd_cost1 = RDCOST(rdmult, rddiv, (rate1 + next_bits1), distortion1);
239
239M
        rdcost_better_for_x1 = (rd_cost1 < rd_cost0);
240
239M
        // Cost of the whole block if it ended right after this coefficient.
        eob_cost0 = RDCOST(rdmult, rddiv, (accu_rate + rate0 + next_eob_bits0),
241
239M
                           (accu_error + distortion0 - distortion_for_zero));
242
239M
        eob_cost1 = eob_cost0;
243
239M
        if (x1 != 0) {
244
160M
          eob_cost1 =
245
160M
              RDCOST(rdmult, rddiv, (accu_rate + rate1 + next_eob_bits1),
246
160M
                     (accu_error + distortion1 - distortion_for_zero));
247
160M
          eob_rdcost_better_for_x1 = (eob_cost1 < eob_cost0);
248
160M
        } else {
249
79.3M
          eob_rdcost_better_for_x1 = 0;
250
79.3M
        }
251
252
        // Calculate the two candidate de-quantized values.
253
239M
        dqc0 = dqcoeff[rc];
254
239M
        dqc1 = 0;
255
239M
        if (rdcost_better_for_x1 + eob_rdcost_better_for_x1) {
256
3.06M
          if (x1 != 0) {
257
1.42M
            dqc1 = RIGHT_SHIFT_POSSIBLY_NEGATIVE(x1 * dqv, shift);
258
1.63M
          } else {
259
1.63M
            dqc1 = 0;
260
1.63M
          }
261
3.06M
        }
262
263
        // Pick and record the better quantized and de-quantized values.
264
239M
        if (rdcost_better_for_x1) {
265
2.94M
          qcoeff[rc] = x1;
266
2.94M
          dqcoeff[rc] = dqc1;
267
2.94M
          accu_rate += rate1;
268
2.94M
          accu_error += distortion1 - distortion_for_zero;
269
2.94M
          assert(distortion1 <= distortion_for_zero);
270
2.94M
          token_cache[rc] = vp9_pt_energy_class[t1];
271
236M
        } else {
272
236M
          accu_rate += rate0;
273
236M
          accu_error += distortion0 - distortion_for_zero;
274
236M
          assert(distortion0 <= distortion_for_zero);
275
236M
          token_cache[rc] = vp9_pt_energy_class[t0];
276
236M
        }
277
239M
        if (sharpness > 0 && abs(qcoeff[rc]) > 1) count_high_values_after_eob++;
278
239M
        assert(accu_error >= 0);
279
239M
        x_prev = qcoeff[rc];  // Update based on selected quantized value.
280
281
239M
        use_x1 = (x1 != 0) && eob_rdcost_better_for_x1;
282
239M
        best_eob_cost_cur = use_x1 ? eob_cost1 : eob_cost0;
283
284
        // Determine whether to move the eob position to i+1
285
239M
        if (best_eob_cost_cur < best_block_rd_cost) {
286
228M
          best_block_rd_cost = best_eob_cost_cur;
287
228M
          final_eob = i + 1;
288
228M
          count_high_values_after_eob = 0;
289
228M
          if (use_x1) {
290
1.41M
            before_best_eob_qc = x1;
291
1.41M
            before_best_eob_dqc = dqc1;
292
227M
          } else {
293
227M
            before_best_eob_qc = x;
294
227M
            before_best_eob_dqc = dqc0;
295
227M
          }
296
228M
        }
297
239M
      }
298
239M
    }
299
400M
  }
300
20.8M
  // If coefficients with magnitude > 1 remain after the best eob position,
  // do not truncate there: place the eob just past the last non-zero
  // coefficient instead.
  if (count_high_values_after_eob > 0) {
301
0
    final_eob = eob - 1;
302
0
    for (; final_eob >= 0; final_eob--) {
303
0
      const int rc = scan[final_eob];
304
0
      const int x = qcoeff[rc];
305
0
      if (x) {
306
0
        break;
307
0
      }
308
0
    }
309
0
    final_eob++;
310
20.8M
  } else {
311
20.8M
    assert(final_eob <= eob);
312
20.8M
    // Restore the value recorded for the last kept coefficient, then clear
    // everything from the new eob up to the old one.
    if (final_eob > 0) {
313
11.2M
      int rc;
314
11.2M
      assert(before_best_eob_qc != 0);
315
11.2M
      i = final_eob - 1;
316
11.2M
      rc = scan[i];
317
11.2M
      qcoeff[rc] = before_best_eob_qc;
318
11.2M
      dqcoeff[rc] = before_best_eob_dqc;
319
11.2M
    }
320
47.3M
    for (i = final_eob; i < eob; i++) {
321
26.5M
      int rc = scan[i];
322
26.5M
      qcoeff[rc] = 0;
323
26.5M
      dqcoeff[rc] = 0;
324
26.5M
    }
325
20.8M
  }
326
20.8M
  mb->plane[plane].eobs[block] = final_eob;
327
20.8M
  return final_eob;
328
20.8M
}
329
#undef RIGHT_SHIFT_POSSIBLY_NEGATIVE
330
331
static INLINE void fdct32x32(int rd_transform, const int16_t *src,
332
5.12M
                             tran_low_t *dst, int src_stride) {
333
5.12M
  if (rd_transform)
334
4.91M
    vpx_fdct32x32_rd(src, dst, src_stride);
335
209k
  else
336
209k
    vpx_fdct32x32(src, dst, src_stride);
337
5.12M
}
338
339
#if CONFIG_VP9_HIGHBITDEPTH
340
static INLINE void highbd_fdct32x32(int rd_transform, const int16_t *src,
341
0
                                    tran_low_t *dst, int src_stride) {
342
0
  if (rd_transform)
343
0
    vpx_highbd_fdct32x32_rd(src, dst, src_stride);
344
0
  else
345
0
    vpx_highbd_fdct32x32(src, dst, src_stride);
346
0
}
347
#endif  // CONFIG_VP9_HIGHBITDEPTH
348
349
void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, int row, int col,
350
0
                        BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
351
0
  MACROBLOCKD *const xd = &x->e_mbd;
352
0
  const struct macroblock_plane *const p = &x->plane[plane];
353
0
  const struct macroblockd_plane *const pd = &xd->plane[plane];
354
0
  const ScanOrder *const scan_order = &vp9_default_scan_orders[tx_size];
355
0
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
356
0
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
357
0
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
358
0
  uint16_t *const eob = &p->eobs[block];
359
0
  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
360
0
  const int16_t *src_diff;
361
0
  src_diff = &p->src_diff[4 * (row * diff_stride + col)];
362
  // skip block condition should be handled before this is called.
363
0
  assert(!x->skip_block);
364
365
0
#if CONFIG_VP9_HIGHBITDEPTH
366
0
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
367
0
    switch (tx_size) {
368
0
      case TX_32X32:
369
0
        highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
370
0
        vp9_highbd_quantize_fp_32x32(coeff, 1024, p, qcoeff, dqcoeff,
371
0
                                     pd->dequant, eob, scan_order);
372
0
        break;
373
0
      case TX_16X16:
374
0
        vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
375
0
        vp9_highbd_quantize_fp(coeff, 256, p, qcoeff, dqcoeff, pd->dequant, eob,
376
0
                               scan_order);
377
0
        break;
378
0
      case TX_8X8:
379
0
        vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
380
0
        vp9_highbd_quantize_fp(coeff, 64, p, qcoeff, dqcoeff, pd->dequant, eob,
381
0
                               scan_order);
382
0
        break;
383
0
      default:
384
0
        assert(tx_size == TX_4X4);
385
0
        x->fwd_txfm4x4(src_diff, coeff, diff_stride);
386
0
        vp9_highbd_quantize_fp(coeff, 16, p, qcoeff, dqcoeff, pd->dequant, eob,
387
0
                               scan_order);
388
0
        break;
389
0
    }
390
0
    return;
391
0
  }
392
0
#endif  // CONFIG_VP9_HIGHBITDEPTH
393
394
0
  switch (tx_size) {
395
0
    case TX_32X32:
396
0
      fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
397
0
      vp9_quantize_fp_32x32(coeff, 1024, p, qcoeff, dqcoeff, pd->dequant, eob,
398
0
                            scan_order);
399
0
      break;
400
0
    case TX_16X16:
401
0
      vpx_fdct16x16(src_diff, coeff, diff_stride);
402
0
      vp9_quantize_fp(coeff, 256, p, qcoeff, dqcoeff, pd->dequant, eob,
403
0
                      scan_order);
404
0
      break;
405
0
    case TX_8X8:
406
0
      vpx_fdct8x8(src_diff, coeff, diff_stride);
407
0
      vp9_quantize_fp(coeff, 64, p, qcoeff, dqcoeff, pd->dequant, eob,
408
0
                      scan_order);
409
410
0
      break;
411
0
    default:
412
0
      assert(tx_size == TX_4X4);
413
0
      x->fwd_txfm4x4(src_diff, coeff, diff_stride);
414
0
      vp9_quantize_fp(coeff, 16, p, qcoeff, dqcoeff, pd->dequant, eob,
415
0
                      scan_order);
416
0
      break;
417
0
  }
418
0
}
419
420
void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block, int row, int col,
421
434k
                        BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
422
434k
  MACROBLOCKD *const xd = &x->e_mbd;
423
434k
  const struct macroblock_plane *const p = &x->plane[plane];
424
434k
  const struct macroblockd_plane *const pd = &xd->plane[plane];
425
434k
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
426
434k
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
427
434k
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
428
434k
  uint16_t *const eob = &p->eobs[block];
429
434k
  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
430
434k
  const int16_t *src_diff;
431
434k
  src_diff = &p->src_diff[4 * (row * diff_stride + col)];
432
  // skip block condition should be handled before this is called.
433
434k
  assert(!x->skip_block);
434
435
434k
#if CONFIG_VP9_HIGHBITDEPTH
436
434k
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
437
0
    switch (tx_size) {
438
0
      case TX_32X32:
439
0
        vpx_highbd_fdct32x32_1(src_diff, coeff, diff_stride);
440
0
        vpx_highbd_quantize_dc_32x32(coeff, p->round, p->quant_fp[0], qcoeff,
441
0
                                     dqcoeff, pd->dequant[0], eob);
442
0
        break;
443
0
      case TX_16X16:
444
0
        vpx_highbd_fdct16x16_1(src_diff, coeff, diff_stride);
445
0
        vpx_highbd_quantize_dc(coeff, 256, p->round, p->quant_fp[0], qcoeff,
446
0
                               dqcoeff, pd->dequant[0], eob);
447
0
        break;
448
0
      case TX_8X8:
449
0
        vpx_highbd_fdct8x8_1(src_diff, coeff, diff_stride);
450
0
        vpx_highbd_quantize_dc(coeff, 64, p->round, p->quant_fp[0], qcoeff,
451
0
                               dqcoeff, pd->dequant[0], eob);
452
0
        break;
453
0
      default:
454
0
        assert(tx_size == TX_4X4);
455
0
        x->fwd_txfm4x4(src_diff, coeff, diff_stride);
456
0
        vpx_highbd_quantize_dc(coeff, 16, p->round, p->quant_fp[0], qcoeff,
457
0
                               dqcoeff, pd->dequant[0], eob);
458
0
        break;
459
0
    }
460
0
    return;
461
0
  }
462
434k
#endif  // CONFIG_VP9_HIGHBITDEPTH
463
464
434k
  switch (tx_size) {
465
3.65k
    case TX_32X32:
466
3.65k
      vpx_fdct32x32_1(src_diff, coeff, diff_stride);
467
3.65k
      vpx_quantize_dc_32x32(coeff, p->round, p->quant_fp[0], qcoeff, dqcoeff,
468
3.65k
                            pd->dequant[0], eob);
469
3.65k
      break;
470
12.0k
    case TX_16X16:
471
12.0k
      vpx_fdct16x16_1(src_diff, coeff, diff_stride);
472
12.0k
      vpx_quantize_dc(coeff, 256, p->round, p->quant_fp[0], qcoeff, dqcoeff,
473
12.0k
                      pd->dequant[0], eob);
474
12.0k
      break;
475
71.9k
    case TX_8X8:
476
71.9k
      vpx_fdct8x8_1(src_diff, coeff, diff_stride);
477
71.9k
      vpx_quantize_dc(coeff, 64, p->round, p->quant_fp[0], qcoeff, dqcoeff,
478
71.9k
                      pd->dequant[0], eob);
479
71.9k
      break;
480
347k
    default:
481
347k
      assert(tx_size == TX_4X4);
482
347k
      x->fwd_txfm4x4(src_diff, coeff, diff_stride);
483
347k
      vpx_quantize_dc(coeff, 16, p->round, p->quant_fp[0], qcoeff, dqcoeff,
484
347k
                      pd->dequant[0], eob);
485
347k
      break;
486
434k
  }
487
434k
}
488
489
void vp9_xform_quant(MACROBLOCK *x, int plane, int block, int row, int col,
490
69.4M
                     BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
491
69.4M
  MACROBLOCKD *const xd = &x->e_mbd;
492
69.4M
  const struct macroblock_plane *const p = &x->plane[plane];
493
69.4M
  const struct macroblockd_plane *const pd = &xd->plane[plane];
494
69.4M
  const ScanOrder *const scan_order = &vp9_default_scan_orders[tx_size];
495
69.4M
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
496
69.4M
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
497
69.4M
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
498
69.4M
  uint16_t *const eob = &p->eobs[block];
499
69.4M
  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
500
69.4M
  const int16_t *src_diff;
501
69.4M
  src_diff = &p->src_diff[4 * (row * diff_stride + col)];
502
  // skip block condition should be handled before this is called.
503
69.4M
  assert(!x->skip_block);
504
505
69.4M
#if CONFIG_VP9_HIGHBITDEPTH
506
69.4M
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
507
0
    switch (tx_size) {
508
0
      case TX_32X32:
509
0
        highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
510
0
        vpx_highbd_quantize_b_32x32(coeff, p, qcoeff, dqcoeff, pd->dequant, eob,
511
0
                                    scan_order);
512
0
        break;
513
0
      case TX_16X16:
514
0
        vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
515
0
        vpx_highbd_quantize_b(coeff, 256, p, qcoeff, dqcoeff, pd->dequant, eob,
516
0
                              scan_order);
517
0
        break;
518
0
      case TX_8X8:
519
0
        vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
520
0
        vpx_highbd_quantize_b(coeff, 64, p, qcoeff, dqcoeff, pd->dequant, eob,
521
0
                              scan_order);
522
0
        break;
523
0
      default:
524
0
        assert(tx_size == TX_4X4);
525
0
        x->fwd_txfm4x4(src_diff, coeff, diff_stride);
526
0
        vpx_highbd_quantize_b(coeff, 16, p, qcoeff, dqcoeff, pd->dequant, eob,
527
0
                              scan_order);
528
0
        break;
529
0
    }
530
0
    return;
531
0
  }
532
69.4M
#endif  // CONFIG_VP9_HIGHBITDEPTH
533
534
69.4M
  switch (tx_size) {
535
1.20M
    case TX_32X32:
536
1.20M
      fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
537
1.20M
      vpx_quantize_b_32x32(coeff, p, qcoeff, dqcoeff, pd->dequant, eob,
538
1.20M
                           scan_order);
539
1.20M
      break;
540
4.83M
    case TX_16X16:
541
4.83M
      vpx_fdct16x16(src_diff, coeff, diff_stride);
542
4.83M
      vpx_quantize_b(coeff, 256, p, qcoeff, dqcoeff, pd->dequant, eob,
543
4.83M
                     scan_order);
544
4.83M
      break;
545
19.7M
    case TX_8X8:
546
19.7M
      vpx_fdct8x8(src_diff, coeff, diff_stride);
547
19.7M
      vpx_quantize_b(coeff, 64, p, qcoeff, dqcoeff, pd->dequant, eob,
548
19.7M
                     scan_order);
549
19.7M
      break;
550
43.6M
    default:
551
43.6M
      assert(tx_size == TX_4X4);
552
43.6M
      x->fwd_txfm4x4(src_diff, coeff, diff_stride);
553
43.6M
      vpx_quantize_b(coeff, 16, p, qcoeff, dqcoeff, pd->dequant, eob,
554
43.6M
                     scan_order);
555
43.6M
      break;
556
69.4M
  }
557
69.4M
}
558
559
static void encode_block(int plane, int block, int row, int col,
560
9.22M
                         BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
561
9.22M
  struct encode_b_args *const args = arg;
562
#if CONFIG_MISMATCH_DEBUG
563
  int mi_row = args->mi_row;
564
  int mi_col = args->mi_col;
565
  int output_enabled = args->output_enabled;
566
#endif
567
9.22M
  MACROBLOCK *const x = args->x;
568
9.22M
  MACROBLOCKD *const xd = &x->e_mbd;
569
9.22M
  struct macroblock_plane *const p = &x->plane[plane];
570
9.22M
  struct macroblockd_plane *const pd = &xd->plane[plane];
571
9.22M
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
572
9.22M
  uint8_t *dst;
573
9.22M
  ENTROPY_CONTEXT *a, *l;
574
9.22M
  dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col];
575
9.22M
  a = &args->ta[col];
576
9.22M
  l = &args->tl[row];
577
578
  // TODO(jingning): per transformed block zero forcing only enabled for
579
  // luma component. will integrate chroma components as well.
580
9.22M
  if (x->zcoeff_blk[tx_size][block] && plane == 0) {
581
2.91M
    p->eobs[block] = 0;
582
2.91M
    *a = *l = 0;
583
#if CONFIG_MISMATCH_DEBUG
584
    goto encode_block_end;
585
#else
586
2.91M
    return;
587
2.91M
#endif
588
2.91M
  }
589
590
6.30M
  if (!x->skip_recode) {
591
6.30M
    if (x->quant_fp) {
592
      // Encoding process for rtc mode
593
0
      if (x->skip_txfm[0] == SKIP_TXFM_AC_DC && plane == 0) {
594
        // skip forward transform
595
0
        p->eobs[block] = 0;
596
0
        *a = *l = 0;
597
#if CONFIG_MISMATCH_DEBUG
598
        goto encode_block_end;
599
#else
600
0
        return;
601
0
#endif
602
0
      } else {
603
0
        vp9_xform_quant_fp(x, plane, block, row, col, plane_bsize, tx_size);
604
0
      }
605
6.30M
    } else {
606
6.30M
      if (max_txsize_lookup[plane_bsize] == tx_size) {
607
4.06M
        int txfm_blk_index = (plane << 2) + (block >> (tx_size << 1));
608
4.06M
        if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_NONE) {
609
          // full forward transform and quantization
610
4.06M
          vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);
611
4.06M
        } else if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_AC_ONLY) {
612
          // fast path forward transform and quantization
613
0
          vp9_xform_quant_dc(x, plane, block, row, col, plane_bsize, tx_size);
614
0
        } else {
615
          // skip forward transform
616
0
          p->eobs[block] = 0;
617
0
          *a = *l = 0;
618
#if CONFIG_MISMATCH_DEBUG
619
          goto encode_block_end;
620
#else
621
0
          return;
622
0
#endif
623
0
        }
624
4.06M
      } else {
625
2.24M
        vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);
626
2.24M
      }
627
6.30M
    }
628
6.30M
  }
629
630
6.30M
  if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
631
0
    const int ctx = combine_entropy_contexts(*a, *l);
632
0
    *a = *l = vp9_optimize_b(x, plane, block, tx_size, ctx) > 0;
633
6.30M
  } else {
634
6.30M
    *a = *l = p->eobs[block] > 0;
635
6.30M
  }
636
637
6.30M
  if (p->eobs[block]) *(args->skip) = 0;
638
639
6.30M
  if (x->skip_encode || p->eobs[block] == 0) {
640
#if CONFIG_MISMATCH_DEBUG
641
    goto encode_block_end;
642
#else
643
485k
    return;
644
485k
#endif
645
485k
  }
646
5.82M
#if CONFIG_VP9_HIGHBITDEPTH
647
5.82M
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
648
0
    uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst);
649
0
    switch (tx_size) {
650
0
      case TX_32X32:
651
0
        vp9_highbd_idct32x32_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
652
0
                                 xd->bd);
653
0
        break;
654
0
      case TX_16X16:
655
0
        vp9_highbd_idct16x16_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
656
0
                                 xd->bd);
657
0
        break;
658
0
      case TX_8X8:
659
0
        vp9_highbd_idct8x8_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
660
0
                               xd->bd);
661
0
        break;
662
0
      default:
663
0
        assert(tx_size == TX_4X4);
664
        // this is like vp9_short_idct4x4 but has a special case around eob<=1
665
        // which is significant (not just an optimization) for the lossless
666
        // case.
667
0
        x->highbd_inv_txfm_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
668
0
                               xd->bd);
669
0
        break;
670
0
    }
671
#if CONFIG_MISMATCH_DEBUG
672
    goto encode_block_end;
673
#else
674
0
    return;
675
0
#endif
676
0
  }
677
5.82M
#endif  // CONFIG_VP9_HIGHBITDEPTH
678
679
5.82M
  switch (tx_size) {
680
8.51k
    case TX_32X32:
681
8.51k
      vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
682
8.51k
      break;
683
61.6k
    case TX_16X16:
684
61.6k
      vp9_idct16x16_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
685
61.6k
      break;
686
398k
    case TX_8X8:
687
398k
      vp9_idct8x8_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
688
398k
      break;
689
5.35M
    default:
690
5.35M
      assert(tx_size == TX_4X4);
691
      // this is like vp9_short_idct4x4 but has a special case around eob<=1
692
      // which is significant (not just an optimization) for the lossless
693
      // case.
694
5.35M
      x->inv_txfm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
695
5.35M
      break;
696
5.82M
  }
697
#if CONFIG_MISMATCH_DEBUG
698
encode_block_end:
699
  if (output_enabled) {
700
    int pixel_c, pixel_r;
701
    int blk_w = 1 << (tx_size + TX_UNIT_SIZE_LOG2);
702
    int blk_h = 1 << (tx_size + TX_UNIT_SIZE_LOG2);
703
    mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, col, row,
704
                    pd->subsampling_x, pd->subsampling_y);
705
    mismatch_record_block_tx(dst, pd->dst.stride, plane, pixel_c, pixel_r,
706
                             blk_w, blk_h,
707
                             xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH);
708
  }
709
#endif
710
5.82M
}
711
712
static void encode_block_pass1(int plane, int block, int row, int col,
713
                               BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
714
0
                               void *arg) {
715
0
  MACROBLOCK *const x = (MACROBLOCK *)arg;
716
0
  MACROBLOCKD *const xd = &x->e_mbd;
717
0
  struct macroblock_plane *const p = &x->plane[plane];
718
0
  struct macroblockd_plane *const pd = &xd->plane[plane];
719
0
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
720
0
  uint8_t *dst;
721
0
  dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col];
722
723
0
  vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);
724
725
0
  if (p->eobs[block] > 0) {
726
0
#if CONFIG_VP9_HIGHBITDEPTH
727
0
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
728
0
      x->highbd_inv_txfm_add(dqcoeff, CONVERT_TO_SHORTPTR(dst), pd->dst.stride,
729
0
                             p->eobs[block], xd->bd);
730
0
      return;
731
0
    }
732
0
#endif  // CONFIG_VP9_HIGHBITDEPTH
733
0
    x->inv_txfm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
734
0
  }
735
0
}
736
737
0
// Encodes plane 0 of the block `bsize` in `x`: computes the residual for the
// plane, then visits every transform block in it with encode_block_pass1().
void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) {
  const int plane_index = 0;
  MACROBLOCKD *const xd = &x->e_mbd;

  vp9_subtract_plane(x, bsize, plane_index);
  vp9_foreach_transformed_block_in_plane(xd, bsize, plane_index,
                                         encode_block_pass1, x);
}
742
743
// Encodes every plane of the block `bsize` in `x` by visiting its transform
// blocks with encode_block().
//
// mi_row, mi_col and output_enabled are only forwarded to the per-block
// callback when CONFIG_MISMATCH_DEBUG is set; otherwise they are unused.
//
// The mode info's skip flag is set to 1 up front and the function returns
// immediately when x->skip is set. Otherwise the callback receives a pointer
// to that flag through the argument struct.
void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, int mi_col,
                   int output_enabled) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];
  struct optimize_ctx ctx;
  struct encode_b_args arg = { x,
                               1,     // enable_trellis_opt
                               0.0,   // trellis_opt_thresh
                               NULL,  // &sse_calc_done
                               NULL,  // &sse
                               NULL,  // above entropy context
                               NULL,  // left entropy context
                               &mi->skip
#if CONFIG_MISMATCH_DEBUG
                               ,
                               mi_row,
                               mi_col,
                               output_enabled
#endif
  };
  int pl;

#if !CONFIG_MISMATCH_DEBUG
  (void)mi_row;
  (void)mi_col;
  (void)output_enabled;
#endif

  mi->skip = 1;

  if (x->skip) return;

  for (pl = 0; pl < MAX_MB_PLANE; ++pl) {
    int use_trellis;

    // The residual is only recomputed when the block is being recoded.
    if (!x->skip_recode) vp9_subtract_plane(x, bsize, pl);

    // Trellis optimization needs x->optimize and is suppressed only when
    // both skip_recode and skip_optimize are set.
    use_trellis = x->optimize && !(x->skip_recode && x->skip_optimize);
    if (use_trellis) {
      const struct macroblockd_plane *const pd = &xd->plane[pl];
      const TX_SIZE tx_size = pl ? get_uv_tx_size(mi, pd) : mi->tx_size;
      vp9_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[pl], ctx.tl[pl]);
    }
    arg.enable_trellis_opt = use_trellis;
    arg.ta = ctx.ta[pl];
    arg.tl = ctx.tl[pl];

    vp9_foreach_transformed_block_in_plane(xd, bsize, pl, encode_block, &arg);
  }
}
795
796
void vp9_encode_block_intra(int plane, int block, int row, int col,
797
                            BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
798
344M
                            void *arg) {
799
344M
  struct encode_b_args *const args = arg;
800
344M
  MACROBLOCK *const x = args->x;
801
344M
  MACROBLOCKD *const xd = &x->e_mbd;
802
344M
  MODE_INFO *mi = xd->mi[0];
803
344M
  struct macroblock_plane *const p = &x->plane[plane];
804
344M
  struct macroblockd_plane *const pd = &xd->plane[plane];
805
344M
  tran_low_t *coeff = BLOCK_OFFSET(p->coeff, block);
806
344M
  tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
807
344M
  tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
808
344M
  const ScanOrder *scan_order;
809
344M
  TX_TYPE tx_type = DCT_DCT;
810
344M
  PREDICTION_MODE mode;
811
344M
  const int bwl = b_width_log2_lookup[plane_bsize];
812
344M
  const int diff_stride = 4 * (1 << bwl);
813
344M
  uint8_t *src, *dst;
814
344M
  int16_t *src_diff;
815
344M
  uint16_t *eob = &p->eobs[block];
816
344M
  const int src_stride = p->src.stride;
817
344M
  const int dst_stride = pd->dst.stride;
818
344M
  int enable_trellis_opt = !x->skip_recode;
819
344M
  ENTROPY_CONTEXT *a = NULL;
820
344M
  ENTROPY_CONTEXT *l = NULL;
821
344M
  int entropy_ctx = 0;
822
344M
  dst = &pd->dst.buf[4 * (row * dst_stride + col)];
823
344M
  src = &p->src.buf[4 * (row * src_stride + col)];
824
344M
  src_diff = &p->src_diff[4 * (row * diff_stride + col)];
825
826
344M
  if (tx_size == TX_4X4) {
827
255M
    tx_type = get_tx_type_4x4(get_plane_type(plane), xd, block);
828
255M
    scan_order = &vp9_scan_orders[TX_4X4][tx_type];
829
255M
    mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mi->uv_mode;
830
255M
  } else {
831
89.3M
    mode = plane == 0 ? mi->mode : mi->uv_mode;
832
89.3M
    if (tx_size == TX_32X32) {
833
3.91M
      scan_order = &vp9_default_scan_orders[TX_32X32];
834
85.4M
    } else {
835
85.4M
      tx_type = get_tx_type(get_plane_type(plane), xd);
836
85.4M
      scan_order = &vp9_scan_orders[tx_size][tx_type];
837
85.4M
    }
838
89.3M
  }
839
840
344M
  vp9_predict_intra_block(
841
344M
      xd, bwl, tx_size, mode, (x->skip_encode || x->fp_src_pred) ? src : dst,
842
344M
      (x->skip_encode || x->fp_src_pred) ? src_stride : dst_stride, dst,
843
344M
      dst_stride, col, row, plane);
844
845
  // skip block condition should be handled before this is called.
846
344M
  assert(!x->skip_block);
847
848
344M
  if (!x->skip_recode) {
849
344M
    const int tx_size_in_pixels = (1 << tx_size) << 2;
850
344M
#if CONFIG_VP9_HIGHBITDEPTH
851
344M
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
852
0
      vpx_highbd_subtract_block(tx_size_in_pixels, tx_size_in_pixels, src_diff,
853
0
                                diff_stride, src, src_stride, dst, dst_stride,
854
0
                                xd->bd);
855
344M
    } else {
856
344M
      vpx_subtract_block(tx_size_in_pixels, tx_size_in_pixels, src_diff,
857
344M
                         diff_stride, src, src_stride, dst, dst_stride);
858
344M
    }
859
#else
860
    vpx_subtract_block(tx_size_in_pixels, tx_size_in_pixels, src_diff,
861
                       diff_stride, src, src_stride, dst, dst_stride);
862
#endif
863
344M
    enable_trellis_opt = do_trellis_opt(pd, src_diff, diff_stride, row, col,
864
344M
                                        plane_bsize, tx_size, args);
865
344M
  }
866
867
344M
  if (enable_trellis_opt) {
868
18.0M
    a = &args->ta[col];
869
18.0M
    l = &args->tl[row];
870
18.0M
    entropy_ctx = combine_entropy_contexts(*a, *l);
871
18.0M
  }
872
873
344M
#if CONFIG_VP9_HIGHBITDEPTH
874
344M
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
875
0
    uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst);
876
0
    switch (tx_size) {
877
0
      case TX_32X32:
878
0
        if (!x->skip_recode) {
879
0
          highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
880
0
          vpx_highbd_quantize_b_32x32(coeff, p, qcoeff, dqcoeff, pd->dequant,
881
0
                                      eob, scan_order);
882
0
        }
883
0
        if (enable_trellis_opt) {
884
0
          *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
885
0
        }
886
0
        if (!x->skip_encode && *eob) {
887
0
          vp9_highbd_idct32x32_add(dqcoeff, dst16, dst_stride, *eob, xd->bd);
888
0
        }
889
0
        break;
890
0
      case TX_16X16:
891
0
        if (!x->skip_recode) {
892
0
          if (tx_type == DCT_DCT)
893
0
            vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
894
0
          else
895
0
            vp9_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type);
896
0
          vpx_highbd_quantize_b(coeff, 256, p, qcoeff, dqcoeff, pd->dequant,
897
0
                                eob, scan_order);
898
0
        }
899
0
        if (enable_trellis_opt) {
900
0
          *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
901
0
        }
902
0
        if (!x->skip_encode && *eob) {
903
0
          vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst16, dst_stride, *eob,
904
0
                                  xd->bd);
905
0
        }
906
0
        break;
907
0
      case TX_8X8:
908
0
        if (!x->skip_recode) {
909
0
          if (tx_type == DCT_DCT)
910
0
            vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
911
0
          else
912
0
            vp9_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type);
913
0
          vpx_highbd_quantize_b(coeff, 64, p, qcoeff, dqcoeff, pd->dequant, eob,
914
0
                                scan_order);
915
0
        }
916
0
        if (enable_trellis_opt) {
917
0
          *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
918
0
        }
919
0
        if (!x->skip_encode && *eob) {
920
0
          vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst16, dst_stride, *eob,
921
0
                                xd->bd);
922
0
        }
923
0
        break;
924
0
      default:
925
0
        assert(tx_size == TX_4X4);
926
0
        if (!x->skip_recode) {
927
0
          if (tx_type != DCT_DCT)
928
0
            vp9_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type);
929
0
          else
930
0
            x->fwd_txfm4x4(src_diff, coeff, diff_stride);
931
0
          vpx_highbd_quantize_b(coeff, 16, p, qcoeff, dqcoeff, pd->dequant, eob,
932
0
                                scan_order);
933
0
        }
934
0
        if (enable_trellis_opt) {
935
0
          *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
936
0
        }
937
0
        if (!x->skip_encode && *eob) {
938
0
          if (tx_type == DCT_DCT) {
939
            // this is like vp9_short_idct4x4 but has a special case around
940
            // eob<=1 which is significant (not just an optimization) for the
941
            // lossless case.
942
0
            x->highbd_inv_txfm_add(dqcoeff, dst16, dst_stride, *eob, xd->bd);
943
0
          } else {
944
0
            vp9_highbd_iht4x4_16_add(dqcoeff, dst16, dst_stride, tx_type,
945
0
                                     xd->bd);
946
0
          }
947
0
        }
948
0
        break;
949
0
    }
950
0
    if (*eob) *(args->skip) = 0;
951
0
    return;
952
0
  }
953
344M
#endif  // CONFIG_VP9_HIGHBITDEPTH
954
955
344M
  switch (tx_size) {
956
3.91M
    case TX_32X32:
957
3.91M
      if (!x->skip_recode) {
958
3.91M
        fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
959
3.91M
        vpx_quantize_b_32x32(coeff, p, qcoeff, dqcoeff, pd->dequant, eob,
960
3.91M
                             scan_order);
961
3.91M
      }
962
3.91M
      if (enable_trellis_opt) {
963
477k
        *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
964
477k
      }
965
3.91M
      if (!x->skip_encode && *eob)
966
2.82M
        vp9_idct32x32_add(dqcoeff, dst, dst_stride, *eob);
967
3.91M
      break;
968
15.0M
    case TX_16X16:
969
15.0M
      if (!x->skip_recode) {
970
15.0M
        vp9_fht16x16(src_diff, coeff, diff_stride, tx_type);
971
15.0M
        vpx_quantize_b(coeff, 256, p, qcoeff, dqcoeff, pd->dequant, eob,
972
15.0M
                       scan_order);
973
15.0M
      }
974
15.0M
      if (enable_trellis_opt) {
975
880k
        *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
976
880k
      }
977
15.0M
      if (!x->skip_encode && *eob)
978
12.6M
        vp9_iht16x16_add(tx_type, dqcoeff, dst, dst_stride, *eob);
979
15.0M
      break;
980
70.3M
    case TX_8X8:
981
70.3M
      if (!x->skip_recode) {
982
70.3M
        vp9_fht8x8(src_diff, coeff, diff_stride, tx_type);
983
70.3M
        vpx_quantize_b(coeff, 64, p, qcoeff, dqcoeff, pd->dequant, eob,
984
70.3M
                       scan_order);
985
70.3M
      }
986
70.3M
      if (enable_trellis_opt) {
987
3.80M
        *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
988
3.80M
      }
989
70.3M
      if (!x->skip_encode && *eob)
990
58.0M
        vp9_iht8x8_add(tx_type, dqcoeff, dst, dst_stride, *eob);
991
70.3M
      break;
992
255M
    default:
993
255M
      assert(tx_size == TX_4X4);
994
255M
      if (!x->skip_recode) {
995
255M
        if (tx_type != DCT_DCT)
996
23.7M
          vp9_fht4x4(src_diff, coeff, diff_stride, tx_type);
997
231M
        else
998
231M
          x->fwd_txfm4x4(src_diff, coeff, diff_stride);
999
255M
        vpx_quantize_b(coeff, 16, p, qcoeff, dqcoeff, pd->dequant, eob,
1000
255M
                       scan_order);
1001
255M
      }
1002
255M
      if (enable_trellis_opt) {
1003
12.8M
        *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
1004
12.8M
      }
1005
255M
      if (!x->skip_encode && *eob) {
1006
199M
        if (tx_type == DCT_DCT)
1007
          // this is like vp9_short_idct4x4 but has a special case around eob<=1
1008
          // which is significant (not just an optimization) for the lossless
1009
          // case.
1010
179M
          x->inv_txfm_add(dqcoeff, dst, dst_stride, *eob);
1011
19.1M
        else
1012
19.1M
          vp9_iht4x4_16_add(dqcoeff, dst, dst_stride, tx_type);
1013
199M
      }
1014
255M
      break;
1015
344M
  }
1016
344M
  if (*eob) *(args->skip) = 0;
1017
344M
}
1018
1019
// Intra-encodes one plane of the block `bsize` in `x` by visiting each of its
// transform blocks with vp9_encode_block_intra().
//
// Trellis optimization is requested through `enable_trellis_opt`, but it is
// only left enabled when x->optimize is set and skip_recode/skip_optimize are
// not both set. In that case the plane's entropy contexts are loaded first.
void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane,
                                  int enable_trellis_opt) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];
  struct optimize_ctx ctx;
  // TODO(angiebird): make mismatch_debug support intra mode
  struct encode_b_args arg = { x,
                               enable_trellis_opt,
                               0.0,   // trellis_opt_thresh
                               NULL,  // &sse_calc_done
                               NULL,  // &sse
                               ctx.ta[plane],
                               ctx.tl[plane],
                               &mi->skip
#if CONFIG_MISMATCH_DEBUG
                               ,
                               0,  // mi_row
                               0,  // mi_col
                               0   // output_enabled
#endif
  };

  if (!enable_trellis_opt || !x->optimize ||
      (x->skip_recode && x->skip_optimize)) {
    arg.enable_trellis_opt = 0;
  } else {
    const struct macroblockd_plane *const pd = &xd->plane[plane];
    const TX_SIZE tx_size = plane ? get_uv_tx_size(mi, pd) : mi->tx_size;
    vp9_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane], ctx.tl[plane]);
  }

  vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
                                         vp9_encode_block_intra, &arg);
}