Coverage Report

Created: 2026-04-01 07:42

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libvpx/vp9/encoder/vp9_encodemb.c
Line
Count
Source
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5
 *  that can be found in the LICENSE file in the root of the source
6
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
9
 */
10
11
#include <stdlib.h>
12
13
#include "./vp9_rtcd.h"
14
#include "./vpx_config.h"
15
#include "./vpx_dsp_rtcd.h"
16
17
#include "vpx_dsp/quantize.h"
18
#include "vpx_mem/vpx_mem.h"
19
#include "vpx_ports/mem.h"
20
21
#if CONFIG_MISMATCH_DEBUG
22
#include "vpx_util/vpx_debug_util.h"
23
#endif
24
25
#include "vp9/common/vp9_idct.h"
26
#include "vp9/common/vp9_reconinter.h"
27
#include "vp9/common/vp9_reconintra.h"
28
#include "vp9/common/vp9_scan.h"
29
30
#include "vp9/encoder/vp9_encodemb.h"
31
#include "vp9/encoder/vp9_encoder.h"
32
#include "vp9/encoder/vp9_rd.h"
33
#include "vp9/encoder/vp9_tokenize.h"
34
35
// Range hints for values whose bounds cannot be proven locally.
//  - NDEBUG not defined: the range is checked with assert().
//  - NDEBUG defined, clang with __builtin_assume: the range is handed to the
//    compiler as an assumption.
//  - NDEBUG defined otherwise: the macros expand to empty statements.
// The macro argument is parenthesized so that any expression (for example
// `a & b`) is range-checked as a whole rather than being re-associated with
// the comparison operators.
#if defined(NDEBUG)
#if defined(__clang__) && defined(__has_builtin)
#if __has_builtin(__builtin_assume)
// This is verified by test/vp9_scan_test.cc
#define ASSUME_VALID_SCAN_VALUE(i) \
  __builtin_assume(0 <= (i) && (i) <= MAX_SCAN_VALUE)
// This is verified by test/vp9_entropy_test.cc
#define ASSUME_VALID_ENERGY_CLASS(i) \
  __builtin_assume(0 <= (i) && (i) <= MAX_ENERGY_CLASS)
#define ASSUME_VALID_TOKEN(i) __builtin_assume(0 <= (i) && (i) <= MAX_TOKEN)
#else
#define ASSUME_VALID_SCAN_VALUE(i) \
  do {                             \
  } while (0)
#define ASSUME_VALID_ENERGY_CLASS(i) \
  do {                               \
  } while (0)
#define ASSUME_VALID_TOKEN(i) \
  do {                        \
  } while (0)
#endif
#else
#define ASSUME_VALID_SCAN_VALUE(i) \
  do {                             \
  } while (0)
#define ASSUME_VALID_ENERGY_CLASS(i) \
  do {                               \
  } while (0)
#define ASSUME_VALID_TOKEN(i) \
  do {                        \
  } while (0)
#endif
#else
#define ASSUME_VALID_SCAN_VALUE(i) assert(0 <= (i) && (i) <= MAX_SCAN_VALUE)
#define ASSUME_VALID_ENERGY_CLASS(i) \
  assert(0 <= (i) && (i) <= MAX_ENERGY_CLASS)
#define ASSUME_VALID_TOKEN(i) assert(0 <= (i) && (i) <= MAX_TOKEN)
#endif
72
73
// Holds two arrays of 16 ENTROPY_CONTEXT entries for each of the
// MAX_MB_PLANE planes.
// NOTE(review): 'ta' / 'tl' presumably stand for the above and left entropy
// contexts; the struct is not referenced in the visible part of this file, so
// confirm against its users.
struct optimize_ctx {
74
  ENTROPY_CONTEXT ta[MAX_MB_PLANE][16];
75
  ENTROPY_CONTEXT tl[MAX_MB_PLANE][16];
76
};
77
78
41.5M
// Computes the residual for one plane of the current block: source minus the
// contents of the plane's dst buffer, written to the plane's src_diff buffer.
// The block dimensions come from the plane-level block size, and the width is
// also used as the stride of src_diff.
void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
  struct macroblock_plane *const enc_plane = &x->plane[plane];
  const struct macroblockd_plane *const dec_plane = &x->e_mbd.plane[plane];
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, dec_plane);
  const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize];

#if CONFIG_VP9_HIGHBITDEPTH
  if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    // High bit depth buffers take the dedicated subtract routine.
    vpx_highbd_subtract_block(height, width, enc_plane->src_diff, width,
                              enc_plane->src.buf, enc_plane->src.stride,
                              dec_plane->dst.buf, dec_plane->dst.stride,
                              x->e_mbd.bd);
  } else
#endif  // CONFIG_VP9_HIGHBITDEPTH
  {
    vpx_subtract_block(height, width, enc_plane->src_diff, width,
                       enc_plane->src.buf, enc_plane->src.stride,
                       dec_plane->dst.buf, dec_plane->dst.stride);
  }
}
96
97
// Scale factors applied to mb->rdmult in vp9_optimize_b(), indexed as
// [ref][plane_type], where 'ref' is the result of is_inter_block() for the
// current mode info and 'plane_type' comes from get_plane_type().
static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = {
98
  { 10, 6 },
99
  { 8, 5 },
100
};
101
102
// Right-shifts 'num' by 'shift' bits. A negative 'num' is negated, shifted,
// and negated again, so the shift is always applied to a non-negative value.
// 'num' can be negative, but 'shift' must be non-negative.
// Note that 'num' is evaluated more than once.
103
#define RIGHT_SHIFT_POSSIBLY_NEGATIVE(num, shift) \
104
1.78M
  (((num) >= 0) ? (num) >> (shift) : -((-(num)) >> (shift)))
105
106
// Greedy rate-distortion optimization of the quantized coefficients of one
// transform block. Walking the coefficients in scan order, each nonzero
// quantized value is compared against the same value with its magnitude
// reduced by one; the cheaper choice is written back to qcoeff/dqcoeff. In
// parallel, the cost of ending the block after each position is tracked to
// choose a new end-of-block (eob).
//   mb      - encoder macroblock; coeff/qcoeff/eobs are read from
//             mb->plane[plane] and dqcoeff from mb->e_mbd.plane[plane].
//   plane   - plane index.
//   block   - block index passed to BLOCK_OFFSET() and used to index eobs.
//   tx_size - transform size of the block.
//   ctx     - coefficient context used for the first coefficient; it indexes
//             the token cost tables directly.
// Returns the final eob, which is also stored in mb->plane[plane].eobs[block].
int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
107
29.1M
                   int ctx) {
108
29.1M
  MACROBLOCKD *const xd = &mb->e_mbd;
109
29.1M
  struct macroblock_plane *const p = &mb->plane[plane];
110
29.1M
  struct macroblockd_plane *const pd = &xd->plane[plane];
111
29.1M
  const int ref = is_inter_block(xd->mi[0]);
112
29.1M
  // Energy class of the token chosen for each coefficient position.
  uint8_t token_cache[MAX_SCAN_VALUE + 1];
113
29.1M
  const tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
114
29.1M
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
115
29.1M
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
116
29.1M
  const int eob = p->eobs[block];
117
29.1M
  const PLANE_TYPE plane_type = get_plane_type(plane);
118
29.1M
  const int default_eob = 16 << (tx_size << 1);
119
29.1M
  // 1 for 32x32 transforms, 0 otherwise.
  const int shift = (tx_size == TX_32X32);
120
29.1M
  const int16_t *const dequant_ptr = pd->dequant;
121
29.1M
  const uint8_t *const band_translate = get_band_translate(tx_size);
122
29.1M
  const ScanOrder *const so = get_scan(xd, tx_size, plane_type, block);
123
29.1M
  const int16_t *const scan = so->scan;
124
29.1M
  const int16_t *const nb = so->neighbors;
125
29.1M
  const MODE_INFO *mbmi = xd->mi[0];
126
29.1M
  const int sharpness = mb->sharpness;
127
29.1M
  const int64_t rdadj = (int64_t)mb->rdmult * plane_rd_mult[ref][plane_type];
128
29.1M
  // With sharpness == 0 the adjusted multiplier is halved; otherwise it is
  // scaled by (8 - sharpness + segment_id) / 16.
  const int64_t rdmult =
129
29.1M
      (sharpness == 0 ? rdadj >> 1
130
29.1M
                      : (rdadj * (8 - sharpness + mbmi->segment_id)) >> 4);
131
132
29.1M
  const int64_t rddiv = mb->rddiv;
133
29.1M
  int64_t rd_cost0, rd_cost1;
134
29.1M
  int64_t rate0, rate1;
135
29.1M
  int16_t t0, t1;
136
29.1M
  int i, final_eob;
137
29.1M
  int count_high_values_after_eob = 0;
138
29.1M
#if CONFIG_VP9_HIGHBITDEPTH
139
29.1M
  const uint16_t *cat6_high_cost = vp9_get_high_cost_table(xd->bd);
140
#else
141
  const uint16_t *cat6_high_cost = vp9_get_high_cost_table(8);
142
#endif
143
29.1M
  unsigned int(*const token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
144
29.1M
      mb->token_costs[tx_size][plane_type][ref];
145
29.1M
  unsigned int(*token_costs_cur)[2][COEFF_CONTEXTS][ENTROPY_TOKENS];
146
29.1M
  int64_t eob_cost0, eob_cost1;
147
29.1M
  // Rate accumulated over the coefficients processed so far.
  int64_t accu_rate = 0;
148
  // Initialized to the worst possible error for the largest transform size.
149
  // This ensures that it never goes negative.
150
29.1M
  int64_t accu_error = ((int64_t)1) << 50;
151
29.1M
  int64_t best_block_rd_cost = INT64_MAX;
152
29.1M
  // Selected quantized value of the previous coefficient; starts nonzero.
  int x_prev = 1;
153
29.1M
  // Quantized / de-quantized values recorded for the coefficient just before
  // the best eob position found so far.
  tran_low_t before_best_eob_qc = 0;
154
29.1M
  tran_low_t before_best_eob_dqc = 0;
155
156
29.1M
  assert((!plane_type && !plane) || (plane_type && plane));
157
29.1M
  assert(eob <= default_eob);
158
159
576M
  // Seed token_cache with the energy class of every coefficient before eob.
  for (i = 0; i < eob; i++) {
160
547M
    const int rc = scan[i];
161
547M
    ASSUME_VALID_SCAN_VALUE(rc);
162
547M
    int16_t token = vp9_get_token(qcoeff[rc]);
163
547M
    ASSUME_VALID_TOKEN(token);
164
547M
    token_cache[rc] = vp9_pt_energy_class[token];
165
547M
  }
166
29.1M
  final_eob = 0;
167
168
  // This is used in the first iteration, and must be inbounds. We cannot
169
  // locally verify that this is in bounds, so we need to verify at runtime.
170
  // For now, only verify if we have array-bounds turned on.
171
29.1M
#if defined(__clang__) && defined(__has_feature)
172
#if __has_feature(array_bounds_sanitizer)
173
  if (ctx < 0 || ctx > MAX_ENERGY_CLASS) {
174
    abort();
175
  }
176
#endif
177
29.1M
#endif
178
179
  // Initial RD cost.
  // This is the cost of signalling EOB immediately (final_eob == 0).
180
29.1M
  token_costs_cur = token_costs + band_translate[0];
181
29.1M
  rate0 = (*token_costs_cur)[0][ctx][EOB_TOKEN];
182
29.1M
  best_block_rd_cost = RDCOST(rdmult, rddiv, rate0, accu_error);
183
184
  // For each token, pick one of two choices greedily:
185
  // (i) First candidate: Keep current quantized value, OR
186
  // (ii) Second candidate: Reduce quantized value by 1.
187
576M
  for (i = 0; i < eob; i++) {
188
547M
    const int rc = scan[i];
189
547M
    ASSUME_VALID_SCAN_VALUE(rc);
190
547M
    const int x = qcoeff[rc];
191
547M
    const int band_cur = band_translate[i];
192
547M
    // The first coefficient uses the caller-supplied context; later ones
    // derive it from the token_cache entries of their neighbors.
    const int ctx_cur = (i == 0) ? ctx : get_coef_context(nb, token_cache, i);
193
547M
    ASSUME_VALID_ENERGY_CLASS(ctx_cur);
194
547M
    // Selects the second cost table when the previous selected value was 0.
    const int token_tree_sel_cur = (x_prev == 0);
195
547M
    token_costs_cur = token_costs + band_cur;
196
547M
    if (x == 0) {  // No need to search
197
231M
      const int token = vp9_get_token(x);
198
231M
      ASSUME_VALID_TOKEN(token);
199
231M
      rate0 = (*token_costs_cur)[token_tree_sel_cur][ctx_cur][token];
200
231M
      accu_rate += rate0;
201
231M
      x_prev = 0;
202
      // Note: accu_error does not change.
203
315M
    } else {
204
315M
      // dequant_ptr[0] is used for scan position rc == 0, dequant_ptr[1]
      // for every other position.
      const int dqv = dequant_ptr[rc != 0];
205
      // Compute the distortion for quantizing to 0.
206
315M
      const int diff_for_zero_raw = (0 - coeff[rc]) * (1 << shift);
207
315M
      const int diff_for_zero =
208
315M
#if CONFIG_VP9_HIGHBITDEPTH
209
315M
          (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
210
315M
              ? RIGHT_SHIFT_POSSIBLY_NEGATIVE(diff_for_zero_raw, xd->bd - 8)
211
315M
              :
212
315M
#endif
213
315M
              diff_for_zero_raw;
214
315M
      const int64_t distortion_for_zero =
215
315M
          (int64_t)diff_for_zero * diff_for_zero;
216
217
      // Compute the distortion for the first candidate
218
315M
      const int diff0_raw = (dqcoeff[rc] - coeff[rc]) * (1 << shift);
219
315M
      const int diff0 =
220
315M
#if CONFIG_VP9_HIGHBITDEPTH
221
315M
          (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
222
315M
              ? RIGHT_SHIFT_POSSIBLY_NEGATIVE(diff0_raw, xd->bd - 8)
223
315M
              :
224
315M
#endif  // CONFIG_VP9_HIGHBITDEPTH
225
315M
              diff0_raw;
226
315M
      const int64_t distortion0 = (int64_t)diff0 * diff0;
227
228
      // Compute the distortion for the second candidate
229
315M
      const int sign = -(x < 0);        // -1 if x is negative and 0 otherwise.
230
315M
      const int x1 = x - 2 * sign - 1;  // abs(x1) = abs(x) - 1.
231
315M
      int64_t distortion1;
232
315M
      if (x1 != 0) {
233
204M
        const int dqv_step =
234
204M
#if CONFIG_VP9_HIGHBITDEPTH
235
204M
            (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? dqv >> (xd->bd - 8)
236
204M
                                                          :
237
204M
#endif  // CONFIG_VP9_HIGHBITDEPTH
238
204M
                                                          dqv;
239
204M
        // (v + sign) ^ sign negates v when sign == -1 and leaves it unchanged
        // when sign == 0, so diff_step carries the sign of x.
        const int diff_step = (dqv_step + sign) ^ sign;
240
204M
        const int diff1 = diff0 - diff_step;
241
204M
        assert(dqv > 0);  // We aren't right shifting a negative number above.
242
204M
        distortion1 = (int64_t)diff1 * diff1;
243
204M
      } else {
244
110M
        distortion1 = distortion_for_zero;
245
110M
      }
246
315M
      {
247
        // Calculate RDCost for current coeff for the two candidates.
248
315M
        const int64_t base_bits0 = vp9_get_token_cost(x, &t0, cat6_high_cost);
249
315M
        const int64_t base_bits1 = vp9_get_token_cost(x1, &t1, cat6_high_cost);
250
315M
        rate0 =
251
315M
            base_bits0 + (*token_costs_cur)[token_tree_sel_cur][ctx_cur][t0];
252
315M
        rate1 =
253
315M
            base_bits1 + (*token_costs_cur)[token_tree_sel_cur][ctx_cur][t1];
254
315M
      }
255
315M
      {
256
315M
        int rdcost_better_for_x1, eob_rdcost_better_for_x1;
257
315M
        int dqc0, dqc1;
258
315M
        int64_t best_eob_cost_cur;
259
315M
        int use_x1;
260
261
        // Calculate RD Cost effect on the next coeff for the two candidates.
262
315M
        int64_t next_bits0 = 0;
263
315M
        int64_t next_bits1 = 0;
264
315M
        int64_t next_eob_bits0 = 0;
265
315M
        int64_t next_eob_bits1 = 0;
266
315M
        if (i < default_eob - 1) {
267
307M
          int ctx_next, token_tree_sel_next;
268
307M
          const int band_next = band_translate[i + 1];
269
307M
          // The token following the last coefficient before eob is EOB.
          const int token_next =
270
307M
              (i + 1 != eob) ? vp9_get_token(qcoeff[scan[i + 1]]) : EOB_TOKEN;
271
307M
          ASSUME_VALID_TOKEN(token_next);
272
307M
          unsigned int(*const token_costs_next)[2][COEFF_CONTEXTS]
273
307M
                                               [ENTROPY_TOKENS] =
274
307M
                                                   token_costs + band_next;
275
307M
          // Temporarily store candidate 0's energy class to derive the next
          // coefficient's context under that choice.
          token_cache[rc] = vp9_pt_energy_class[t0];
276
307M
          ctx_next = get_coef_context(nb, token_cache, i + 1);
277
          // token_cache is initialized with valid energy classes.
278
          // get_coef_context returns at most the maximum value of
279
          // token_cache.
280
307M
          ASSUME_VALID_ENERGY_CLASS(ctx_next);
281
307M
          token_tree_sel_next = (x == 0);
282
307M
          next_bits0 =
283
307M
              (*token_costs_next)[token_tree_sel_next][ctx_next][token_next];
284
307M
          next_eob_bits0 =
285
307M
              (*token_costs_next)[token_tree_sel_next][ctx_next][EOB_TOKEN];
286
307M
          // Repeat with candidate 1's energy class.
          token_cache[rc] = vp9_pt_energy_class[t1];
287
307M
          ctx_next = get_coef_context(nb, token_cache, i + 1);
288
          // token_cache is initialized with valid energy classes.
289
          // get_coef_context returns at most the maximum value of
290
          // token_cache.
291
307M
          ASSUME_VALID_ENERGY_CLASS(ctx_next);
292
307M
          token_tree_sel_next = (x1 == 0);
293
307M
          next_bits1 =
294
307M
              (*token_costs_next)[token_tree_sel_next][ctx_next][token_next];
295
307M
          if (x1 != 0) {
296
200M
            next_eob_bits1 =
297
200M
                (*token_costs_next)[token_tree_sel_next][ctx_next][EOB_TOKEN];
298
200M
          }
299
307M
        }
300
301
        // Compare the total RD costs for two candidates.
302
315M
        rd_cost0 = RDCOST(rdmult, rddiv, (rate0 + next_bits0), distortion0);
303
315M
        rd_cost1 = RDCOST(rdmult, rddiv, (rate1 + next_bits1), distortion1);
304
315M
        rdcost_better_for_x1 = (rd_cost1 < rd_cost0);
305
315M
        // Cost of the whole block if it ended right after this coefficient.
        eob_cost0 = RDCOST(rdmult, rddiv, (accu_rate + rate0 + next_eob_bits0),
306
315M
                           (accu_error + distortion0 - distortion_for_zero));
307
315M
        eob_cost1 = eob_cost0;
308
315M
        if (x1 != 0) {
309
204M
          eob_cost1 =
310
204M
              RDCOST(rdmult, rddiv, (accu_rate + rate1 + next_eob_bits1),
311
204M
                     (accu_error + distortion1 - distortion_for_zero));
312
204M
          eob_rdcost_better_for_x1 = (eob_cost1 < eob_cost0);
313
204M
        } else {
314
110M
          eob_rdcost_better_for_x1 = 0;
315
110M
        }
316
317
        // Calculate the two candidate de-quantized values.
318
315M
        dqc0 = dqcoeff[rc];
319
315M
        dqc1 = 0;
320
315M
        // dqc1 is only needed when candidate 1 wins at least one comparison.
        if (rdcost_better_for_x1 + eob_rdcost_better_for_x1) {
321
4.17M
          if (x1 != 0) {
322
1.78M
            dqc1 = RIGHT_SHIFT_POSSIBLY_NEGATIVE(x1 * dqv, shift);
323
2.39M
          } else {
324
2.39M
            dqc1 = 0;
325
2.39M
          }
326
4.17M
        }
327
328
        // Pick and record the better quantized and de-quantized values.
329
315M
        if (rdcost_better_for_x1) {
330
4.03M
          qcoeff[rc] = x1;
331
4.03M
          dqcoeff[rc] = dqc1;
332
4.03M
          accu_rate += rate1;
333
4.03M
          accu_error += distortion1 - distortion_for_zero;
334
4.03M
          assert(distortion1 <= distortion_for_zero);
335
4.03M
          token_cache[rc] = vp9_pt_energy_class[t1];
336
311M
        } else {
337
311M
          accu_rate += rate0;
338
311M
          accu_error += distortion0 - distortion_for_zero;
339
311M
          assert(distortion0 <= distortion_for_zero);
340
311M
          token_cache[rc] = vp9_pt_energy_class[t0];
341
311M
        }
342
315M
        // With sharpness enabled, count selected values of magnitude > 1;
        // the count is reset below whenever the eob position moves.
        if (sharpness > 0 && abs(qcoeff[rc]) > 1) count_high_values_after_eob++;
343
315M
        assert(accu_error >= 0);
344
315M
        x_prev = qcoeff[rc];  // Update based on selected quantized value.
345
346
315M
        use_x1 = (x1 != 0) && eob_rdcost_better_for_x1;
347
315M
        best_eob_cost_cur = use_x1 ? eob_cost1 : eob_cost0;
348
349
        // Determine whether to move the eob position to i+1
350
315M
        if (best_eob_cost_cur < best_block_rd_cost) {
351
299M
          best_block_rd_cost = best_eob_cost_cur;
352
299M
          final_eob = i + 1;
353
299M
          count_high_values_after_eob = 0;
354
299M
          if (use_x1) {
355
1.77M
            before_best_eob_qc = x1;
356
1.77M
            before_best_eob_dqc = dqc1;
357
297M
          } else {
358
297M
            before_best_eob_qc = x;
359
297M
            before_best_eob_dqc = dqc0;
360
297M
          }
361
299M
        }
362
315M
      }
363
315M
    }
364
547M
  }
365
29.1M
  // If high-magnitude values were counted after the chosen eob, discard that
  // eob: scan backwards from eob - 1 for the last nonzero coefficient and end
  // the block right after it. qcoeff/dqcoeff are left as selected above.
  if (count_high_values_after_eob > 0) {
366
0
    final_eob = eob - 1;
367
0
    for (; final_eob >= 0; final_eob--) {
368
0
      const int rc = scan[final_eob];
369
0
      ASSUME_VALID_SCAN_VALUE(rc);
370
0
      const int x = qcoeff[rc];
371
0
      if (x) {
372
0
        break;
373
0
      }
374
0
    }
375
0
    final_eob++;
376
29.1M
  } else {
377
29.1M
    assert(final_eob <= eob);
378
29.1M
    // Restore the coefficient just before the chosen eob to the values
    // recorded when that eob was selected.
    if (final_eob > 0) {
379
16.5M
      int rc;
380
16.5M
      assert(before_best_eob_qc != 0);
381
16.5M
      i = final_eob - 1;
382
16.5M
      rc = scan[i];
383
16.5M
      ASSUME_VALID_SCAN_VALUE(rc);
384
16.5M
      qcoeff[rc] = before_best_eob_qc;
385
16.5M
      dqcoeff[rc] = before_best_eob_dqc;
386
16.5M
    }
387
64.3M
    // Zero every coefficient from the new eob up to the old one.
    for (i = final_eob; i < eob; i++) {
388
35.2M
      int rc = scan[i];
389
35.2M
      ASSUME_VALID_SCAN_VALUE(rc);
390
35.2M
      qcoeff[rc] = 0;
391
35.2M
      dqcoeff[rc] = 0;
392
35.2M
    }
393
29.1M
  }
394
29.1M
  mb->plane[plane].eobs[block] = final_eob;
395
29.1M
  return final_eob;
396
29.1M
}
397
#undef RIGHT_SHIFT_POSSIBLY_NEGATIVE
398
399
static INLINE void fdct32x32(int rd_transform, const int16_t *src,
400
4.62M
                             tran_low_t *dst, int src_stride) {
401
4.62M
  if (rd_transform)
402
4.43M
    vpx_fdct32x32_rd(src, dst, src_stride);
403
197k
  else
404
197k
    vpx_fdct32x32(src, dst, src_stride);
405
4.62M
}
406
407
#if CONFIG_VP9_HIGHBITDEPTH
408
static INLINE void highbd_fdct32x32(int rd_transform, const int16_t *src,
409
0
                                    tran_low_t *dst, int src_stride) {
410
0
  if (rd_transform)
411
0
    vpx_highbd_fdct32x32_rd(src, dst, src_stride);
412
0
  else
413
0
    vpx_highbd_fdct32x32(src, dst, src_stride);
414
0
}
415
#endif  // CONFIG_VP9_HIGHBITDEPTH
416
417
void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, int row, int col,
418
0
                        BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
419
0
  MACROBLOCKD *const xd = &x->e_mbd;
420
0
  const struct macroblock_plane *const p = &x->plane[plane];
421
0
  const struct macroblockd_plane *const pd = &xd->plane[plane];
422
0
  const ScanOrder *const scan_order = &vp9_default_scan_orders[tx_size];
423
0
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
424
0
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
425
0
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
426
0
  uint16_t *const eob = &p->eobs[block];
427
0
  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
428
0
  const int16_t *src_diff;
429
0
  src_diff = &p->src_diff[4 * (row * diff_stride + col)];
430
  // skip block condition should be handled before this is called.
431
0
  assert(!x->skip_block);
432
433
0
#if CONFIG_VP9_HIGHBITDEPTH
434
0
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
435
0
    switch (tx_size) {
436
0
      case TX_32X32:
437
0
        highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
438
0
        vp9_highbd_quantize_fp_32x32(coeff, 1024, p, qcoeff, dqcoeff,
439
0
                                     pd->dequant, eob, scan_order);
440
0
        break;
441
0
      case TX_16X16:
442
0
        vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
443
0
        vp9_highbd_quantize_fp(coeff, 256, p, qcoeff, dqcoeff, pd->dequant, eob,
444
0
                               scan_order);
445
0
        break;
446
0
      case TX_8X8:
447
0
        vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
448
0
        vp9_highbd_quantize_fp(coeff, 64, p, qcoeff, dqcoeff, pd->dequant, eob,
449
0
                               scan_order);
450
0
        break;
451
0
      default:
452
0
        assert(tx_size == TX_4X4);
453
0
        x->fwd_txfm4x4(src_diff, coeff, diff_stride);
454
0
        vp9_highbd_quantize_fp(coeff, 16, p, qcoeff, dqcoeff, pd->dequant, eob,
455
0
                               scan_order);
456
0
        break;
457
0
    }
458
0
    return;
459
0
  }
460
0
#endif  // CONFIG_VP9_HIGHBITDEPTH
461
462
0
  switch (tx_size) {
463
0
    case TX_32X32:
464
0
      fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
465
0
      vp9_quantize_fp_32x32(coeff, 1024, p, qcoeff, dqcoeff, pd->dequant, eob,
466
0
                            scan_order);
467
0
      break;
468
0
    case TX_16X16:
469
0
      vpx_fdct16x16(src_diff, coeff, diff_stride);
470
0
      vp9_quantize_fp(coeff, 256, p, qcoeff, dqcoeff, pd->dequant, eob,
471
0
                      scan_order);
472
0
      break;
473
0
    case TX_8X8:
474
0
      vpx_fdct8x8(src_diff, coeff, diff_stride);
475
0
      vp9_quantize_fp(coeff, 64, p, qcoeff, dqcoeff, pd->dequant, eob,
476
0
                      scan_order);
477
478
0
      break;
479
0
    default:
480
0
      assert(tx_size == TX_4X4);
481
0
      x->fwd_txfm4x4(src_diff, coeff, diff_stride);
482
0
      vp9_quantize_fp(coeff, 16, p, qcoeff, dqcoeff, pd->dequant, eob,
483
0
                      scan_order);
484
0
      break;
485
0
  }
486
0
}
487
488
void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block, int row, int col,
489
425k
                        BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
490
425k
  MACROBLOCKD *const xd = &x->e_mbd;
491
425k
  const struct macroblock_plane *const p = &x->plane[plane];
492
425k
  const struct macroblockd_plane *const pd = &xd->plane[plane];
493
425k
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
494
425k
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
495
425k
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
496
425k
  uint16_t *const eob = &p->eobs[block];
497
425k
  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
498
425k
  const int16_t *src_diff;
499
425k
  src_diff = &p->src_diff[4 * (row * diff_stride + col)];
500
  // skip block condition should be handled before this is called.
501
425k
  assert(!x->skip_block);
502
503
425k
#if CONFIG_VP9_HIGHBITDEPTH
504
425k
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
505
0
    switch (tx_size) {
506
0
      case TX_32X32:
507
0
        vpx_highbd_fdct32x32_1(src_diff, coeff, diff_stride);
508
0
        vpx_highbd_quantize_dc_32x32(coeff, p->round, p->quant_fp[0], qcoeff,
509
0
                                     dqcoeff, pd->dequant[0], eob);
510
0
        break;
511
0
      case TX_16X16:
512
0
        vpx_highbd_fdct16x16_1(src_diff, coeff, diff_stride);
513
0
        vpx_highbd_quantize_dc(coeff, 256, p->round, p->quant_fp[0], qcoeff,
514
0
                               dqcoeff, pd->dequant[0], eob);
515
0
        break;
516
0
      case TX_8X8:
517
0
        vpx_highbd_fdct8x8_1(src_diff, coeff, diff_stride);
518
0
        vpx_highbd_quantize_dc(coeff, 64, p->round, p->quant_fp[0], qcoeff,
519
0
                               dqcoeff, pd->dequant[0], eob);
520
0
        break;
521
0
      default:
522
0
        assert(tx_size == TX_4X4);
523
0
        x->fwd_txfm4x4(src_diff, coeff, diff_stride);
524
0
        vpx_highbd_quantize_dc(coeff, 16, p->round, p->quant_fp[0], qcoeff,
525
0
                               dqcoeff, pd->dequant[0], eob);
526
0
        break;
527
0
    }
528
0
    return;
529
0
  }
530
425k
#endif  // CONFIG_VP9_HIGHBITDEPTH
531
532
425k
  switch (tx_size) {
533
4.27k
    case TX_32X32:
534
4.27k
      vpx_fdct32x32_1(src_diff, coeff, diff_stride);
535
4.27k
      vpx_quantize_dc_32x32(coeff, p->round, p->quant_fp[0], qcoeff, dqcoeff,
536
4.27k
                            pd->dequant[0], eob);
537
4.27k
      break;
538
10.0k
    case TX_16X16:
539
10.0k
      vpx_fdct16x16_1(src_diff, coeff, diff_stride);
540
10.0k
      vpx_quantize_dc(coeff, 256, p->round, p->quant_fp[0], qcoeff, dqcoeff,
541
10.0k
                      pd->dequant[0], eob);
542
10.0k
      break;
543
62.0k
    case TX_8X8:
544
62.0k
      vpx_fdct8x8_1(src_diff, coeff, diff_stride);
545
62.0k
      vpx_quantize_dc(coeff, 64, p->round, p->quant_fp[0], qcoeff, dqcoeff,
546
62.0k
                      pd->dequant[0], eob);
547
62.0k
      break;
548
349k
    default:
549
349k
      assert(tx_size == TX_4X4);
550
349k
      x->fwd_txfm4x4(src_diff, coeff, diff_stride);
551
349k
      vpx_quantize_dc(coeff, 16, p->round, p->quant_fp[0], qcoeff, dqcoeff,
552
349k
                      pd->dequant[0], eob);
553
349k
      break;
554
425k
  }
555
425k
}
556
557
void vp9_xform_quant(MACROBLOCK *x, int plane, int block, int row, int col,
558
64.6M
                     BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
559
64.6M
  MACROBLOCKD *const xd = &x->e_mbd;
560
64.6M
  const struct macroblock_plane *const p = &x->plane[plane];
561
64.6M
  const struct macroblockd_plane *const pd = &xd->plane[plane];
562
64.6M
  const ScanOrder *const scan_order = &vp9_default_scan_orders[tx_size];
563
64.6M
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
564
64.6M
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
565
64.6M
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
566
64.6M
  uint16_t *const eob = &p->eobs[block];
567
64.6M
  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
568
64.6M
  const int16_t *src_diff;
569
64.6M
  src_diff = &p->src_diff[4 * (row * diff_stride + col)];
570
  // skip block condition should be handled before this is called.
571
64.6M
  assert(!x->skip_block);
572
573
64.6M
#if CONFIG_VP9_HIGHBITDEPTH
574
64.6M
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
575
0
    switch (tx_size) {
576
0
      case TX_32X32:
577
0
        highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
578
0
        vpx_highbd_quantize_b_32x32(coeff, p, qcoeff, dqcoeff, pd->dequant, eob,
579
0
                                    scan_order);
580
0
        break;
581
0
      case TX_16X16:
582
0
        vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
583
0
        vpx_highbd_quantize_b(coeff, 256, p, qcoeff, dqcoeff, pd->dequant, eob,
584
0
                              scan_order);
585
0
        break;
586
0
      case TX_8X8:
587
0
        vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
588
0
        vpx_highbd_quantize_b(coeff, 64, p, qcoeff, dqcoeff, pd->dequant, eob,
589
0
                              scan_order);
590
0
        break;
591
0
      default:
592
0
        assert(tx_size == TX_4X4);
593
0
        x->fwd_txfm4x4(src_diff, coeff, diff_stride);
594
0
        vpx_highbd_quantize_b(coeff, 16, p, qcoeff, dqcoeff, pd->dequant, eob,
595
0
                              scan_order);
596
0
        break;
597
0
    }
598
0
    return;
599
0
  }
600
64.6M
#endif  // CONFIG_VP9_HIGHBITDEPTH
601
602
64.6M
  switch (tx_size) {
603
1.01M
    case TX_32X32:
604
1.01M
      fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
605
1.01M
      vpx_quantize_b_32x32(coeff, p, qcoeff, dqcoeff, pd->dequant, eob,
606
1.01M
                           scan_order);
607
1.01M
      break;
608
4.23M
    case TX_16X16:
609
4.23M
      vpx_fdct16x16(src_diff, coeff, diff_stride);
610
4.23M
      vpx_quantize_b(coeff, 256, p, qcoeff, dqcoeff, pd->dequant, eob,
611
4.23M
                     scan_order);
612
4.23M
      break;
613
17.6M
    case TX_8X8:
614
17.6M
      vpx_fdct8x8(src_diff, coeff, diff_stride);
615
17.6M
      vpx_quantize_b(coeff, 64, p, qcoeff, dqcoeff, pd->dequant, eob,
616
17.6M
                     scan_order);
617
17.6M
      break;
618
41.7M
    default:
619
41.7M
      assert(tx_size == TX_4X4);
620
41.7M
      x->fwd_txfm4x4(src_diff, coeff, diff_stride);
621
41.7M
      vpx_quantize_b(coeff, 16, p, qcoeff, dqcoeff, pd->dequant, eob,
622
41.7M
                     scan_order);
623
41.7M
      break;
624
64.6M
  }
625
64.6M
}
626
627
static void encode_block(int plane, int block, int row, int col,
628
10.3M
                         BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
629
10.3M
  struct encode_b_args *const args = arg;
630
#if CONFIG_MISMATCH_DEBUG
631
  int mi_row = args->mi_row;
632
  int mi_col = args->mi_col;
633
  int output_enabled = args->output_enabled;
634
#endif
635
10.3M
  MACROBLOCK *const x = args->x;
636
10.3M
  MACROBLOCKD *const xd = &x->e_mbd;
637
10.3M
  struct macroblock_plane *const p = &x->plane[plane];
638
10.3M
  struct macroblockd_plane *const pd = &xd->plane[plane];
639
10.3M
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
640
10.3M
  uint8_t *dst;
641
10.3M
  ENTROPY_CONTEXT *a, *l;
642
10.3M
  dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col];
643
10.3M
  a = &args->ta[col];
644
10.3M
  l = &args->tl[row];
645
646
  // TODO(jingning): per transformed block zero forcing only enabled for
647
  // luma component. will integrate chroma components as well.
648
10.3M
  if (x->zcoeff_blk[tx_size][block] && plane == 0) {
649
3.11M
    p->eobs[block] = 0;
650
3.11M
    *a = *l = 0;
651
#if CONFIG_MISMATCH_DEBUG
652
    goto encode_block_end;
653
#else
654
3.11M
    return;
655
3.11M
#endif
656
3.11M
  }
657
658
7.20M
  if (!x->skip_recode) {
659
7.20M
    if (x->quant_fp) {
660
      // Encoding process for rtc mode
661
0
      if (x->skip_txfm[0] == SKIP_TXFM_AC_DC && plane == 0) {
662
        // skip forward transform
663
0
        p->eobs[block] = 0;
664
0
        *a = *l = 0;
665
#if CONFIG_MISMATCH_DEBUG
666
        goto encode_block_end;
667
#else
668
0
        return;
669
0
#endif
670
0
      } else {
671
0
        vp9_xform_quant_fp(x, plane, block, row, col, plane_bsize, tx_size);
672
0
      }
673
7.20M
    } else {
674
7.20M
      if (max_txsize_lookup[plane_bsize] == tx_size) {
675
4.45M
        int txfm_blk_index = (plane << 2) + (block >> (tx_size << 1));
676
4.45M
        if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_NONE) {
677
          // full forward transform and quantization
678
4.45M
          vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);
679
4.45M
        } else if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_AC_ONLY) {
680
          // fast path forward transform and quantization
681
0
          vp9_xform_quant_dc(x, plane, block, row, col, plane_bsize, tx_size);
682
0
        } else {
683
          // skip forward transform
684
0
          p->eobs[block] = 0;
685
0
          *a = *l = 0;
686
#if CONFIG_MISMATCH_DEBUG
687
          goto encode_block_end;
688
#else
689
0
          return;
690
0
#endif
691
0
        }
692
4.45M
      } else {
693
2.74M
        vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);
694
2.74M
      }
695
7.20M
    }
696
7.20M
  }
697
698
7.20M
  if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
699
0
    const int ctx = combine_entropy_contexts(*a, *l);
700
0
    *a = *l = vp9_optimize_b(x, plane, block, tx_size, ctx) > 0;
701
7.20M
  } else {
702
7.20M
    *a = *l = p->eobs[block] > 0;
703
7.20M
  }
704
705
7.20M
  if (p->eobs[block]) *(args->skip) = 0;
706
707
7.20M
  if (x->skip_encode || p->eobs[block] == 0) {
708
#if CONFIG_MISMATCH_DEBUG
709
    goto encode_block_end;
710
#else
711
525k
    return;
712
525k
#endif
713
525k
  }
714
6.67M
#if CONFIG_VP9_HIGHBITDEPTH
715
6.67M
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
716
0
    uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst);
717
0
    switch (tx_size) {
718
0
      case TX_32X32:
719
0
        vp9_highbd_idct32x32_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
720
0
                                 xd->bd);
721
0
        break;
722
0
      case TX_16X16:
723
0
        vp9_highbd_idct16x16_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
724
0
                                 xd->bd);
725
0
        break;
726
0
      case TX_8X8:
727
0
        vp9_highbd_idct8x8_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
728
0
                               xd->bd);
729
0
        break;
730
0
      default:
731
0
        assert(tx_size == TX_4X4);
732
        // this is like vp9_short_idct4x4 but has a special case around eob<=1
733
        // which is significant (not just an optimization) for the lossless
734
        // case.
735
0
        x->highbd_inv_txfm_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
736
0
                               xd->bd);
737
0
        break;
738
0
    }
739
#if CONFIG_MISMATCH_DEBUG
740
    goto encode_block_end;
741
#else
742
0
    return;
743
0
#endif
744
0
  }
745
6.67M
#endif  // CONFIG_VP9_HIGHBITDEPTH
746
747
6.67M
  switch (tx_size) {
748
7.57k
    case TX_32X32:
749
7.57k
      vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
750
7.57k
      break;
751
60.0k
    case TX_16X16:
752
60.0k
      vp9_idct16x16_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
753
60.0k
      break;
754
436k
    case TX_8X8:
755
436k
      vp9_idct8x8_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
756
436k
      break;
757
6.17M
    default:
758
6.17M
      assert(tx_size == TX_4X4);
759
      // this is like vp9_short_idct4x4 but has a special case around eob<=1
760
      // which is significant (not just an optimization) for the lossless
761
      // case.
762
6.17M
      x->inv_txfm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
763
6.17M
      break;
764
6.67M
  }
765
#if CONFIG_MISMATCH_DEBUG
766
encode_block_end:
767
  if (output_enabled) {
768
    int pixel_c, pixel_r;
769
    int blk_w = 1 << (tx_size + TX_UNIT_SIZE_LOG2);
770
    int blk_h = 1 << (tx_size + TX_UNIT_SIZE_LOG2);
771
    mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, col, row,
772
                    pd->subsampling_x, pd->subsampling_y);
773
    mismatch_record_block_tx(dst, pd->dst.stride, plane, pixel_c, pixel_r,
774
                             blk_w, blk_h,
775
                             xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH);
776
  }
777
#endif
778
6.67M
}
779
780
static void encode_block_pass1(int plane, int block, int row, int col,
781
                               BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
782
0
                               void *arg) {
783
0
  MACROBLOCK *const x = (MACROBLOCK *)arg;
784
0
  MACROBLOCKD *const xd = &x->e_mbd;
785
0
  struct macroblock_plane *const p = &x->plane[plane];
786
0
  struct macroblockd_plane *const pd = &xd->plane[plane];
787
0
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
788
0
  uint8_t *dst;
789
0
  dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col];
790
791
0
  vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);
792
793
0
  if (p->eobs[block] > 0) {
794
0
#if CONFIG_VP9_HIGHBITDEPTH
795
0
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
796
0
      x->highbd_inv_txfm_add(dqcoeff, CONVERT_TO_SHORTPTR(dst), pd->dst.stride,
797
0
                             p->eobs[block], xd->bd);
798
0
      return;
799
0
    }
800
0
#endif  // CONFIG_VP9_HIGHBITDEPTH
801
0
    x->inv_txfm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
802
0
  }
803
0
}
804
805
0
// Encodes plane 0 of block `bsize` for the first pass: subtracts the
// prediction for that plane, then runs encode_block_pass1() on every
// transform block in it. Other planes are not touched here.
void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) {
  const int first_plane = 0;
  MACROBLOCKD *const xd = &x->e_mbd;

  vp9_subtract_plane(x, bsize, first_plane);
  vp9_foreach_transformed_block_in_plane(xd, bsize, first_plane,
                                         encode_block_pass1, x);
}
810
811
// Encodes every plane of the current block by running encode_block() on each
// transform block.
//
// The block's skip flag is set to 1 up front; encode_block() receives a
// pointer to it through `arg`. When x->skip is already set the function
// returns right after that, doing no per-plane work.
//
// mi_row, mi_col and output_enabled are only forwarded to encode_block() when
// CONFIG_MISMATCH_DEBUG is enabled; otherwise they are unused.
void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, int mi_col,
                   int output_enabled) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mode_info = xd->mi[0];
  struct optimize_ctx ctx;
  struct encode_b_args arg = { x,
                               1,     // enable_trellis_opt (reset per plane)
                               0.0,   // trellis_opt_thresh
                               NULL,  // &sse_calc_done
                               NULL,  // &sse
                               NULL,  // above entropy context (set per plane)
                               NULL,  // left entropy context (set per plane)
                               &mode_info->skip
#if CONFIG_MISMATCH_DEBUG
                               ,
                               mi_row,
                               mi_col,
                               output_enabled
#endif
  };
  int plane;

#if !CONFIG_MISMATCH_DEBUG
  (void)mi_row;
  (void)mi_col;
  (void)output_enabled;
#endif

  mode_info->skip = 1;

  if (x->skip) return;

  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
    int run_trellis;

    if (!x->skip_recode) vp9_subtract_plane(x, bsize, plane);

    // Trellis optimization needs the plane's entropy contexts loaded first.
    run_trellis = x->optimize && (!x->skip_recode || !x->skip_optimize);
    if (run_trellis) {
      const struct macroblockd_plane *const pd = &xd->plane[plane];
      const TX_SIZE tx_size =
          plane ? get_uv_tx_size(mode_info, pd) : mode_info->tx_size;
      vp9_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane],
                               ctx.tl[plane]);
    }

    arg.enable_trellis_opt = run_trellis;
    arg.ta = ctx.ta[plane];
    arg.tl = ctx.tl[plane];

    vp9_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block,
                                           &arg);
  }
}
863
864
void vp9_encode_block_intra(int plane, int block, int row, int col,
865
                            BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
866
324M
                            void *arg) {
867
324M
  struct encode_b_args *const args = arg;
868
324M
  MACROBLOCK *const x = args->x;
869
324M
  MACROBLOCKD *const xd = &x->e_mbd;
870
324M
  MODE_INFO *mi = xd->mi[0];
871
324M
  struct macroblock_plane *const p = &x->plane[plane];
872
324M
  struct macroblockd_plane *const pd = &xd->plane[plane];
873
324M
  tran_low_t *coeff = BLOCK_OFFSET(p->coeff, block);
874
324M
  tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
875
324M
  tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
876
324M
  const ScanOrder *scan_order;
877
324M
  TX_TYPE tx_type = DCT_DCT;
878
324M
  PREDICTION_MODE mode;
879
324M
  const int bwl = b_width_log2_lookup[plane_bsize];
880
324M
  const int diff_stride = 4 * (1 << bwl);
881
324M
  uint8_t *src, *dst;
882
324M
  int16_t *src_diff;
883
324M
  uint16_t *eob = &p->eobs[block];
884
324M
  const int src_stride = p->src.stride;
885
324M
  const int dst_stride = pd->dst.stride;
886
324M
  int enable_trellis_opt = !x->skip_recode;
887
324M
  ENTROPY_CONTEXT *a = NULL;
888
324M
  ENTROPY_CONTEXT *l = NULL;
889
324M
  int entropy_ctx = 0;
890
324M
  dst = &pd->dst.buf[4 * (row * dst_stride + col)];
891
324M
  src = &p->src.buf[4 * (row * src_stride + col)];
892
324M
  src_diff = &p->src_diff[4 * (row * diff_stride + col)];
893
894
324M
  if (tx_size == TX_4X4) {
895
242M
    tx_type = get_tx_type_4x4(get_plane_type(plane), xd, block);
896
242M
    scan_order = &vp9_scan_orders[TX_4X4][tx_type];
897
242M
    mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mi->uv_mode;
898
242M
  } else {
899
81.4M
    mode = plane == 0 ? mi->mode : mi->uv_mode;
900
81.4M
    if (tx_size == TX_32X32) {
901
3.60M
      scan_order = &vp9_default_scan_orders[TX_32X32];
902
77.8M
    } else {
903
77.8M
      tx_type = get_tx_type(get_plane_type(plane), xd);
904
77.8M
      scan_order = &vp9_scan_orders[tx_size][tx_type];
905
77.8M
    }
906
81.4M
  }
907
908
324M
  vp9_predict_intra_block(
909
324M
      xd, bwl, tx_size, mode, (x->skip_encode || x->fp_src_pred) ? src : dst,
910
324M
      (x->skip_encode || x->fp_src_pred) ? src_stride : dst_stride, dst,
911
324M
      dst_stride, col, row, plane);
912
913
  // skip block condition should be handled before this is called.
914
324M
  assert(!x->skip_block);
915
916
324M
  if (!x->skip_recode) {
917
324M
    const int tx_size_in_pixels = (1 << tx_size) << 2;
918
324M
#if CONFIG_VP9_HIGHBITDEPTH
919
324M
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
920
0
      vpx_highbd_subtract_block(tx_size_in_pixels, tx_size_in_pixels, src_diff,
921
0
                                diff_stride, src, src_stride, dst, dst_stride,
922
0
                                xd->bd);
923
324M
    } else {
924
324M
      vpx_subtract_block(tx_size_in_pixels, tx_size_in_pixels, src_diff,
925
324M
                         diff_stride, src, src_stride, dst, dst_stride);
926
324M
    }
927
#else
928
    vpx_subtract_block(tx_size_in_pixels, tx_size_in_pixels, src_diff,
929
                       diff_stride, src, src_stride, dst, dst_stride);
930
#endif
931
324M
    enable_trellis_opt = do_trellis_opt(pd, src_diff, diff_stride, row, col,
932
324M
                                        plane_bsize, tx_size, args);
933
324M
  }
934
935
324M
  if (enable_trellis_opt) {
936
24.7M
    a = &args->ta[col];
937
24.7M
    l = &args->tl[row];
938
24.7M
    entropy_ctx = combine_entropy_contexts(*a, *l);
939
24.7M
  }
940
941
324M
#if CONFIG_VP9_HIGHBITDEPTH
942
324M
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
943
0
    uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst);
944
0
    switch (tx_size) {
945
0
      case TX_32X32:
946
0
        if (!x->skip_recode) {
947
0
          highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
948
0
          vpx_highbd_quantize_b_32x32(coeff, p, qcoeff, dqcoeff, pd->dequant,
949
0
                                      eob, scan_order);
950
0
        }
951
0
        if (enable_trellis_opt) {
952
0
          *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
953
0
        }
954
0
        if (!x->skip_encode && *eob) {
955
0
          vp9_highbd_idct32x32_add(dqcoeff, dst16, dst_stride, *eob, xd->bd);
956
0
        }
957
0
        break;
958
0
      case TX_16X16:
959
0
        if (!x->skip_recode) {
960
0
          if (tx_type == DCT_DCT)
961
0
            vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
962
0
          else
963
0
            vp9_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type);
964
0
          vpx_highbd_quantize_b(coeff, 256, p, qcoeff, dqcoeff, pd->dequant,
965
0
                                eob, scan_order);
966
0
        }
967
0
        if (enable_trellis_opt) {
968
0
          *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
969
0
        }
970
0
        if (!x->skip_encode && *eob) {
971
0
          vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst16, dst_stride, *eob,
972
0
                                  xd->bd);
973
0
        }
974
0
        break;
975
0
      case TX_8X8:
976
0
        if (!x->skip_recode) {
977
0
          if (tx_type == DCT_DCT)
978
0
            vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
979
0
          else
980
0
            vp9_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type);
981
0
          vpx_highbd_quantize_b(coeff, 64, p, qcoeff, dqcoeff, pd->dequant, eob,
982
0
                                scan_order);
983
0
        }
984
0
        if (enable_trellis_opt) {
985
0
          *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
986
0
        }
987
0
        if (!x->skip_encode && *eob) {
988
0
          vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst16, dst_stride, *eob,
989
0
                                xd->bd);
990
0
        }
991
0
        break;
992
0
      default:
993
0
        assert(tx_size == TX_4X4);
994
0
        if (!x->skip_recode) {
995
0
          if (tx_type != DCT_DCT)
996
0
            vp9_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type);
997
0
          else
998
0
            x->fwd_txfm4x4(src_diff, coeff, diff_stride);
999
0
          vpx_highbd_quantize_b(coeff, 16, p, qcoeff, dqcoeff, pd->dequant, eob,
1000
0
                                scan_order);
1001
0
        }
1002
0
        if (enable_trellis_opt) {
1003
0
          *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
1004
0
        }
1005
0
        if (!x->skip_encode && *eob) {
1006
0
          if (tx_type == DCT_DCT) {
1007
            // this is like vp9_short_idct4x4 but has a special case around
1008
            // eob<=1 which is significant (not just an optimization) for the
1009
            // lossless case.
1010
0
            x->highbd_inv_txfm_add(dqcoeff, dst16, dst_stride, *eob, xd->bd);
1011
0
          } else {
1012
0
            vp9_highbd_iht4x4_16_add(dqcoeff, dst16, dst_stride, tx_type,
1013
0
                                     xd->bd);
1014
0
          }
1015
0
        }
1016
0
        break;
1017
0
    }
1018
0
    if (*eob) *(args->skip) = 0;
1019
0
    return;
1020
0
  }
1021
324M
#endif  // CONFIG_VP9_HIGHBITDEPTH
1022
1023
324M
  switch (tx_size) {
1024
3.60M
    case TX_32X32:
1025
3.60M
      if (!x->skip_recode) {
1026
3.60M
        fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
1027
3.60M
        vpx_quantize_b_32x32(coeff, p, qcoeff, dqcoeff, pd->dequant, eob,
1028
3.60M
                             scan_order);
1029
3.60M
      }
1030
3.60M
      if (enable_trellis_opt) {
1031
511k
        *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
1032
511k
      }
1033
3.60M
      if (!x->skip_encode && *eob)
1034
2.57M
        vp9_idct32x32_add(dqcoeff, dst, dst_stride, *eob);
1035
3.60M
      break;
1036
13.6M
    case TX_16X16:
1037
13.6M
      if (!x->skip_recode) {
1038
13.6M
        vp9_fht16x16(src_diff, coeff, diff_stride, tx_type);
1039
13.6M
        vpx_quantize_b(coeff, 256, p, qcoeff, dqcoeff, pd->dequant, eob,
1040
13.6M
                       scan_order);
1041
13.6M
      }
1042
13.6M
      if (enable_trellis_opt) {
1043
989k
        *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
1044
989k
      }
1045
13.6M
      if (!x->skip_encode && *eob)
1046
11.3M
        vp9_iht16x16_add(tx_type, dqcoeff, dst, dst_stride, *eob);
1047
13.6M
      break;
1048
64.2M
    case TX_8X8:
1049
64.2M
      if (!x->skip_recode) {
1050
64.2M
        vp9_fht8x8(src_diff, coeff, diff_stride, tx_type);
1051
64.2M
        vpx_quantize_b(coeff, 64, p, qcoeff, dqcoeff, pd->dequant, eob,
1052
64.2M
                       scan_order);
1053
64.2M
      }
1054
64.2M
      if (enable_trellis_opt) {
1055
4.25M
        *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
1056
4.25M
      }
1057
64.2M
      if (!x->skip_encode && *eob)
1058
52.7M
        vp9_iht8x8_add(tx_type, dqcoeff, dst, dst_stride, *eob);
1059
64.2M
      break;
1060
242M
    default:
1061
242M
      assert(tx_size == TX_4X4);
1062
242M
      if (!x->skip_recode) {
1063
242M
        if (tx_type != DCT_DCT)
1064
21.0M
          vp9_fht4x4(src_diff, coeff, diff_stride, tx_type);
1065
221M
        else
1066
221M
          x->fwd_txfm4x4(src_diff, coeff, diff_stride);
1067
242M
        vpx_quantize_b(coeff, 16, p, qcoeff, dqcoeff, pd->dequant, eob,
1068
242M
                       scan_order);
1069
242M
      }
1070
242M
      if (enable_trellis_opt) {
1071
19.0M
        *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
1072
19.0M
      }
1073
242M
      if (!x->skip_encode && *eob) {
1074
188M
        if (tx_type == DCT_DCT)
1075
          // this is like vp9_short_idct4x4 but has a special case around eob<=1
1076
          // which is significant (not just an optimization) for the lossless
1077
          // case.
1078
171M
          x->inv_txfm_add(dqcoeff, dst, dst_stride, *eob);
1079
16.9M
        else
1080
16.9M
          vp9_iht4x4_16_add(dqcoeff, dst, dst_stride, tx_type);
1081
188M
      }
1082
242M
      break;
1083
324M
  }
1084
324M
  if (*eob) *(args->skip) = 0;
1085
324M
}
1086
1087
// Intra-codes every transform block of one plane via
// vp9_encode_block_intra().
//
// Trellis optimization is kept only when the caller requested it
// (enable_trellis_opt), x->optimize is set, and recoding/optimizing is not
// being skipped. In that case the plane's entropy contexts are loaded first;
// otherwise the flag passed to the per-block callback is forced to 0.
void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane,
                                  int enable_trellis_opt) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];
  const int trellis_allowed = enable_trellis_opt && x->optimize &&
                              (!x->skip_recode || !x->skip_optimize);
  struct optimize_ctx ctx;
  // TODO(angiebird): make mismatch_debug support intra mode
  struct encode_b_args arg = { x,
                               enable_trellis_opt,
                               0.0,   // trellis_opt_thresh
                               NULL,  // &sse_calc_done
                               NULL,  // &sse
                               ctx.ta[plane],
                               ctx.tl[plane],
                               &mi->skip
#if CONFIG_MISMATCH_DEBUG
                               ,
                               0,  // mi_row
                               0,  // mi_col
                               0   // output_enabled
#endif
  };

  if (!trellis_allowed) {
    arg.enable_trellis_opt = 0;
  } else {
    const struct macroblockd_plane *const pd = &xd->plane[plane];
    const TX_SIZE tx_size = plane ? get_uv_tx_size(mi, pd) : mi->tx_size;
    vp9_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane], ctx.tl[plane]);
  }

  vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
                                         vp9_encode_block_intra, &arg);
}