Coverage Report

Created: 2026-02-14 06:59

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libvpx/vp9/encoder/vp9_encodemb.c
Line
Count
Source
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5
 *  that can be found in the LICENSE file in the root of the source
6
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
9
 */
10
11
#include <stdlib.h>
12
13
#include "./vp9_rtcd.h"
14
#include "./vpx_config.h"
15
#include "./vpx_dsp_rtcd.h"
16
17
#include "vpx_dsp/quantize.h"
18
#include "vpx_mem/vpx_mem.h"
19
#include "vpx_ports/mem.h"
20
21
#if CONFIG_MISMATCH_DEBUG
22
#include "vpx_util/vpx_debug_util.h"
23
#endif
24
25
#include "vp9/common/vp9_idct.h"
26
#include "vp9/common/vp9_reconinter.h"
27
#include "vp9/common/vp9_reconintra.h"
28
#include "vp9/common/vp9_scan.h"
29
30
#include "vp9/encoder/vp9_encodemb.h"
31
#include "vp9/encoder/vp9_encoder.h"
32
#include "vp9/encoder/vp9_rd.h"
33
#include "vp9/encoder/vp9_tokenize.h"
34
35
// Range hints for index values whose bounds cannot be proven locally.
//
// VP9_ASSUME_IN_RANGE(i, max) states that 0 <= i <= max:
//  - when NDEBUG is not defined, the range is checked with assert();
//  - when NDEBUG is defined and clang provides __builtin_assume, the range is
//    handed to the compiler as an assumption;
//  - in every other build it expands to an empty statement and the argument
//    is not evaluated.
// The arguments are parenthesized so that any expression (for example
// `a & b`) can be passed without changing the meaning of the comparison.
#if !defined(NDEBUG)
#define VP9_ASSUME_IN_RANGE(i, max) assert(0 <= (i) && (i) <= (max))
#elif defined(__clang__) && defined(__has_builtin)
#if __has_builtin(__builtin_assume)
#define VP9_ASSUME_IN_RANGE(i, max) __builtin_assume(0 <= (i) && (i) <= (max))
#endif
#endif

#if !defined(VP9_ASSUME_IN_RANGE)
#define VP9_ASSUME_IN_RANGE(i, max) \
  do {                              \
  } while (0)
#endif

// This is verified by test/vp9_scan_test.cc
#define ASSUME_VALID_SCAN_VALUE(i) VP9_ASSUME_IN_RANGE(i, MAX_SCAN_VALUE)
// This is verified by test/vp9_entropy_test.cc
#define ASSUME_VALID_ENERGY_CLASS(i) VP9_ASSUME_IN_RANGE(i, MAX_ENERGY_CLASS)
#define ASSUME_VALID_TOKEN(i) VP9_ASSUME_IN_RANGE(i, MAX_TOKEN)
72
73
// Bundles two ENTROPY_CONTEXT arrays, `ta` and `tl`, each holding 16 entries
// for every one of the MAX_MB_PLANE planes.
// NOTE(review): presumably the "above" and "left" entropy contexts -- confirm
// against the users of this struct; none are visible in this part of the file.
struct optimize_ctx {
74
  ENTROPY_CONTEXT ta[MAX_MB_PLANE][16];
75
  ENTROPY_CONTEXT tl[MAX_MB_PLANE][16];
76
};
77
78
30.5M
// Writes the difference between the source buffer and the destination buffer
// of one plane into that plane's src_diff buffer.
//
//   x      macroblock providing the source plane (x->plane[plane]) and the
//          destination plane (x->e_mbd.plane[plane])
//   bsize  block size; converted to the plane's own block size before use
//   plane  plane index
//
// The residual is stored with a stride equal to the block width. With
// CONFIG_VP9_HIGHBITDEPTH enabled and the current buffer flagged as high
// bitdepth, the high-bitdepth subtract routine is used instead.
void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
  struct macroblock_plane *const src_plane = &x->plane[plane];
  const struct macroblockd_plane *const dst_plane = &x->e_mbd.plane[plane];
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, dst_plane);
  // The lookup tables count 4x4 units; convert to samples.
  const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize];

#if CONFIG_VP9_HIGHBITDEPTH
  if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    vpx_highbd_subtract_block(height, width, src_plane->src_diff, width,
                              src_plane->src.buf, src_plane->src.stride,
                              dst_plane->dst.buf, dst_plane->dst.stride,
                              x->e_mbd.bd);
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  vpx_subtract_block(height, width, src_plane->src_diff, width,
                     src_plane->src.buf, src_plane->src.stride,
                     dst_plane->dst.buf, dst_plane->dst.stride);
}
96
97
// Scale factors applied to mb->rdmult in vp9_optimize_b(). The first index is
// the result of is_inter_block() for the current block, the second is the
// plane type returned by get_plane_type().
static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = {
98
  { 10, 6 },  // is_inter_block() == 0
99
  { 8, 5 },  // is_inter_block() == 1
100
};
101
102
// Right-shifts the magnitude of 'num' and then restores its sign, so a
// negative value is never itself the operand of '>>'.
// 'num' is expanded more than once: do not pass an expression with side
// effects.
// 'num' can be negative, but 'shift' must be non-negative.
103
#define RIGHT_SHIFT_POSSIBLY_NEGATIVE(num, shift) \
104
1.42M
  (((num) >= 0) ? (num) >> (shift) : -((-(num)) >> (shift)))
105
106
// Rate-distortion optimizes the quantized coefficients of one transform block.
//
// The coefficients are visited in scan order up to the current end-of-block
// (eob). For every non-zero quantized value the function greedily chooses
// between keeping it and reducing its magnitude by one, and it keeps track of
// the cheapest position at which the block could be terminated. qcoeff and
// dqcoeff are updated in place and the resulting eob is stored in the plane's
// eobs[] entry.
//
//   mb       macroblock whose coefficient buffers are read and modified
//   plane    plane index
//   block    block index used to locate coeff/qcoeff/dqcoeff and eobs[]
//   tx_size  transform size of the block
//   ctx      entropy context of the first coefficient; it indexes the token
//            cost table directly
//
// Returns the new eob.
int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
                   int ctx) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  struct macroblock_plane *const p = &mb->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const int ref = is_inter_block(xd->mi[0]);
  const MODE_INFO *const mi = xd->mi[0];
  const PLANE_TYPE plane_type = get_plane_type(plane);
  const tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  const int eob = p->eobs[block];
  const int default_eob = 16 << (tx_size << 1);
  const int shift = (tx_size == TX_32X32);
  const int16_t *const dequant_ptr = pd->dequant;
  const uint8_t *const band_translate = get_band_translate(tx_size);
  const ScanOrder *const scan_order = get_scan(xd, tx_size, plane_type, block);
  const int16_t *const scan = scan_order->scan;
  const int16_t *const neighbors = scan_order->neighbors;
  const int sharpness = mb->sharpness;
  const int64_t rdadj = (int64_t)mb->rdmult * plane_rd_mult[ref][plane_type];
  const int64_t rdmult =
      (sharpness == 0) ? rdadj >> 1
                       : (rdadj * (8 - sharpness + mi->segment_id)) >> 4;
  const int64_t rddiv = mb->rddiv;
#if CONFIG_VP9_HIGHBITDEPTH
  const uint16_t *cat6_high_cost = vp9_get_high_cost_table(xd->bd);
#else
  const uint16_t *cat6_high_cost = vp9_get_high_cost_table(8);
#endif
  unsigned int(*const token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
      mb->token_costs[tx_size][plane_type][ref];
  unsigned int(*token_costs_cur)[2][COEFF_CONTEXTS][ENTROPY_TOKENS];
  uint8_t token_cache[MAX_SCAN_VALUE + 1];
  int64_t rate0;
  int64_t accu_rate = 0;
  // Starts at the worst possible error for the largest transform size so that
  // the running value never becomes negative.
  int64_t accu_error = ((int64_t)1) << 50;
  int64_t best_block_rd_cost;
  // Non-zero so that the first coefficient uses token tree selector 0.
  int x_prev = 1;
  // Only incremented when sharpness > 0; reset whenever a new eob is accepted.
  int count_high_values_after_eob = 0;
  tran_low_t before_best_eob_qc = 0;
  tran_low_t before_best_eob_dqc = 0;
  int final_eob = 0;
  int i;

  assert((!plane_type && !plane) || (plane_type && plane));
  assert(eob <= default_eob);

  // Seed the token cache with the energy class of every coefficient that is
  // currently inside the block.
  for (i = 0; i < eob; i++) {
    const int rc = scan[i];
    ASSUME_VALID_SCAN_VALUE(rc);
    int16_t token = vp9_get_token(qcoeff[rc]);
    ASSUME_VALID_TOKEN(token);
    token_cache[rc] = vp9_pt_energy_class[token];
  }

  // 'ctx' is used as an index in the first iteration and must be in bounds.
  // That cannot be verified locally, so it has to be checked at runtime. For
  // now the check is only compiled in when array-bounds checking is enabled.
#if defined(__clang__) && defined(__has_feature)
#if __has_feature(array_bounds_sanitizer)
  if (ctx < 0 || ctx > MAX_ENERGY_CLASS) {
    abort();
  }
#endif
#endif

  // Cost of signalling an empty block (eob at position 0).
  token_costs_cur = token_costs + band_translate[0];
  rate0 = (*token_costs_cur)[0][ctx][EOB_TOKEN];
  best_block_rd_cost = RDCOST(rdmult, rddiv, rate0, accu_error);

  // For each coefficient pick one of two candidates greedily:
  //   candidate 0: keep the current quantized value;
  //   candidate 1: reduce the magnitude of the quantized value by 1.
  for (i = 0; i < eob; i++) {
    const int rc = scan[i];
    ASSUME_VALID_SCAN_VALUE(rc);
    const int x = qcoeff[rc];
    const int band_cur = band_translate[i];
    const int ctx_cur =
        (i == 0) ? ctx : get_coef_context(neighbors, token_cache, i);
    ASSUME_VALID_ENERGY_CLASS(ctx_cur);
    const int token_tree_sel_cur = (x_prev == 0);
    token_costs_cur = token_costs + band_cur;

    if (x == 0) {
      // Nothing to search: only the rate is accumulated, accu_error does not
      // change.
      const int token = vp9_get_token(x);
      ASSUME_VALID_TOKEN(token);
      rate0 = (*token_costs_cur)[token_tree_sel_cur][ctx_cur][token];
      accu_rate += rate0;
      x_prev = 0;
      continue;
    }

    const int dqv = dequant_ptr[rc != 0];
    // Error if the coefficient were quantized to 0, error of candidate 0, and
    // the size of one quantizer step.
    int diff_for_zero = (0 - coeff[rc]) * (1 << shift);
    int diff0 = (dqcoeff[rc] - coeff[rc]) * (1 << shift);
    int dqv_step = dqv;
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      const int bd_shift = xd->bd - 8;
      diff_for_zero = RIGHT_SHIFT_POSSIBLY_NEGATIVE(diff_for_zero, bd_shift);
      diff0 = RIGHT_SHIFT_POSSIBLY_NEGATIVE(diff0, bd_shift);
      dqv_step = dqv >> bd_shift;
    }
#endif  // CONFIG_VP9_HIGHBITDEPTH
    const int64_t distortion_for_zero = (int64_t)diff_for_zero * diff_for_zero;
    const int64_t distortion0 = (int64_t)diff0 * diff0;

    // Candidate 1.
    const int sign = -(x < 0);        // -1 if x is negative and 0 otherwise.
    const int x1 = x - 2 * sign - 1;  // abs(x1) = abs(x) - 1.
    int64_t distortion1 = distortion_for_zero;
    if (x1 != 0) {
      const int diff_step = (dqv_step + sign) ^ sign;
      const int diff1 = diff0 - diff_step;
      assert(dqv > 0);  // dqv_step was not produced by shifting a negative.
      distortion1 = (int64_t)diff1 * diff1;
    }

    // Rate of the current coefficient for both candidates. The base costs are
    // widened to 64 bits before the unsigned table entry is added.
    int16_t t0, t1;
    const int64_t base_bits0 = vp9_get_token_cost(x, &t0, cat6_high_cost);
    const int64_t base_bits1 = vp9_get_token_cost(x1, &t1, cat6_high_cost);
    rate0 = base_bits0 + (*token_costs_cur)[token_tree_sel_cur][ctx_cur][t0];
    const int64_t rate1 =
        base_bits1 + (*token_costs_cur)[token_tree_sel_cur][ctx_cur][t1];

    // Effect of each candidate on the cost of the next position.
    int64_t next_bits0 = 0;
    int64_t next_bits1 = 0;
    int64_t next_eob_bits0 = 0;
    int64_t next_eob_bits1 = 0;
    if (i < default_eob - 1) {
      int ctx_next, token_tree_sel_next;
      const int band_next = band_translate[i + 1];
      const int token_next =
          (i + 1 != eob) ? vp9_get_token(qcoeff[scan[i + 1]]) : EOB_TOKEN;
      ASSUME_VALID_TOKEN(token_next);
      unsigned int(*const token_costs_next)[2][COEFF_CONTEXTS]
                                           [ENTROPY_TOKENS] =
                                               token_costs + band_next;

      // token_cache only ever holds valid energy classes and
      // get_coef_context returns at most the maximum value stored in it.
      token_cache[rc] = vp9_pt_energy_class[t0];
      ctx_next = get_coef_context(neighbors, token_cache, i + 1);
      ASSUME_VALID_ENERGY_CLASS(ctx_next);
      token_tree_sel_next = (x == 0);
      next_bits0 =
          (*token_costs_next)[token_tree_sel_next][ctx_next][token_next];
      next_eob_bits0 =
          (*token_costs_next)[token_tree_sel_next][ctx_next][EOB_TOKEN];

      token_cache[rc] = vp9_pt_energy_class[t1];
      ctx_next = get_coef_context(neighbors, token_cache, i + 1);
      ASSUME_VALID_ENERGY_CLASS(ctx_next);
      token_tree_sel_next = (x1 == 0);
      next_bits1 =
          (*token_costs_next)[token_tree_sel_next][ctx_next][token_next];
      if (x1 != 0) {
        next_eob_bits1 =
            (*token_costs_next)[token_tree_sel_next][ctx_next][EOB_TOKEN];
      }
    }

    // Total RD cost of the two candidates.
    const int64_t rd_cost0 =
        RDCOST(rdmult, rddiv, (rate0 + next_bits0), distortion0);
    const int64_t rd_cost1 =
        RDCOST(rdmult, rddiv, (rate1 + next_bits1), distortion1);
    const int rdcost_better_for_x1 = (rd_cost1 < rd_cost0);

    // RD cost of ending the block right after this coefficient.
    const int64_t eob_cost0 =
        RDCOST(rdmult, rddiv, (accu_rate + rate0 + next_eob_bits0),
               (accu_error + distortion0 - distortion_for_zero));
    int64_t eob_cost1 = eob_cost0;
    int eob_rdcost_better_for_x1 = 0;
    if (x1 != 0) {
      eob_cost1 = RDCOST(rdmult, rddiv, (accu_rate + rate1 + next_eob_bits1),
                         (accu_error + distortion1 - distortion_for_zero));
      eob_rdcost_better_for_x1 = (eob_cost1 < eob_cost0);
    }

    // De-quantized values of the two candidates. dqc1 is only needed when
    // candidate 1 wins one of the comparisons and is non-zero.
    const int dqc0 = dqcoeff[rc];
    int dqc1 = 0;
    if ((rdcost_better_for_x1 || eob_rdcost_better_for_x1) && x1 != 0) {
      dqc1 = RIGHT_SHIFT_POSSIBLY_NEGATIVE(x1 * dqv, shift);
    }

    // Record the better quantized and de-quantized values.
    if (rdcost_better_for_x1) {
      qcoeff[rc] = x1;
      dqcoeff[rc] = dqc1;
      accu_rate += rate1;
      accu_error += distortion1 - distortion_for_zero;
      assert(distortion1 <= distortion_for_zero);
      token_cache[rc] = vp9_pt_energy_class[t1];
    } else {
      accu_rate += rate0;
      accu_error += distortion0 - distortion_for_zero;
      assert(distortion0 <= distortion_for_zero);
      token_cache[rc] = vp9_pt_energy_class[t0];
    }
    if (sharpness > 0 && abs(qcoeff[rc]) > 1) count_high_values_after_eob++;
    assert(accu_error >= 0);
    x_prev = qcoeff[rc];  // Follows the value that was actually selected.

    // Decide whether the best eob position moves to i + 1.
    const int use_x1 = (x1 != 0) && eob_rdcost_better_for_x1;
    const int64_t best_eob_cost_cur = use_x1 ? eob_cost1 : eob_cost0;
    if (best_eob_cost_cur < best_block_rd_cost) {
      best_block_rd_cost = best_eob_cost_cur;
      final_eob = i + 1;
      count_high_values_after_eob = 0;
      if (use_x1) {
        before_best_eob_qc = x1;
        before_best_eob_dqc = dqc1;
      } else {
        before_best_eob_qc = x;
        before_best_eob_dqc = dqc0;
      }
    }
  }

  if (count_high_values_after_eob > 0) {
    // Values with magnitude above 1 were seen after the best eob position:
    // keep them and place the eob just past the last non-zero coefficient.
    for (final_eob = eob - 1; final_eob >= 0; final_eob--) {
      const int rc = scan[final_eob];
      ASSUME_VALID_SCAN_VALUE(rc);
      if (qcoeff[rc]) {
        break;
      }
    }
    final_eob++;
  } else {
    assert(final_eob <= eob);
    if (final_eob > 0) {
      // Restore the value that was chosen for the last kept coefficient when
      // this eob position was accepted.
      int rc;
      assert(before_best_eob_qc != 0);
      i = final_eob - 1;
      rc = scan[i];
      ASSUME_VALID_SCAN_VALUE(rc);
      qcoeff[rc] = before_best_eob_qc;
      dqcoeff[rc] = before_best_eob_dqc;
    }
    // Clear everything from the new eob up to the old one.
    for (i = final_eob; i < eob; i++) {
      const int rc = scan[i];
      ASSUME_VALID_SCAN_VALUE(rc);
      qcoeff[rc] = 0;
      dqcoeff[rc] = 0;
    }
  }

  p->eobs[block] = final_eob;
  return final_eob;
}
#undef RIGHT_SHIFT_POSSIBLY_NEGATIVE
398
399
static INLINE void fdct32x32(int rd_transform, const int16_t *src,
400
3.80M
                             tran_low_t *dst, int src_stride) {
401
3.80M
  if (rd_transform)
402
3.62M
    vpx_fdct32x32_rd(src, dst, src_stride);
403
176k
  else
404
176k
    vpx_fdct32x32(src, dst, src_stride);
405
3.80M
}
406
407
#if CONFIG_VP9_HIGHBITDEPTH
408
static INLINE void highbd_fdct32x32(int rd_transform, const int16_t *src,
409
0
                                    tran_low_t *dst, int src_stride) {
410
0
  if (rd_transform)
411
0
    vpx_highbd_fdct32x32_rd(src, dst, src_stride);
412
0
  else
413
0
    vpx_highbd_fdct32x32(src, dst, src_stride);
414
0
}
415
#endif  // CONFIG_VP9_HIGHBITDEPTH
416
417
void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, int row, int col,
418
0
                        BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
419
0
  MACROBLOCKD *const xd = &x->e_mbd;
420
0
  const struct macroblock_plane *const p = &x->plane[plane];
421
0
  const struct macroblockd_plane *const pd = &xd->plane[plane];
422
0
  const ScanOrder *const scan_order = &vp9_default_scan_orders[tx_size];
423
0
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
424
0
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
425
0
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
426
0
  uint16_t *const eob = &p->eobs[block];
427
0
  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
428
0
  const int16_t *src_diff;
429
0
  src_diff = &p->src_diff[4 * (row * diff_stride + col)];
430
  // skip block condition should be handled before this is called.
431
0
  assert(!x->skip_block);
432
433
0
#if CONFIG_VP9_HIGHBITDEPTH
434
0
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
435
0
    switch (tx_size) {
436
0
      case TX_32X32:
437
0
        highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
438
0
        vp9_highbd_quantize_fp_32x32(coeff, 1024, p, qcoeff, dqcoeff,
439
0
                                     pd->dequant, eob, scan_order);
440
0
        break;
441
0
      case TX_16X16:
442
0
        vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
443
0
        vp9_highbd_quantize_fp(coeff, 256, p, qcoeff, dqcoeff, pd->dequant, eob,
444
0
                               scan_order);
445
0
        break;
446
0
      case TX_8X8:
447
0
        vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
448
0
        vp9_highbd_quantize_fp(coeff, 64, p, qcoeff, dqcoeff, pd->dequant, eob,
449
0
                               scan_order);
450
0
        break;
451
0
      default:
452
0
        assert(tx_size == TX_4X4);
453
0
        x->fwd_txfm4x4(src_diff, coeff, diff_stride);
454
0
        vp9_highbd_quantize_fp(coeff, 16, p, qcoeff, dqcoeff, pd->dequant, eob,
455
0
                               scan_order);
456
0
        break;
457
0
    }
458
0
    return;
459
0
  }
460
0
#endif  // CONFIG_VP9_HIGHBITDEPTH
461
462
0
  switch (tx_size) {
463
0
    case TX_32X32:
464
0
      fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
465
0
      vp9_quantize_fp_32x32(coeff, 1024, p, qcoeff, dqcoeff, pd->dequant, eob,
466
0
                            scan_order);
467
0
      break;
468
0
    case TX_16X16:
469
0
      vpx_fdct16x16(src_diff, coeff, diff_stride);
470
0
      vp9_quantize_fp(coeff, 256, p, qcoeff, dqcoeff, pd->dequant, eob,
471
0
                      scan_order);
472
0
      break;
473
0
    case TX_8X8:
474
0
      vpx_fdct8x8(src_diff, coeff, diff_stride);
475
0
      vp9_quantize_fp(coeff, 64, p, qcoeff, dqcoeff, pd->dequant, eob,
476
0
                      scan_order);
477
478
0
      break;
479
0
    default:
480
0
      assert(tx_size == TX_4X4);
481
0
      x->fwd_txfm4x4(src_diff, coeff, diff_stride);
482
0
      vp9_quantize_fp(coeff, 16, p, qcoeff, dqcoeff, pd->dequant, eob,
483
0
                      scan_order);
484
0
      break;
485
0
  }
486
0
}
487
488
void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block, int row, int col,
489
313k
                        BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
490
313k
  MACROBLOCKD *const xd = &x->e_mbd;
491
313k
  const struct macroblock_plane *const p = &x->plane[plane];
492
313k
  const struct macroblockd_plane *const pd = &xd->plane[plane];
493
313k
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
494
313k
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
495
313k
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
496
313k
  uint16_t *const eob = &p->eobs[block];
497
313k
  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
498
313k
  const int16_t *src_diff;
499
313k
  src_diff = &p->src_diff[4 * (row * diff_stride + col)];
500
  // skip block condition should be handled before this is called.
501
313k
  assert(!x->skip_block);
502
503
313k
#if CONFIG_VP9_HIGHBITDEPTH
504
313k
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
505
0
    switch (tx_size) {
506
0
      case TX_32X32:
507
0
        vpx_highbd_fdct32x32_1(src_diff, coeff, diff_stride);
508
0
        vpx_highbd_quantize_dc_32x32(coeff, p->round, p->quant_fp[0], qcoeff,
509
0
                                     dqcoeff, pd->dequant[0], eob);
510
0
        break;
511
0
      case TX_16X16:
512
0
        vpx_highbd_fdct16x16_1(src_diff, coeff, diff_stride);
513
0
        vpx_highbd_quantize_dc(coeff, 256, p->round, p->quant_fp[0], qcoeff,
514
0
                               dqcoeff, pd->dequant[0], eob);
515
0
        break;
516
0
      case TX_8X8:
517
0
        vpx_highbd_fdct8x8_1(src_diff, coeff, diff_stride);
518
0
        vpx_highbd_quantize_dc(coeff, 64, p->round, p->quant_fp[0], qcoeff,
519
0
                               dqcoeff, pd->dequant[0], eob);
520
0
        break;
521
0
      default:
522
0
        assert(tx_size == TX_4X4);
523
0
        x->fwd_txfm4x4(src_diff, coeff, diff_stride);
524
0
        vpx_highbd_quantize_dc(coeff, 16, p->round, p->quant_fp[0], qcoeff,
525
0
                               dqcoeff, pd->dequant[0], eob);
526
0
        break;
527
0
    }
528
0
    return;
529
0
  }
530
313k
#endif  // CONFIG_VP9_HIGHBITDEPTH
531
532
313k
  switch (tx_size) {
533
4.27k
    case TX_32X32:
534
4.27k
      vpx_fdct32x32_1(src_diff, coeff, diff_stride);
535
4.27k
      vpx_quantize_dc_32x32(coeff, p->round, p->quant_fp[0], qcoeff, dqcoeff,
536
4.27k
                            pd->dequant[0], eob);
537
4.27k
      break;
538
7.67k
    case TX_16X16:
539
7.67k
      vpx_fdct16x16_1(src_diff, coeff, diff_stride);
540
7.67k
      vpx_quantize_dc(coeff, 256, p->round, p->quant_fp[0], qcoeff, dqcoeff,
541
7.67k
                      pd->dequant[0], eob);
542
7.67k
      break;
543
49.8k
    case TX_8X8:
544
49.8k
      vpx_fdct8x8_1(src_diff, coeff, diff_stride);
545
49.8k
      vpx_quantize_dc(coeff, 64, p->round, p->quant_fp[0], qcoeff, dqcoeff,
546
49.8k
                      pd->dequant[0], eob);
547
49.8k
      break;
548
251k
    default:
549
251k
      assert(tx_size == TX_4X4);
550
251k
      x->fwd_txfm4x4(src_diff, coeff, diff_stride);
551
251k
      vpx_quantize_dc(coeff, 16, p->round, p->quant_fp[0], qcoeff, dqcoeff,
552
251k
                      pd->dequant[0], eob);
553
251k
      break;
554
313k
  }
555
313k
}
556
557
void vp9_xform_quant(MACROBLOCK *x, int plane, int block, int row, int col,
558
48.7M
                     BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
559
48.7M
  MACROBLOCKD *const xd = &x->e_mbd;
560
48.7M
  const struct macroblock_plane *const p = &x->plane[plane];
561
48.7M
  const struct macroblockd_plane *const pd = &xd->plane[plane];
562
48.7M
  const ScanOrder *const scan_order = &vp9_default_scan_orders[tx_size];
563
48.7M
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
564
48.7M
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
565
48.7M
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
566
48.7M
  uint16_t *const eob = &p->eobs[block];
567
48.7M
  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
568
48.7M
  const int16_t *src_diff;
569
48.7M
  src_diff = &p->src_diff[4 * (row * diff_stride + col)];
570
  // skip block condition should be handled before this is called.
571
48.7M
  assert(!x->skip_block);
572
573
48.7M
#if CONFIG_VP9_HIGHBITDEPTH
574
48.7M
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
575
0
    switch (tx_size) {
576
0
      case TX_32X32:
577
0
        highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
578
0
        vpx_highbd_quantize_b_32x32(coeff, p, qcoeff, dqcoeff, pd->dequant, eob,
579
0
                                    scan_order);
580
0
        break;
581
0
      case TX_16X16:
582
0
        vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
583
0
        vpx_highbd_quantize_b(coeff, 256, p, qcoeff, dqcoeff, pd->dequant, eob,
584
0
                              scan_order);
585
0
        break;
586
0
      case TX_8X8:
587
0
        vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
588
0
        vpx_highbd_quantize_b(coeff, 64, p, qcoeff, dqcoeff, pd->dequant, eob,
589
0
                              scan_order);
590
0
        break;
591
0
      default:
592
0
        assert(tx_size == TX_4X4);
593
0
        x->fwd_txfm4x4(src_diff, coeff, diff_stride);
594
0
        vpx_highbd_quantize_b(coeff, 16, p, qcoeff, dqcoeff, pd->dequant, eob,
595
0
                              scan_order);
596
0
        break;
597
0
    }
598
0
    return;
599
0
  }
600
48.7M
#endif  // CONFIG_VP9_HIGHBITDEPTH
601
602
48.7M
  switch (tx_size) {
603
731k
    case TX_32X32:
604
731k
      fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
605
731k
      vpx_quantize_b_32x32(coeff, p, qcoeff, dqcoeff, pd->dequant, eob,
606
731k
                           scan_order);
607
731k
      break;
608
3.12M
    case TX_16X16:
609
3.12M
      vpx_fdct16x16(src_diff, coeff, diff_stride);
610
3.12M
      vpx_quantize_b(coeff, 256, p, qcoeff, dqcoeff, pd->dequant, eob,
611
3.12M
                     scan_order);
612
3.12M
      break;
613
13.1M
    case TX_8X8:
614
13.1M
      vpx_fdct8x8(src_diff, coeff, diff_stride);
615
13.1M
      vpx_quantize_b(coeff, 64, p, qcoeff, dqcoeff, pd->dequant, eob,
616
13.1M
                     scan_order);
617
13.1M
      break;
618
31.7M
    default:
619
31.7M
      assert(tx_size == TX_4X4);
620
31.7M
      x->fwd_txfm4x4(src_diff, coeff, diff_stride);
621
31.7M
      vpx_quantize_b(coeff, 16, p, qcoeff, dqcoeff, pd->dequant, eob,
622
31.7M
                     scan_order);
623
31.7M
      break;
624
48.7M
  }
625
48.7M
}
626
627
static void encode_block(int plane, int block, int row, int col,
628
8.07M
                         BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
629
8.07M
  struct encode_b_args *const args = arg;
630
#if CONFIG_MISMATCH_DEBUG
631
  int mi_row = args->mi_row;
632
  int mi_col = args->mi_col;
633
  int output_enabled = args->output_enabled;
634
#endif
635
8.07M
  MACROBLOCK *const x = args->x;
636
8.07M
  MACROBLOCKD *const xd = &x->e_mbd;
637
8.07M
  struct macroblock_plane *const p = &x->plane[plane];
638
8.07M
  struct macroblockd_plane *const pd = &xd->plane[plane];
639
8.07M
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
640
8.07M
  uint8_t *dst;
641
8.07M
  ENTROPY_CONTEXT *a, *l;
642
8.07M
  dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col];
643
8.07M
  a = &args->ta[col];
644
8.07M
  l = &args->tl[row];
645
646
  // TODO(jingning): per transformed block zero forcing only enabled for
647
  // luma component. will integrate chroma components as well.
648
8.07M
  if (x->zcoeff_blk[tx_size][block] && plane == 0) {
649
2.62M
    p->eobs[block] = 0;
650
2.62M
    *a = *l = 0;
651
#if CONFIG_MISMATCH_DEBUG
652
    goto encode_block_end;
653
#else
654
2.62M
    return;
655
2.62M
#endif
656
2.62M
  }
657
658
5.45M
  if (!x->skip_recode) {
659
5.45M
    if (x->quant_fp) {
660
      // Encoding process for rtc mode
661
0
      if (x->skip_txfm[0] == SKIP_TXFM_AC_DC && plane == 0) {
662
        // skip forward transform
663
0
        p->eobs[block] = 0;
664
0
        *a = *l = 0;
665
#if CONFIG_MISMATCH_DEBUG
666
        goto encode_block_end;
667
#else
668
0
        return;
669
0
#endif
670
0
      } else {
671
0
        vp9_xform_quant_fp(x, plane, block, row, col, plane_bsize, tx_size);
672
0
      }
673
5.45M
    } else {
674
5.45M
      if (max_txsize_lookup[plane_bsize] == tx_size) {
675
3.43M
        int txfm_blk_index = (plane << 2) + (block >> (tx_size << 1));
676
3.43M
        if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_NONE) {
677
          // full forward transform and quantization
678
3.43M
          vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);
679
3.43M
        } else if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_AC_ONLY) {
680
          // fast path forward transform and quantization
681
0
          vp9_xform_quant_dc(x, plane, block, row, col, plane_bsize, tx_size);
682
0
        } else {
683
          // skip forward transform
684
0
          p->eobs[block] = 0;
685
0
          *a = *l = 0;
686
#if CONFIG_MISMATCH_DEBUG
687
          goto encode_block_end;
688
#else
689
0
          return;
690
0
#endif
691
0
        }
692
3.43M
      } else {
693
2.02M
        vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);
694
2.02M
      }
695
5.45M
    }
696
5.45M
  }
697
698
5.45M
  if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
699
0
    const int ctx = combine_entropy_contexts(*a, *l);
700
0
    *a = *l = vp9_optimize_b(x, plane, block, tx_size, ctx) > 0;
701
5.45M
  } else {
702
5.45M
    *a = *l = p->eobs[block] > 0;
703
5.45M
  }
704
705
5.45M
  if (p->eobs[block]) *(args->skip) = 0;
706
707
5.45M
  if (x->skip_encode || p->eobs[block] == 0) {
708
#if CONFIG_MISMATCH_DEBUG
709
    goto encode_block_end;
710
#else
711
458k
    return;
712
458k
#endif
713
458k
  }
714
4.99M
#if CONFIG_VP9_HIGHBITDEPTH
715
4.99M
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
716
0
    uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst);
717
0
    switch (tx_size) {
718
0
      case TX_32X32:
719
0
        vp9_highbd_idct32x32_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
720
0
                                 xd->bd);
721
0
        break;
722
0
      case TX_16X16:
723
0
        vp9_highbd_idct16x16_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
724
0
                                 xd->bd);
725
0
        break;
726
0
      case TX_8X8:
727
0
        vp9_highbd_idct8x8_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
728
0
                               xd->bd);
729
0
        break;
730
0
      default:
731
0
        assert(tx_size == TX_4X4);
732
        // this is like vp9_short_idct4x4 but has a special case around eob<=1
733
        // which is significant (not just an optimization) for the lossless
734
        // case.
735
0
        x->highbd_inv_txfm_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
736
0
                               xd->bd);
737
0
        break;
738
0
    }
739
#if CONFIG_MISMATCH_DEBUG
740
    goto encode_block_end;
741
#else
742
0
    return;
743
0
#endif
744
0
  }
745
4.99M
#endif  // CONFIG_VP9_HIGHBITDEPTH
746
747
4.99M
  switch (tx_size) {
748
6.06k
    case TX_32X32:
749
6.06k
      vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
750
6.06k
      break;
751
43.5k
    case TX_16X16:
752
43.5k
      vp9_idct16x16_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
753
43.5k
      break;
754
314k
    case TX_8X8:
755
314k
      vp9_idct8x8_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
756
314k
      break;
757
4.63M
    default:
758
4.63M
      assert(tx_size == TX_4X4);
759
      // this is like vp9_short_idct4x4 but has a special case around eob<=1
760
      // which is significant (not just an optimization) for the lossless
761
      // case.
762
4.63M
      x->inv_txfm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
763
4.63M
      break;
764
4.99M
  }
765
#if CONFIG_MISMATCH_DEBUG
766
encode_block_end:
767
  if (output_enabled) {
768
    int pixel_c, pixel_r;
769
    int blk_w = 1 << (tx_size + TX_UNIT_SIZE_LOG2);
770
    int blk_h = 1 << (tx_size + TX_UNIT_SIZE_LOG2);
771
    mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, col, row,
772
                    pd->subsampling_x, pd->subsampling_y);
773
    mismatch_record_block_tx(dst, pd->dst.stride, plane, pixel_c, pixel_r,
774
                             blk_w, blk_h,
775
                             xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH);
776
  }
777
#endif
778
4.99M
}
779
780
// Per-transform-block callback passed to
// vp9_foreach_transformed_block_in_plane() by vp9_encode_sby_pass1().
// `arg` is the MACROBLOCK being encoded. The block is run through
// vp9_xform_quant(); if that leaves a non-zero eob, the dequantized
// coefficients are handed to x->inv_txfm_add (or x->highbd_inv_txfm_add when
// the current frame buffer is flagged YV12_FLAG_HIGHBITDEPTH) together with
// the destination pointer and stride.
static void encode_block_pass1(int plane, int block, int row, int col,
781
                               BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
782
0
                               void *arg) {
783
0
  MACROBLOCK *const x = (MACROBLOCK *)arg;
784
0
  MACROBLOCKD *const xd = &x->e_mbd;
785
0
  struct macroblock_plane *const p = &x->plane[plane];
786
0
  struct macroblockd_plane *const pd = &xd->plane[plane];
787
0
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
788
0
  uint8_t *dst;
789
0
  // row and col are multiplied by 4 to form the offset into the destination
  // buffer.
  dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col];
790
791
0
  vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);
792
793
0
  // Nothing is added to dst when the block has no coefficients (eob == 0).
  if (p->eobs[block] > 0) {
794
0
#if CONFIG_VP9_HIGHBITDEPTH
795
0
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
796
0
      x->highbd_inv_txfm_add(dqcoeff, CONVERT_TO_SHORTPTR(dst), pd->dst.stride,
797
0
                             p->eobs[block], xd->bd);
798
0
      return;
799
0
    }
800
0
#endif  // CONFIG_VP9_HIGHBITDEPTH
801
0
    x->inv_txfm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
802
0
  }
803
0
}
804
805
0
// Processes plane 0 only: calls vp9_subtract_plane() for the block of size
// `bsize`, then invokes encode_block_pass1() on every transform block of that
// plane, passing `x` as the callback argument.
void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) {
806
0
  vp9_subtract_plane(x, bsize, 0);
807
0
  vp9_foreach_transformed_block_in_plane(&x->e_mbd, bsize, 0,
808
0
                                         encode_block_pass1, x);
809
0
}
810
811
// Runs encode_block() over every transform block of all MAX_MB_PLANE planes
// of the current block.
//
// mi->skip is set to 1 before anything else; if x->skip is set the function
// returns right away. Otherwise, for each plane, vp9_subtract_plane() is
// called unless x->skip_recode is set, the entropy contexts are loaded when
// the optimization path is active, and the per-block callback is invoked with
// `arg` (which carries &mi->skip).
//
// mi_row, mi_col and output_enabled are only forwarded to the callback when
// CONFIG_MISMATCH_DEBUG is enabled; otherwise they are explicitly unused.
void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, int mi_col,
812
1.81M
                   int output_enabled) {
813
1.81M
  MACROBLOCKD *const xd = &x->e_mbd;
814
1.81M
  struct optimize_ctx ctx;
815
1.81M
  MODE_INFO *mi = xd->mi[0];
816
1.81M
  int plane;
817
#if CONFIG_MISMATCH_DEBUG
818
  struct encode_b_args arg = { x,
819
                               1,     // enable_trellis_opt
820
                               0.0,   // trellis_opt_thresh
821
                               NULL,  // &sse_calc_done
822
                               NULL,  // &sse
823
                               NULL,  // above entropy context
824
                               NULL,  // left entropy context
825
                               &mi->skip, mi_row, mi_col, output_enabled };
826
#else
827
1.81M
  struct encode_b_args arg = { x,
828
1.81M
                               1,     // enable_trellis_opt
829
1.81M
                               0.0,   // trellis_opt_thresh
830
1.81M
                               NULL,  // &sse_calc_done
831
1.81M
                               NULL,  // &sse
832
1.81M
                               NULL,  // above entropy context
833
1.81M
                               NULL,  // left entropy context
834
1.81M
                               &mi->skip };
835
1.81M
  (void)mi_row;
836
1.81M
  (void)mi_col;
837
1.81M
  (void)output_enabled;
838
1.81M
#endif
839
840
1.81M
  // Start out marked as skipped; this is the value left in place when the
  // early return below is taken.
  mi->skip = 1;
841
842
1.81M
  if (x->skip) return;
843
844
5.82M
  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
845
4.36M
    if (!x->skip_recode) vp9_subtract_plane(x, bsize, plane);
846
847
4.36M
    // ctx is only filled in by vp9_get_entropy_contexts() on this branch; in
    // the else branch ctx.ta/ctx.tl are handed to the callback without having
    // been initialized here.
    if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
848
0
      const struct macroblockd_plane *const pd = &xd->plane[plane];
849
0
      const TX_SIZE tx_size = plane ? get_uv_tx_size(mi, pd) : mi->tx_size;
850
0
      vp9_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane],
851
0
                               ctx.tl[plane]);
852
0
      arg.enable_trellis_opt = 1;
853
4.36M
    } else {
854
4.36M
      arg.enable_trellis_opt = 0;
855
4.36M
    }
856
4.36M
    // The NULL placeholders from the initializer are replaced with this
    // plane's context arrays before the callback runs.
    arg.ta = ctx.ta[plane];
857
4.36M
    arg.tl = ctx.tl[plane];
858
859
4.36M
    vp9_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block,
860
4.36M
                                           &arg);
861
4.36M
  }
862
1.45M
}
863
864
// Intra-encodes a single transform block; `arg` is a struct encode_b_args.
//
// Sequence, as implemented below:
//   1. Select tx_type, scan order and prediction mode for the block.
//   2. Call vp9_predict_intra_block() writing the prediction to dst.
//   3. Unless x->skip_recode is set: compute src_diff with
//      vpx_subtract_block() / vpx_highbd_subtract_block() and let
//      do_trellis_opt() decide whether trellis optimization is enabled.
//   4. Per transform size: forward transform + quantize (again only when
//      !x->skip_recode), optionally vp9_optimize_b(), and, when
//      !x->skip_encode and the eob is non-zero, the inverse transform that
//      adds into dst.
//   5. If the eob is non-zero, *(args->skip) is cleared.
//
// With CONFIG_VP9_HIGHBITDEPTH, a frame buffer flagged
// YV12_FLAG_HIGHBITDEPTH takes the highbd switch and returns from there.
void vp9_encode_block_intra(int plane, int block, int row, int col,
865
                            BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
866
240M
                            void *arg) {
867
240M
  struct encode_b_args *const args = arg;
868
240M
  MACROBLOCK *const x = args->x;
869
240M
  MACROBLOCKD *const xd = &x->e_mbd;
870
240M
  MODE_INFO *mi = xd->mi[0];
871
240M
  struct macroblock_plane *const p = &x->plane[plane];
872
240M
  struct macroblockd_plane *const pd = &xd->plane[plane];
873
240M
  tran_low_t *coeff = BLOCK_OFFSET(p->coeff, block);
874
240M
  tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
875
240M
  tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
876
240M
  const ScanOrder *scan_order;
877
240M
  TX_TYPE tx_type = DCT_DCT;
878
240M
  PREDICTION_MODE mode;
879
240M
  const int bwl = b_width_log2_lookup[plane_bsize];
880
240M
  const int diff_stride = 4 * (1 << bwl);
881
240M
  uint8_t *src, *dst;
882
240M
  int16_t *src_diff;
883
240M
  uint16_t *eob = &p->eobs[block];
884
240M
  const int src_stride = p->src.stride;
885
240M
  const int dst_stride = pd->dst.stride;
886
240M
  // This initial value only survives when x->skip_recode is set (i.e. it is
  // 0 in that case); otherwise it is overwritten by do_trellis_opt() below.
  int enable_trellis_opt = !x->skip_recode;
887
240M
  // a and l stay NULL unless enable_trellis_opt ends up non-zero; every
  // dereference of them below is guarded by enable_trellis_opt.
  ENTROPY_CONTEXT *a = NULL;
888
240M
  ENTROPY_CONTEXT *l = NULL;
889
240M
  int entropy_ctx = 0;
890
240M
  dst = &pd->dst.buf[4 * (row * dst_stride + col)];
891
240M
  src = &p->src.buf[4 * (row * src_stride + col)];
892
240M
  src_diff = &p->src_diff[4 * (row * diff_stride + col)];
893
894
240M
  if (tx_size == TX_4X4) {
895
177M
    tx_type = get_tx_type_4x4(get_plane_type(plane), xd, block);
896
177M
    scan_order = &vp9_scan_orders[TX_4X4][tx_type];
897
177M
    mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mi->uv_mode;
898
177M
  } else {
899
62.9M
    mode = plane == 0 ? mi->mode : mi->uv_mode;
900
62.9M
    if (tx_size == TX_32X32) {
901
3.06M
      // 32x32 always uses the default scan order; tx_type keeps its DCT_DCT
      // initial value.
      scan_order = &vp9_default_scan_orders[TX_32X32];
902
59.8M
    } else {
903
59.8M
      tx_type = get_tx_type(get_plane_type(plane), xd);
904
59.8M
      scan_order = &vp9_scan_orders[tx_size][tx_type];
905
59.8M
    }
906
62.9M
  }
907
908
240M
  // The reference buffer/stride passed to the predictor is the source buffer
  // when x->skip_encode or x->fp_src_pred is set, and dst otherwise. The
  // prediction itself is always written to dst.
  vp9_predict_intra_block(
909
240M
      xd, bwl, tx_size, mode, (x->skip_encode || x->fp_src_pred) ? src : dst,
910
240M
      (x->skip_encode || x->fp_src_pred) ? src_stride : dst_stride, dst,
911
240M
      dst_stride, col, row, plane);
912
913
  // skip block condition should be handled before this is called.
914
240M
  assert(!x->skip_block);
915
916
240M
  if (!x->skip_recode) {
917
240M
    const int tx_size_in_pixels = (1 << tx_size) << 2;
918
240M
#if CONFIG_VP9_HIGHBITDEPTH
919
240M
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
920
0
      vpx_highbd_subtract_block(tx_size_in_pixels, tx_size_in_pixels, src_diff,
921
0
                                diff_stride, src, src_stride, dst, dst_stride,
922
0
                                xd->bd);
923
240M
    } else {
924
240M
      vpx_subtract_block(tx_size_in_pixels, tx_size_in_pixels, src_diff,
925
240M
                         diff_stride, src, src_stride, dst, dst_stride);
926
240M
    }
927
#else
928
    vpx_subtract_block(tx_size_in_pixels, tx_size_in_pixels, src_diff,
929
                       diff_stride, src, src_stride, dst, dst_stride);
930
#endif
931
240M
    enable_trellis_opt = do_trellis_opt(pd, src_diff, diff_stride, row, col,
932
240M
                                        plane_bsize, tx_size, args);
933
240M
  }
934
935
240M
  if (enable_trellis_opt) {
936
19.4M
    a = &args->ta[col];
937
19.4M
    l = &args->tl[row];
938
19.4M
    entropy_ctx = combine_entropy_contexts(*a, *l);
939
19.4M
  }
940
941
240M
#if CONFIG_VP9_HIGHBITDEPTH
942
240M
  // High bitdepth path: mirrors the switch further down but uses the highbd
  // transform/quantize/inverse routines and returns on its own.
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
943
0
    uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst);
944
0
    switch (tx_size) {
945
0
      case TX_32X32:
946
0
        if (!x->skip_recode) {
947
0
          highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
948
0
          vpx_highbd_quantize_b_32x32(coeff, p, qcoeff, dqcoeff, pd->dequant,
949
0
                                      eob, scan_order);
950
0
        }
951
0
        if (enable_trellis_opt) {
952
0
          *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
953
0
        }
954
0
        if (!x->skip_encode && *eob) {
955
0
          vp9_highbd_idct32x32_add(dqcoeff, dst16, dst_stride, *eob, xd->bd);
956
0
        }
957
0
        break;
958
0
      case TX_16X16:
959
0
        if (!x->skip_recode) {
960
0
          if (tx_type == DCT_DCT)
961
0
            vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
962
0
          else
963
0
            vp9_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type);
964
0
          vpx_highbd_quantize_b(coeff, 256, p, qcoeff, dqcoeff, pd->dequant,
965
0
                                eob, scan_order);
966
0
        }
967
0
        if (enable_trellis_opt) {
968
0
          *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
969
0
        }
970
0
        if (!x->skip_encode && *eob) {
971
0
          vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst16, dst_stride, *eob,
972
0
                                  xd->bd);
973
0
        }
974
0
        break;
975
0
      case TX_8X8:
976
0
        if (!x->skip_recode) {
977
0
          if (tx_type == DCT_DCT)
978
0
            vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
979
0
          else
980
0
            vp9_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type);
981
0
          vpx_highbd_quantize_b(coeff, 64, p, qcoeff, dqcoeff, pd->dequant, eob,
982
0
                                scan_order);
983
0
        }
984
0
        if (enable_trellis_opt) {
985
0
          *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
986
0
        }
987
0
        if (!x->skip_encode && *eob) {
988
0
          vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst16, dst_stride, *eob,
989
0
                                xd->bd);
990
0
        }
991
0
        break;
992
0
      default:
993
0
        assert(tx_size == TX_4X4);
994
0
        if (!x->skip_recode) {
995
0
          if (tx_type != DCT_DCT)
996
0
            vp9_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type);
997
0
          else
998
0
            x->fwd_txfm4x4(src_diff, coeff, diff_stride);
999
0
          vpx_highbd_quantize_b(coeff, 16, p, qcoeff, dqcoeff, pd->dequant, eob,
1000
0
                                scan_order);
1001
0
        }
1002
0
        if (enable_trellis_opt) {
1003
0
          *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
1004
0
        }
1005
0
        if (!x->skip_encode && *eob) {
1006
0
          if (tx_type == DCT_DCT) {
1007
            // this is like vp9_short_idct4x4 but has a special case around
1008
            // eob<=1 which is significant (not just an optimization) for the
1009
            // lossless case.
1010
0
            x->highbd_inv_txfm_add(dqcoeff, dst16, dst_stride, *eob, xd->bd);
1011
0
          } else {
1012
0
            vp9_highbd_iht4x4_16_add(dqcoeff, dst16, dst_stride, tx_type,
1013
0
                                     xd->bd);
1014
0
          }
1015
0
        }
1016
0
        break;
1017
0
    }
1018
0
    if (*eob) *(args->skip) = 0;
1019
0
    return;
1020
0
  }
1021
240M
#endif  // CONFIG_VP9_HIGHBITDEPTH
1022
1023
240M
  // Regular (8-bit buffer) path. Each case follows the same three steps:
  // forward transform + quantize, optional trellis optimization, inverse
  // transform added into dst.
  switch (tx_size) {
1024
3.06M
    case TX_32X32:
1025
3.06M
      if (!x->skip_recode) {
1026
3.06M
        fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
1027
3.06M
        vpx_quantize_b_32x32(coeff, p, qcoeff, dqcoeff, pd->dequant, eob,
1028
3.06M
                             scan_order);
1029
3.06M
      }
1030
3.06M
      if (enable_trellis_opt) {
1031
493k
        *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
1032
493k
      }
1033
3.06M
      if (!x->skip_encode && *eob)
1034
2.04M
        vp9_idct32x32_add(dqcoeff, dst, dst_stride, *eob);
1035
3.06M
      break;
1036
10.7M
    case TX_16X16:
1037
10.7M
      if (!x->skip_recode) {
1038
10.7M
        vp9_fht16x16(src_diff, coeff, diff_stride, tx_type);
1039
10.7M
        vpx_quantize_b(coeff, 256, p, qcoeff, dqcoeff, pd->dequant, eob,
1040
10.7M
                       scan_order);
1041
10.7M
      }
1042
10.7M
      if (enable_trellis_opt) {
1043
910k
        *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
1044
910k
      }
1045
10.7M
      if (!x->skip_encode && *eob)
1046
8.48M
        vp9_iht16x16_add(tx_type, dqcoeff, dst, dst_stride, *eob);
1047
10.7M
      break;
1048
49.1M
    case TX_8X8:
1049
49.1M
      if (!x->skip_recode) {
1050
49.1M
        vp9_fht8x8(src_diff, coeff, diff_stride, tx_type);
1051
49.1M
        vpx_quantize_b(coeff, 64, p, qcoeff, dqcoeff, pd->dequant, eob,
1052
49.1M
                       scan_order);
1053
49.1M
      }
1054
49.1M
      if (enable_trellis_opt) {
1055
3.77M
        *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
1056
3.77M
      }
1057
49.1M
      if (!x->skip_encode && *eob)
1058
38.8M
        vp9_iht8x8_add(tx_type, dqcoeff, dst, dst_stride, *eob);
1059
49.1M
      break;
1060
177M
    default:
1061
177M
      assert(tx_size == TX_4X4);
1062
177M
      if (!x->skip_recode) {
1063
177M
        if (tx_type != DCT_DCT)
1064
15.9M
          vp9_fht4x4(src_diff, coeff, diff_stride, tx_type);
1065
161M
        else
1066
161M
          x->fwd_txfm4x4(src_diff, coeff, diff_stride);
1067
177M
        vpx_quantize_b(coeff, 16, p, qcoeff, dqcoeff, pd->dequant, eob,
1068
177M
                       scan_order);
1069
177M
      }
1070
177M
      if (enable_trellis_opt) {
1071
14.2M
        *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
1072
14.2M
      }
1073
177M
      if (!x->skip_encode && *eob) {
1074
132M
        if (tx_type == DCT_DCT)
1075
          // this is like vp9_short_idct4x4 but has a special case around eob<=1
1076
          // which is significant (not just an optimization) for the lossless
1077
          // case.
1078
120M
          x->inv_txfm_add(dqcoeff, dst, dst_stride, *eob);
1079
12.6M
        else
1080
12.6M
          vp9_iht4x4_16_add(dqcoeff, dst, dst_stride, tx_type);
1081
132M
      }
1082
177M
      break;
1083
240M
  }
1084
240M
  // Any block that ends with coefficients clears the caller's skip flag.
  if (*eob) *(args->skip) = 0;
1085
240M
}
1086
1087
// Runs vp9_encode_block_intra() on every transform block of `plane` for a
// block of size `bsize`.
//
// Trellis optimization stays enabled in `arg` only when the caller passes a
// non-zero enable_trellis_opt, x->optimize is set, and at least one of
// x->skip_recode / x->skip_optimize is clear. In that case the entropy
// contexts for the plane are loaded into ctx; otherwise
// arg.enable_trellis_opt is forced to 0 and ctx is left as declared.
void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane,
1088
13.4M
                                  int enable_trellis_opt) {
1089
13.4M
  const MACROBLOCKD *const xd = &x->e_mbd;
1090
13.4M
  struct optimize_ctx ctx;
1091
#if CONFIG_MISMATCH_DEBUG
1092
  // TODO(angiebird): make mismatch_debug support intra mode
1093
  struct encode_b_args arg = {
1094
    x,
1095
    enable_trellis_opt,
1096
    0.0,   // trellis_opt_thresh
1097
    NULL,  // &sse_calc_done
1098
    NULL,  // &sse
1099
    ctx.ta[plane],
1100
    ctx.tl[plane],
1101
    &xd->mi[0]->skip,
1102
    0,  // mi_row
1103
    0,  // mi_col
1104
    0   // output_enabled
1105
  };
1106
#else
1107
13.4M
  struct encode_b_args arg = { x,
1108
13.4M
                               enable_trellis_opt,
1109
13.4M
                               0.0,   // trellis_opt_thresh
1110
13.4M
                               NULL,  // &sse_calc_done
1111
13.4M
                               NULL,  // &sse
1112
13.4M
                               ctx.ta[plane],
1113
13.4M
                               ctx.tl[plane],
1114
13.4M
                               &xd->mi[0]->skip };
1115
13.4M
#endif
1116
1117
13.4M
  if (enable_trellis_opt && x->optimize &&
1118
0
      (!x->skip_recode || !x->skip_optimize)) {
1119
0
    const struct macroblockd_plane *const pd = &xd->plane[plane];
1120
0
    const TX_SIZE tx_size =
1121
0
        plane ? get_uv_tx_size(xd->mi[0], pd) : xd->mi[0]->tx_size;
1122
0
    vp9_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane], ctx.tl[plane]);
1123
13.4M
  } else {
1124
13.4M
    // Override whatever the caller requested so the per-block callback sees
    // trellis optimization as disabled.
    arg.enable_trellis_opt = 0;
1125
13.4M
  }
1126
1127
13.4M
  vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
1128
13.4M
                                         vp9_encode_block_intra, &arg);
1129
13.4M
}