Coverage Report

Created: 2026-05-16 07:49

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libvpx/vp9/encoder/vp9_encodemb.c
Line
Count
Source
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5
 *  that can be found in the LICENSE file in the root of the source
6
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
9
 */
10
11
#include <stdlib.h>
12
13
#include "./vp9_rtcd.h"
14
#include "./vpx_config.h"
15
#include "./vpx_dsp_rtcd.h"
16
17
#include "vpx_dsp/quantize.h"
18
#include "vpx_mem/vpx_mem.h"
19
#include "vpx_ports/mem.h"
20
21
#if CONFIG_MISMATCH_DEBUG
22
#include "vpx_util/vpx_debug_util.h"
23
#endif
24
25
#include "vp9/common/vp9_idct.h"
26
#include "vp9/common/vp9_reconinter.h"
27
#include "vp9/common/vp9_reconintra.h"
28
#include "vp9/common/vp9_scan.h"
29
30
#include "vp9/encoder/vp9_encodemb.h"
31
#include "vp9/encoder/vp9_encoder.h"
32
#include "vp9/encoder/vp9_rd.h"
33
#include "vp9/encoder/vp9_tokenize.h"
34
35
// Range hints for values whose bounds are verified by unit tests instead of
// being re-checked at every use:
//  - NDEBUG undefined: the bound is checked with assert().
//  - NDEBUG defined, clang with __builtin_assume: the bound is handed to the
//    optimizer as an assumption.
//  - NDEBUG defined otherwise: the macros expand to an empty statement.
// Every use of the argument is parenthesized so that a compound expression
// (for example `v & 1`) is bounded as a whole instead of being re-associated
// with the comparison operators. The argument must be free of side effects:
// it appears twice in the checking variants and not at all in the empty ones.
#if defined(NDEBUG)
#if defined(__clang__) && defined(__has_builtin)
#if __has_builtin(__builtin_assume)
// This is verified by test/vp9_scan_test.cc
#define ASSUME_VALID_SCAN_VALUE(i) \
  __builtin_assume(0 <= (i) && (i) <= MAX_SCAN_VALUE)
// This is verified by test/vp9_entropy_test.cc
#define ASSUME_VALID_ENERGY_CLASS(i) \
  __builtin_assume(0 <= (i) && (i) <= MAX_ENERGY_CLASS)
#define ASSUME_VALID_TOKEN(i) __builtin_assume(0 <= (i) && (i) <= MAX_TOKEN)
#else
// clang without __builtin_assume: nothing to tell the optimizer.
#define ASSUME_VALID_SCAN_VALUE(i) \
  do {                             \
  } while (0)
#define ASSUME_VALID_ENERGY_CLASS(i) \
  do {                               \
  } while (0)
#define ASSUME_VALID_TOKEN(i) \
  do {                        \
  } while (0)
#endif
#else
// Not clang (or no __has_builtin): nothing to tell the optimizer.
#define ASSUME_VALID_SCAN_VALUE(i) \
  do {                             \
  } while (0)
#define ASSUME_VALID_ENERGY_CLASS(i) \
  do {                               \
  } while (0)
#define ASSUME_VALID_TOKEN(i) \
  do {                        \
  } while (0)
#endif
#else
// Debug builds: enforce the bounds at runtime.
#define ASSUME_VALID_SCAN_VALUE(i) assert(0 <= (i) && (i) <= MAX_SCAN_VALUE)
#define ASSUME_VALID_ENERGY_CLASS(i) \
  assert(0 <= (i) && (i) <= MAX_ENERGY_CLASS)
#define ASSUME_VALID_TOKEN(i) assert(0 <= (i) && (i) <= MAX_TOKEN)
#endif
72
73
// Two tables of ENTROPY_CONTEXT values, each holding 16 entries for every
// plane (MAX_MB_PLANE planes).
// NOTE(review): `ta` / `tl` presumably stand for the above and left entropy
// contexts; no use of this struct is visible in this part of the file, so
// confirm against its users.
struct optimize_ctx {
74
  ENTROPY_CONTEXT ta[MAX_MB_PLANE][16];
75
  ENTROPY_CONTEXT tl[MAX_MB_PLANE][16];
76
};
77
78
42.5M
// Runs the block-subtract kernel for one plane of the current block.
//
//   x     - macroblock state; x->plane[plane] supplies the source buffer and
//           receives the result in its src_diff buffer, and
//           x->e_mbd.plane[plane] supplies the dst buffer.
//   bsize - block size, converted to the plane's own block size through
//           get_plane_block_size().
//   plane - index of the plane to process.
//
// src_diff is written with a row stride equal to the block width in pixels.
// When the current frame buffer carries YV12_FLAG_HIGHBITDEPTH (and the build
// enables CONFIG_VP9_HIGHBITDEPTH) the high-bitdepth kernel is used instead.
void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const enc_plane = &x->plane[plane];
  const struct macroblockd_plane *const dec_plane = &xd->plane[plane];
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, dec_plane);
  // The lookup tables count 4x4 units; scale them to pixels.
  const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize];

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    vpx_highbd_subtract_block(height, width, enc_plane->src_diff, width,
                              enc_plane->src.buf, enc_plane->src.stride,
                              dec_plane->dst.buf, dec_plane->dst.stride,
                              xd->bd);
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  vpx_subtract_block(height, width, enc_plane->src_diff, width,
                     enc_plane->src.buf, enc_plane->src.stride,
                     dec_plane->dst.buf, dec_plane->dst.stride);
}
96
97
static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = {
98
  { 10, 6 },
99
  { 8, 5 },
100
};
101
102
// 'num' can be negative, but 'shift' must be non-negative.
103
#define RIGHT_SHIFT_POSSIBLY_NEGATIVE(num, shift) \
104
1.80M
  (((num) >= 0) ? (num) >> (shift) : -((-(num)) >> (shift)))
105
106
int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
107
29.9M
                   int ctx) {
108
29.9M
  MACROBLOCKD *const xd = &mb->e_mbd;
109
29.9M
  struct macroblock_plane *const p = &mb->plane[plane];
110
29.9M
  struct macroblockd_plane *const pd = &xd->plane[plane];
111
29.9M
  const int ref = is_inter_block(xd->mi[0]);
112
29.9M
  uint8_t token_cache[MAX_SCAN_VALUE + 1];
113
29.9M
  const tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
114
29.9M
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
115
29.9M
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
116
29.9M
  const int eob = p->eobs[block];
117
29.9M
  const PLANE_TYPE plane_type = get_plane_type(plane);
118
29.9M
  const int default_eob = 16 << (tx_size << 1);
119
29.9M
  const int shift = (tx_size == TX_32X32);
120
29.9M
  const int16_t *const dequant_ptr = pd->dequant;
121
29.9M
  const uint8_t *const band_translate = get_band_translate(tx_size);
122
29.9M
  const ScanOrder *const so = get_scan(xd, tx_size, plane_type, block);
123
29.9M
  const int16_t *const scan = so->scan;
124
29.9M
  const int16_t *const nb = so->neighbors;
125
29.9M
  const MODE_INFO *mbmi = xd->mi[0];
126
29.9M
  const int sharpness = mb->sharpness;
127
29.9M
  const int64_t rdadj = (int64_t)mb->rdmult * plane_rd_mult[ref][plane_type];
128
29.9M
  const int64_t rdmult =
129
29.9M
      (sharpness == 0 ? rdadj >> 1
130
29.9M
                      : (rdadj * (8 - sharpness + mbmi->segment_id)) >> 4);
131
132
29.9M
  const int64_t rddiv = mb->rddiv;
133
29.9M
  int64_t rd_cost0, rd_cost1;
134
29.9M
  int64_t rate0, rate1;
135
29.9M
  int16_t t0, t1;
136
29.9M
  int i, final_eob;
137
29.9M
  int count_high_values_after_eob = 0;
138
29.9M
#if CONFIG_VP9_HIGHBITDEPTH
139
29.9M
  const uint16_t *cat6_high_cost = vp9_get_high_cost_table(xd->bd);
140
#else
141
  const uint16_t *cat6_high_cost = vp9_get_high_cost_table(8);
142
#endif
143
29.9M
  unsigned int(*const token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
144
29.9M
      mb->token_costs[tx_size][plane_type][ref];
145
29.9M
  unsigned int(*token_costs_cur)[2][COEFF_CONTEXTS][ENTROPY_TOKENS];
146
29.9M
  int64_t eob_cost0, eob_cost1;
147
29.9M
  int64_t accu_rate = 0;
148
  // Initialized to the worst possible error for the largest transform size.
149
  // This ensures that it never goes negative.
150
29.9M
  int64_t accu_error = ((int64_t)1) << 50;
151
29.9M
  int64_t best_block_rd_cost = INT64_MAX;
152
29.9M
  int x_prev = 1;
153
29.9M
  tran_low_t before_best_eob_qc = 0;
154
29.9M
  tran_low_t before_best_eob_dqc = 0;
155
156
29.9M
  assert((!plane_type && !plane) || (plane_type && plane));
157
29.9M
  assert(eob <= default_eob);
158
159
576M
  for (i = 0; i < eob; i++) {
160
546M
    const int rc = scan[i];
161
546M
    ASSUME_VALID_SCAN_VALUE(rc);
162
546M
    int16_t token = vp9_get_token(qcoeff[rc]);
163
546M
    ASSUME_VALID_TOKEN(token);
164
546M
    token_cache[rc] = vp9_pt_energy_class[token];
165
546M
  }
166
29.9M
  final_eob = 0;
167
168
  // This is used in the first iteration, and must be inbounds. We cannot
169
  // locally verify that this is in bounds, so we need to verify at runtime.
170
  // For now, only verify if we have array-bounds turned on.
171
29.9M
#if defined(__clang__) && defined(__has_feature)
172
#if __has_feature(array_bounds_sanitizer)
173
  if (ctx < 0 || ctx > MAX_ENERGY_CLASS) {
174
    abort();
175
  }
176
#endif
177
29.9M
#endif
178
179
  // Initial RD cost.
180
29.9M
  token_costs_cur = token_costs + band_translate[0];
181
29.9M
  rate0 = (*token_costs_cur)[0][ctx][EOB_TOKEN];
182
29.9M
  best_block_rd_cost = RDCOST(rdmult, rddiv, rate0, accu_error);
183
184
  // For each token, pick one of two choices greedily:
185
  // (i) First candidate: Keep current quantized value, OR
186
  // (ii) Second candidate: Reduce quantized value by 1.
187
576M
  for (i = 0; i < eob; i++) {
188
546M
    const int rc = scan[i];
189
546M
    ASSUME_VALID_SCAN_VALUE(rc);
190
546M
    const int x = qcoeff[rc];
191
546M
    const int band_cur = band_translate[i];
192
546M
    const int ctx_cur = (i == 0) ? ctx : get_coef_context(nb, token_cache, i);
193
546M
    ASSUME_VALID_ENERGY_CLASS(ctx_cur);
194
546M
    const int token_tree_sel_cur = (x_prev == 0);
195
546M
    token_costs_cur = token_costs + band_cur;
196
546M
    if (x == 0) {  // No need to search
197
224M
      const int token = vp9_get_token(x);
198
224M
      ASSUME_VALID_TOKEN(token);
199
224M
      rate0 = (*token_costs_cur)[token_tree_sel_cur][ctx_cur][token];
200
224M
      accu_rate += rate0;
201
224M
      x_prev = 0;
202
      // Note: accu_error does not change.
203
322M
    } else {
204
322M
      const int dqv = dequant_ptr[rc != 0];
205
      // Compute the distortion for quantizing to 0.
206
322M
      const int diff_for_zero_raw = (0 - coeff[rc]) * (1 << shift);
207
322M
      const int diff_for_zero =
208
322M
#if CONFIG_VP9_HIGHBITDEPTH
209
322M
          (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
210
322M
              ? RIGHT_SHIFT_POSSIBLY_NEGATIVE(diff_for_zero_raw, xd->bd - 8)
211
322M
              :
212
322M
#endif
213
322M
              diff_for_zero_raw;
214
322M
      const int64_t distortion_for_zero =
215
322M
          (int64_t)diff_for_zero * diff_for_zero;
216
217
      // Compute the distortion for the first candidate
218
322M
      const int diff0_raw = (dqcoeff[rc] - coeff[rc]) * (1 << shift);
219
322M
      const int diff0 =
220
322M
#if CONFIG_VP9_HIGHBITDEPTH
221
322M
          (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
222
322M
              ? RIGHT_SHIFT_POSSIBLY_NEGATIVE(diff0_raw, xd->bd - 8)
223
322M
              :
224
322M
#endif  // CONFIG_VP9_HIGHBITDEPTH
225
322M
              diff0_raw;
226
322M
      const int64_t distortion0 = (int64_t)diff0 * diff0;
227
228
      // Compute the distortion for the second candidate
229
322M
      const int sign = -(x < 0);        // -1 if x is negative and 0 otherwise.
230
322M
      const int x1 = x - 2 * sign - 1;  // abs(x1) = abs(x) - 1.
231
322M
      int64_t distortion1;
232
322M
      if (x1 != 0) {
233
214M
        const int dqv_step =
234
214M
#if CONFIG_VP9_HIGHBITDEPTH
235
214M
            (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? dqv >> (xd->bd - 8)
236
214M
                                                          :
237
214M
#endif  // CONFIG_VP9_HIGHBITDEPTH
238
214M
                                                          dqv;
239
214M
        const int diff_step = (dqv_step + sign) ^ sign;
240
214M
        const int diff1 = diff0 - diff_step;
241
214M
        assert(dqv > 0);  // We aren't right shifting a negative number above.
242
214M
        distortion1 = (int64_t)diff1 * diff1;
243
214M
      } else {
244
108M
        distortion1 = distortion_for_zero;
245
108M
      }
246
322M
      {
247
        // Calculate RDCost for current coeff for the two candidates.
248
322M
        const int64_t base_bits0 = vp9_get_token_cost(x, &t0, cat6_high_cost);
249
322M
        const int64_t base_bits1 = vp9_get_token_cost(x1, &t1, cat6_high_cost);
250
322M
        rate0 =
251
322M
            base_bits0 + (*token_costs_cur)[token_tree_sel_cur][ctx_cur][t0];
252
322M
        rate1 =
253
322M
            base_bits1 + (*token_costs_cur)[token_tree_sel_cur][ctx_cur][t1];
254
322M
      }
255
322M
      {
256
322M
        int rdcost_better_for_x1, eob_rdcost_better_for_x1;
257
322M
        int dqc0, dqc1;
258
322M
        int64_t best_eob_cost_cur;
259
322M
        int use_x1;
260
261
        // Calculate RD Cost effect on the next coeff for the two candidates.
262
322M
        int64_t next_bits0 = 0;
263
322M
        int64_t next_bits1 = 0;
264
322M
        int64_t next_eob_bits0 = 0;
265
322M
        int64_t next_eob_bits1 = 0;
266
322M
        if (i < default_eob - 1) {
267
314M
          int ctx_next, token_tree_sel_next;
268
314M
          const int band_next = band_translate[i + 1];
269
314M
          const int token_next =
270
314M
              (i + 1 != eob) ? vp9_get_token(qcoeff[scan[i + 1]]) : EOB_TOKEN;
271
314M
          ASSUME_VALID_TOKEN(token_next);
272
314M
          unsigned int(*const token_costs_next)[2][COEFF_CONTEXTS]
273
314M
                                               [ENTROPY_TOKENS] =
274
314M
                                                   token_costs + band_next;
275
314M
          token_cache[rc] = vp9_pt_energy_class[t0];
276
314M
          ctx_next = get_coef_context(nb, token_cache, i + 1);
277
          // token_cache is initialized with valid energy classes.
278
          // get_coef_context returns at most the maximum value of
279
          // token_cache.
280
314M
          ASSUME_VALID_ENERGY_CLASS(ctx_next);
281
314M
          token_tree_sel_next = (x == 0);
282
314M
          next_bits0 =
283
314M
              (*token_costs_next)[token_tree_sel_next][ctx_next][token_next];
284
314M
          next_eob_bits0 =
285
314M
              (*token_costs_next)[token_tree_sel_next][ctx_next][EOB_TOKEN];
286
314M
          token_cache[rc] = vp9_pt_energy_class[t1];
287
314M
          ctx_next = get_coef_context(nb, token_cache, i + 1);
288
          // token_cache is initialized with valid energy classes.
289
          // get_coef_context returns at most the maximum value of
290
          // token_cache.
291
314M
          ASSUME_VALID_ENERGY_CLASS(ctx_next);
292
314M
          token_tree_sel_next = (x1 == 0);
293
314M
          next_bits1 =
294
314M
              (*token_costs_next)[token_tree_sel_next][ctx_next][token_next];
295
314M
          if (x1 != 0) {
296
209M
            next_eob_bits1 =
297
209M
                (*token_costs_next)[token_tree_sel_next][ctx_next][EOB_TOKEN];
298
209M
          }
299
314M
        }
300
301
        // Compare the total RD costs for two candidates.
302
322M
        rd_cost0 = RDCOST(rdmult, rddiv, (rate0 + next_bits0), distortion0);
303
322M
        rd_cost1 = RDCOST(rdmult, rddiv, (rate1 + next_bits1), distortion1);
304
322M
        rdcost_better_for_x1 = (rd_cost1 < rd_cost0);
305
322M
        eob_cost0 = RDCOST(rdmult, rddiv, (accu_rate + rate0 + next_eob_bits0),
306
322M
                           (accu_error + distortion0 - distortion_for_zero));
307
322M
        eob_cost1 = eob_cost0;
308
322M
        if (x1 != 0) {
309
214M
          eob_cost1 =
310
214M
              RDCOST(rdmult, rddiv, (accu_rate + rate1 + next_eob_bits1),
311
214M
                     (accu_error + distortion1 - distortion_for_zero));
312
214M
          eob_rdcost_better_for_x1 = (eob_cost1 < eob_cost0);
313
214M
        } else {
314
108M
          eob_rdcost_better_for_x1 = 0;
315
108M
        }
316
317
        // Calculate the two candidate de-quantized values.
318
322M
        dqc0 = dqcoeff[rc];
319
322M
        dqc1 = 0;
320
322M
        if (rdcost_better_for_x1 + eob_rdcost_better_for_x1) {
321
4.05M
          if (x1 != 0) {
322
1.80M
            dqc1 = RIGHT_SHIFT_POSSIBLY_NEGATIVE(x1 * dqv, shift);
323
2.25M
          } else {
324
2.25M
            dqc1 = 0;
325
2.25M
          }
326
4.05M
        }
327
328
        // Pick and record the better quantized and de-quantized values.
329
322M
        if (rdcost_better_for_x1) {
330
3.90M
          qcoeff[rc] = x1;
331
3.90M
          dqcoeff[rc] = dqc1;
332
3.90M
          accu_rate += rate1;
333
3.90M
          accu_error += distortion1 - distortion_for_zero;
334
3.90M
          assert(distortion1 <= distortion_for_zero);
335
3.90M
          token_cache[rc] = vp9_pt_energy_class[t1];
336
318M
        } else {
337
318M
          accu_rate += rate0;
338
318M
          accu_error += distortion0 - distortion_for_zero;
339
318M
          assert(distortion0 <= distortion_for_zero);
340
318M
          token_cache[rc] = vp9_pt_energy_class[t0];
341
318M
        }
342
322M
        if (sharpness > 0 && abs(qcoeff[rc]) > 1) count_high_values_after_eob++;
343
322M
        assert(accu_error >= 0);
344
322M
        x_prev = qcoeff[rc];  // Update based on selected quantized value.
345
346
322M
        use_x1 = (x1 != 0) && eob_rdcost_better_for_x1;
347
322M
        best_eob_cost_cur = use_x1 ? eob_cost1 : eob_cost0;
348
349
        // Determine whether to move the eob position to i+1
350
322M
        if (best_eob_cost_cur < best_block_rd_cost) {
351
307M
          best_block_rd_cost = best_eob_cost_cur;
352
307M
          final_eob = i + 1;
353
307M
          count_high_values_after_eob = 0;
354
307M
          if (use_x1) {
355
1.78M
            before_best_eob_qc = x1;
356
1.78M
            before_best_eob_dqc = dqc1;
357
305M
          } else {
358
305M
            before_best_eob_qc = x;
359
305M
            before_best_eob_dqc = dqc0;
360
305M
          }
361
307M
        }
362
322M
      }
363
322M
    }
364
546M
  }
365
29.9M
  if (count_high_values_after_eob > 0) {
366
0
    final_eob = eob - 1;
367
0
    for (; final_eob >= 0; final_eob--) {
368
0
      const int rc = scan[final_eob];
369
0
      ASSUME_VALID_SCAN_VALUE(rc);
370
0
      const int x = qcoeff[rc];
371
0
      if (x) {
372
0
        break;
373
0
      }
374
0
    }
375
0
    final_eob++;
376
29.9M
  } else {
377
29.9M
    assert(final_eob <= eob);
378
29.9M
    if (final_eob > 0) {
379
16.5M
      int rc;
380
16.5M
      assert(before_best_eob_qc != 0);
381
16.5M
      i = final_eob - 1;
382
16.5M
      rc = scan[i];
383
16.5M
      ASSUME_VALID_SCAN_VALUE(rc);
384
16.5M
      qcoeff[rc] = before_best_eob_qc;
385
16.5M
      dqcoeff[rc] = before_best_eob_dqc;
386
16.5M
    }
387
62.4M
    for (i = final_eob; i < eob; i++) {
388
32.5M
      int rc = scan[i];
389
32.5M
      ASSUME_VALID_SCAN_VALUE(rc);
390
32.5M
      qcoeff[rc] = 0;
391
32.5M
      dqcoeff[rc] = 0;
392
32.5M
    }
393
29.9M
  }
394
29.9M
  mb->plane[plane].eobs[block] = final_eob;
395
29.9M
  return final_eob;
396
29.9M
}
397
#undef RIGHT_SHIFT_POSSIBLY_NEGATIVE
398
399
static INLINE void fdct32x32(int rd_transform, const int16_t *src,
400
4.78M
                             tran_low_t *dst, int src_stride) {
401
4.78M
  if (rd_transform)
402
4.58M
    vpx_fdct32x32_rd(src, dst, src_stride);
403
203k
  else
404
203k
    vpx_fdct32x32(src, dst, src_stride);
405
4.78M
}
406
407
#if CONFIG_VP9_HIGHBITDEPTH
// High-bitdepth forward 32x32 DCT of `src` (row stride `src_stride`) into
// `dst`. A non-zero `rd_transform` selects vpx_highbd_fdct32x32_rd(); zero
// selects vpx_highbd_fdct32x32().
static INLINE void highbd_fdct32x32(int rd_transform, const int16_t *src,
                                    tran_low_t *dst, int src_stride) {
  if (!rd_transform) {
    vpx_highbd_fdct32x32(src, dst, src_stride);
    return;
  }
  vpx_highbd_fdct32x32_rd(src, dst, src_stride);
}
#endif  // CONFIG_VP9_HIGHBITDEPTH
416
417
void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, int row, int col,
418
0
                        BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
419
0
  MACROBLOCKD *const xd = &x->e_mbd;
420
0
  const struct macroblock_plane *const p = &x->plane[plane];
421
0
  const struct macroblockd_plane *const pd = &xd->plane[plane];
422
0
  const ScanOrder *const scan_order = &vp9_default_scan_orders[tx_size];
423
0
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
424
0
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
425
0
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
426
0
  uint16_t *const eob = &p->eobs[block];
427
0
  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
428
0
  const int16_t *src_diff;
429
0
  src_diff = &p->src_diff[4 * (row * diff_stride + col)];
430
  // skip block condition should be handled before this is called.
431
0
  assert(!x->skip_block);
432
433
0
#if CONFIG_VP9_HIGHBITDEPTH
434
0
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
435
0
    switch (tx_size) {
436
0
      case TX_32X32:
437
0
        highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
438
0
        vp9_highbd_quantize_fp_32x32(coeff, 1024, p, qcoeff, dqcoeff,
439
0
                                     pd->dequant, eob, scan_order);
440
0
        break;
441
0
      case TX_16X16:
442
0
        vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
443
0
        vp9_highbd_quantize_fp(coeff, 256, p, qcoeff, dqcoeff, pd->dequant, eob,
444
0
                               scan_order);
445
0
        break;
446
0
      case TX_8X8:
447
0
        vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
448
0
        vp9_highbd_quantize_fp(coeff, 64, p, qcoeff, dqcoeff, pd->dequant, eob,
449
0
                               scan_order);
450
0
        break;
451
0
      default:
452
0
        assert(tx_size == TX_4X4);
453
0
        x->fwd_txfm4x4(src_diff, coeff, diff_stride);
454
0
        vp9_highbd_quantize_fp(coeff, 16, p, qcoeff, dqcoeff, pd->dequant, eob,
455
0
                               scan_order);
456
0
        break;
457
0
    }
458
0
    return;
459
0
  }
460
0
#endif  // CONFIG_VP9_HIGHBITDEPTH
461
462
0
  switch (tx_size) {
463
0
    case TX_32X32:
464
0
      fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
465
0
      vp9_quantize_fp_32x32(coeff, 1024, p, qcoeff, dqcoeff, pd->dequant, eob,
466
0
                            scan_order);
467
0
      break;
468
0
    case TX_16X16:
469
0
      vpx_fdct16x16(src_diff, coeff, diff_stride);
470
0
      vp9_quantize_fp(coeff, 256, p, qcoeff, dqcoeff, pd->dequant, eob,
471
0
                      scan_order);
472
0
      break;
473
0
    case TX_8X8:
474
0
      vpx_fdct8x8(src_diff, coeff, diff_stride);
475
0
      vp9_quantize_fp(coeff, 64, p, qcoeff, dqcoeff, pd->dequant, eob,
476
0
                      scan_order);
477
478
0
      break;
479
0
    default:
480
0
      assert(tx_size == TX_4X4);
481
0
      x->fwd_txfm4x4(src_diff, coeff, diff_stride);
482
0
      vp9_quantize_fp(coeff, 16, p, qcoeff, dqcoeff, pd->dequant, eob,
483
0
                      scan_order);
484
0
      break;
485
0
  }
486
0
}
487
488
void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block, int row, int col,
489
412k
                        BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
490
412k
  MACROBLOCKD *const xd = &x->e_mbd;
491
412k
  const struct macroblock_plane *const p = &x->plane[plane];
492
412k
  const struct macroblockd_plane *const pd = &xd->plane[plane];
493
412k
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
494
412k
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
495
412k
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
496
412k
  uint16_t *const eob = &p->eobs[block];
497
412k
  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
498
412k
  const int16_t *src_diff;
499
412k
  src_diff = &p->src_diff[4 * (row * diff_stride + col)];
500
  // skip block condition should be handled before this is called.
501
412k
  assert(!x->skip_block);
502
503
412k
#if CONFIG_VP9_HIGHBITDEPTH
504
412k
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
505
0
    switch (tx_size) {
506
0
      case TX_32X32:
507
0
        vpx_highbd_fdct32x32_1(src_diff, coeff, diff_stride);
508
0
        vpx_highbd_quantize_dc_32x32(coeff, p->round, p->quant_fp[0], qcoeff,
509
0
                                     dqcoeff, pd->dequant[0], eob);
510
0
        break;
511
0
      case TX_16X16:
512
0
        vpx_highbd_fdct16x16_1(src_diff, coeff, diff_stride);
513
0
        vpx_highbd_quantize_dc(coeff, 256, p->round, p->quant_fp[0], qcoeff,
514
0
                               dqcoeff, pd->dequant[0], eob);
515
0
        break;
516
0
      case TX_8X8:
517
0
        vpx_highbd_fdct8x8_1(src_diff, coeff, diff_stride);
518
0
        vpx_highbd_quantize_dc(coeff, 64, p->round, p->quant_fp[0], qcoeff,
519
0
                               dqcoeff, pd->dequant[0], eob);
520
0
        break;
521
0
      default:
522
0
        assert(tx_size == TX_4X4);
523
0
        x->fwd_txfm4x4(src_diff, coeff, diff_stride);
524
0
        vpx_highbd_quantize_dc(coeff, 16, p->round, p->quant_fp[0], qcoeff,
525
0
                               dqcoeff, pd->dequant[0], eob);
526
0
        break;
527
0
    }
528
0
    return;
529
0
  }
530
412k
#endif  // CONFIG_VP9_HIGHBITDEPTH
531
532
412k
  switch (tx_size) {
533
4.63k
    case TX_32X32:
534
4.63k
      vpx_fdct32x32_1(src_diff, coeff, diff_stride);
535
4.63k
      vpx_quantize_dc_32x32(coeff, p->round, p->quant_fp[0], qcoeff, dqcoeff,
536
4.63k
                            pd->dequant[0], eob);
537
4.63k
      break;
538
10.6k
    case TX_16X16:
539
10.6k
      vpx_fdct16x16_1(src_diff, coeff, diff_stride);
540
10.6k
      vpx_quantize_dc(coeff, 256, p->round, p->quant_fp[0], qcoeff, dqcoeff,
541
10.6k
                      pd->dequant[0], eob);
542
10.6k
      break;
543
66.1k
    case TX_8X8:
544
66.1k
      vpx_fdct8x8_1(src_diff, coeff, diff_stride);
545
66.1k
      vpx_quantize_dc(coeff, 64, p->round, p->quant_fp[0], qcoeff, dqcoeff,
546
66.1k
                      pd->dequant[0], eob);
547
66.1k
      break;
548
330k
    default:
549
330k
      assert(tx_size == TX_4X4);
550
330k
      x->fwd_txfm4x4(src_diff, coeff, diff_stride);
551
330k
      vpx_quantize_dc(coeff, 16, p->round, p->quant_fp[0], qcoeff, dqcoeff,
552
330k
                      pd->dequant[0], eob);
553
330k
      break;
554
412k
  }
555
412k
}
556
557
void vp9_xform_quant(MACROBLOCK *x, int plane, int block, int row, int col,
558
67.9M
                     BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
559
67.9M
  MACROBLOCKD *const xd = &x->e_mbd;
560
67.9M
  const struct macroblock_plane *const p = &x->plane[plane];
561
67.9M
  const struct macroblockd_plane *const pd = &xd->plane[plane];
562
67.9M
  const ScanOrder *const scan_order = &vp9_default_scan_orders[tx_size];
563
67.9M
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
564
67.9M
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
565
67.9M
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
566
67.9M
  uint16_t *const eob = &p->eobs[block];
567
67.9M
  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
568
67.9M
  const int16_t *src_diff;
569
67.9M
  src_diff = &p->src_diff[4 * (row * diff_stride + col)];
570
  // skip block condition should be handled before this is called.
571
67.9M
  assert(!x->skip_block);
572
573
67.9M
#if CONFIG_VP9_HIGHBITDEPTH
574
67.9M
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
575
0
    switch (tx_size) {
576
0
      case TX_32X32:
577
0
        highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
578
0
        vpx_highbd_quantize_b_32x32(coeff, p, qcoeff, dqcoeff, pd->dequant, eob,
579
0
                                    scan_order);
580
0
        break;
581
0
      case TX_16X16:
582
0
        vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
583
0
        vpx_highbd_quantize_b(coeff, 256, p, qcoeff, dqcoeff, pd->dequant, eob,
584
0
                              scan_order);
585
0
        break;
586
0
      case TX_8X8:
587
0
        vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
588
0
        vpx_highbd_quantize_b(coeff, 64, p, qcoeff, dqcoeff, pd->dequant, eob,
589
0
                              scan_order);
590
0
        break;
591
0
      default:
592
0
        assert(tx_size == TX_4X4);
593
0
        x->fwd_txfm4x4(src_diff, coeff, diff_stride);
594
0
        vpx_highbd_quantize_b(coeff, 16, p, qcoeff, dqcoeff, pd->dequant, eob,
595
0
                              scan_order);
596
0
        break;
597
0
    }
598
0
    return;
599
0
  }
600
67.9M
#endif  // CONFIG_VP9_HIGHBITDEPTH
601
602
67.9M
  switch (tx_size) {
603
1.04M
    case TX_32X32:
604
1.04M
      fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
605
1.04M
      vpx_quantize_b_32x32(coeff, p, qcoeff, dqcoeff, pd->dequant, eob,
606
1.04M
                           scan_order);
607
1.04M
      break;
608
4.39M
    case TX_16X16:
609
4.39M
      vpx_fdct16x16(src_diff, coeff, diff_stride);
610
4.39M
      vpx_quantize_b(coeff, 256, p, qcoeff, dqcoeff, pd->dequant, eob,
611
4.39M
                     scan_order);
612
4.39M
      break;
613
18.1M
    case TX_8X8:
614
18.1M
      vpx_fdct8x8(src_diff, coeff, diff_stride);
615
18.1M
      vpx_quantize_b(coeff, 64, p, qcoeff, dqcoeff, pd->dequant, eob,
616
18.1M
                     scan_order);
617
18.1M
      break;
618
44.3M
    default:
619
44.3M
      assert(tx_size == TX_4X4);
620
44.3M
      x->fwd_txfm4x4(src_diff, coeff, diff_stride);
621
44.3M
      vpx_quantize_b(coeff, 16, p, qcoeff, dqcoeff, pd->dequant, eob,
622
44.3M
                     scan_order);
623
44.3M
      break;
624
67.9M
  }
625
67.9M
}
626
627
static void encode_block(int plane, int block, int row, int col,
628
10.8M
                         BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
629
10.8M
  struct encode_b_args *const args = arg;
630
#if CONFIG_MISMATCH_DEBUG
631
  int mi_row = args->mi_row;
632
  int mi_col = args->mi_col;
633
  int output_enabled = args->output_enabled;
634
#endif
635
10.8M
  MACROBLOCK *const x = args->x;
636
10.8M
  MACROBLOCKD *const xd = &x->e_mbd;
637
10.8M
  struct macroblock_plane *const p = &x->plane[plane];
638
10.8M
  struct macroblockd_plane *const pd = &xd->plane[plane];
639
10.8M
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
640
10.8M
  uint8_t *dst;
641
10.8M
  ENTROPY_CONTEXT *a, *l;
642
10.8M
  dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col];
643
10.8M
  a = &args->ta[col];
644
10.8M
  l = &args->tl[row];
645
646
  // TODO(jingning): per transformed block zero forcing only enabled for
647
  // luma component. will integrate chroma components as well.
648
10.8M
  if (x->zcoeff_blk[tx_size][block] && plane == 0) {
649
3.30M
    p->eobs[block] = 0;
650
3.30M
    *a = *l = 0;
651
#if CONFIG_MISMATCH_DEBUG
652
    goto encode_block_end;
653
#else
654
3.30M
    return;
655
3.30M
#endif
656
3.30M
  }
657
658
7.50M
  if (!x->skip_recode) {
659
7.50M
    if (x->quant_fp) {
660
      // Encoding process for rtc mode
661
0
      if (x->skip_txfm[0] == SKIP_TXFM_AC_DC && plane == 0) {
662
        // skip forward transform
663
0
        p->eobs[block] = 0;
664
0
        *a = *l = 0;
665
#if CONFIG_MISMATCH_DEBUG
666
        goto encode_block_end;
667
#else
668
0
        return;
669
0
#endif
670
0
      } else {
671
0
        vp9_xform_quant_fp(x, plane, block, row, col, plane_bsize, tx_size);
672
0
      }
673
7.50M
    } else {
674
7.50M
      if (max_txsize_lookup[plane_bsize] == tx_size) {
675
4.59M
        int txfm_blk_index = (plane << 2) + (block >> (tx_size << 1));
676
4.59M
        if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_NONE) {
677
          // full forward transform and quantization
678
4.59M
          vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);
679
4.59M
        } else if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_AC_ONLY) {
680
          // fast path forward transform and quantization
681
0
          vp9_xform_quant_dc(x, plane, block, row, col, plane_bsize, tx_size);
682
0
        } else {
683
          // skip forward transform
684
0
          p->eobs[block] = 0;
685
0
          *a = *l = 0;
686
#if CONFIG_MISMATCH_DEBUG
687
          goto encode_block_end;
688
#else
689
0
          return;
690
0
#endif
691
0
        }
692
4.59M
      } else {
693
2.91M
        vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);
694
2.91M
      }
695
7.50M
    }
696
7.50M
  }
697
698
7.50M
  if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
699
0
    const int ctx = combine_entropy_contexts(*a, *l);
700
0
    *a = *l = vp9_optimize_b(x, plane, block, tx_size, ctx) > 0;
701
7.50M
  } else {
702
7.50M
    *a = *l = p->eobs[block] > 0;
703
7.50M
  }
704
705
7.50M
  if (p->eobs[block]) *(args->skip) = 0;
706
707
7.50M
  if (x->skip_encode || p->eobs[block] == 0) {
708
#if CONFIG_MISMATCH_DEBUG
709
    goto encode_block_end;
710
#else
711
577k
    return;
712
577k
#endif
713
577k
  }
714
6.93M
#if CONFIG_VP9_HIGHBITDEPTH
715
6.93M
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
716
0
    uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst);
717
0
    switch (tx_size) {
718
0
      case TX_32X32:
719
0
        vp9_highbd_idct32x32_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
720
0
                                 xd->bd);
721
0
        break;
722
0
      case TX_16X16:
723
0
        vp9_highbd_idct16x16_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
724
0
                                 xd->bd);
725
0
        break;
726
0
      case TX_8X8:
727
0
        vp9_highbd_idct8x8_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
728
0
                               xd->bd);
729
0
        break;
730
0
      default:
731
0
        assert(tx_size == TX_4X4);
732
        // this is like vp9_short_idct4x4 but has a special case around eob<=1
733
        // which is significant (not just an optimization) for the lossless
734
        // case.
735
0
        x->highbd_inv_txfm_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
736
0
                               xd->bd);
737
0
        break;
738
0
    }
739
#if CONFIG_MISMATCH_DEBUG
740
    goto encode_block_end;
741
#else
742
0
    return;
743
0
#endif
744
0
  }
745
6.93M
#endif  // CONFIG_VP9_HIGHBITDEPTH
746
747
6.93M
  switch (tx_size) {
748
10.0k
    case TX_32X32:
749
10.0k
      vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
750
10.0k
      break;
751
67.8k
    case TX_16X16:
752
67.8k
      vp9_idct16x16_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
753
67.8k
      break;
754
459k
    case TX_8X8:
755
459k
      vp9_idct8x8_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
756
459k
      break;
757
6.39M
    default:
758
6.39M
      assert(tx_size == TX_4X4);
759
      // this is like vp9_short_idct4x4 but has a special case around eob<=1
760
      // which is significant (not just an optimization) for the lossless
761
      // case.
762
6.39M
      x->inv_txfm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
763
6.39M
      break;
764
6.93M
  }
765
#if CONFIG_MISMATCH_DEBUG
766
encode_block_end:
767
  if (output_enabled) {
768
    int pixel_c, pixel_r;
769
    int blk_w = 1 << (tx_size + TX_UNIT_SIZE_LOG2);
770
    int blk_h = 1 << (tx_size + TX_UNIT_SIZE_LOG2);
771
    mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, col, row,
772
                    pd->subsampling_x, pd->subsampling_y);
773
    mismatch_record_block_tx(dst, pd->dst.stride, plane, pixel_c, pixel_r,
774
                             blk_w, blk_h,
775
                             xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH);
776
  }
777
#endif
778
6.93M
}
779
780
static void encode_block_pass1(int plane, int block, int row, int col,
781
                               BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
782
0
                               void *arg) {
783
0
  MACROBLOCK *const x = (MACROBLOCK *)arg;
784
0
  MACROBLOCKD *const xd = &x->e_mbd;
785
0
  struct macroblock_plane *const p = &x->plane[plane];
786
0
  struct macroblockd_plane *const pd = &xd->plane[plane];
787
0
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
788
0
  uint8_t *dst;
789
0
  dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col];
790
791
0
  vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);
792
793
0
  if (p->eobs[block] > 0) {
794
0
#if CONFIG_VP9_HIGHBITDEPTH
795
0
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
796
0
      x->highbd_inv_txfm_add(dqcoeff, CONVERT_TO_SHORTPTR(dst), pd->dst.stride,
797
0
                             p->eobs[block], xd->bd);
798
0
      return;
799
0
    }
800
0
#endif  // CONFIG_VP9_HIGHBITDEPTH
801
0
    x->inv_txfm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
802
0
  }
803
0
}
804
805
0
void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) {
806
0
  vp9_subtract_plane(x, bsize, 0);
807
0
  vp9_foreach_transformed_block_in_plane(&x->e_mbd, bsize, 0,
808
0
                                         encode_block_pass1, x);
809
0
}
810
811
void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, int mi_col,
812
2.37M
                   int output_enabled) {
813
2.37M
  MACROBLOCKD *const xd = &x->e_mbd;
814
2.37M
  struct optimize_ctx ctx;
815
2.37M
  MODE_INFO *mi = xd->mi[0];
816
2.37M
  int plane;
817
#if CONFIG_MISMATCH_DEBUG
818
  struct encode_b_args arg = { x,
819
                               1,     // enable_trellis_opt
820
                               0.0,   // trellis_opt_thresh
821
                               NULL,  // &sse_calc_done
822
                               NULL,  // &sse
823
                               NULL,  // above entropy context
824
                               NULL,  // left entropy context
825
                               &mi->skip, mi_row, mi_col, output_enabled };
826
#else
827
2.37M
  struct encode_b_args arg = { x,
828
2.37M
                               1,     // enable_trellis_opt
829
2.37M
                               0.0,   // trellis_opt_thresh
830
2.37M
                               NULL,  // &sse_calc_done
831
2.37M
                               NULL,  // &sse
832
2.37M
                               NULL,  // above entropy context
833
2.37M
                               NULL,  // left entropy context
834
2.37M
                               &mi->skip };
835
2.37M
  (void)mi_row;
836
2.37M
  (void)mi_col;
837
2.37M
  (void)output_enabled;
838
2.37M
#endif
839
840
2.37M
  mi->skip = 1;
841
842
2.37M
  if (x->skip) return;
843
844
7.70M
  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
845
5.77M
    if (!x->skip_recode) vp9_subtract_plane(x, bsize, plane);
846
847
5.77M
    if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
848
0
      const struct macroblockd_plane *const pd = &xd->plane[plane];
849
0
      const TX_SIZE tx_size = plane ? get_uv_tx_size(mi, pd) : mi->tx_size;
850
0
      vp9_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane],
851
0
                               ctx.tl[plane]);
852
0
      arg.enable_trellis_opt = 1;
853
5.77M
    } else {
854
5.77M
      arg.enable_trellis_opt = 0;
855
5.77M
    }
856
5.77M
    arg.ta = ctx.ta[plane];
857
5.77M
    arg.tl = ctx.tl[plane];
858
859
5.77M
    vp9_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block,
860
5.77M
                                           &arg);
861
5.77M
  }
862
1.92M
}
863
864
void vp9_encode_block_intra(int plane, int block, int row, int col,
865
                            BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
866
344M
                            void *arg) {
867
344M
  struct encode_b_args *const args = arg;
868
344M
  MACROBLOCK *const x = args->x;
869
344M
  MACROBLOCKD *const xd = &x->e_mbd;
870
344M
  MODE_INFO *mi = xd->mi[0];
871
344M
  struct macroblock_plane *const p = &x->plane[plane];
872
344M
  struct macroblockd_plane *const pd = &xd->plane[plane];
873
344M
  tran_low_t *coeff = BLOCK_OFFSET(p->coeff, block);
874
344M
  tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
875
344M
  tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
876
344M
  const ScanOrder *scan_order;
877
344M
  TX_TYPE tx_type = DCT_DCT;
878
344M
  PREDICTION_MODE mode;
879
344M
  const int bwl = b_width_log2_lookup[plane_bsize];
880
344M
  const int diff_stride = 4 * (1 << bwl);
881
344M
  uint8_t *src, *dst;
882
344M
  int16_t *src_diff;
883
344M
  uint16_t *eob = &p->eobs[block];
884
344M
  const int src_stride = p->src.stride;
885
344M
  const int dst_stride = pd->dst.stride;
886
344M
  int enable_trellis_opt = !x->skip_recode;
887
344M
  ENTROPY_CONTEXT *a = NULL;
888
344M
  ENTROPY_CONTEXT *l = NULL;
889
344M
  int entropy_ctx = 0;
890
344M
  dst = &pd->dst.buf[4 * (row * dst_stride + col)];
891
344M
  src = &p->src.buf[4 * (row * src_stride + col)];
892
344M
  src_diff = &p->src_diff[4 * (row * diff_stride + col)];
893
894
344M
  if (tx_size == TX_4X4) {
895
260M
    tx_type = get_tx_type_4x4(get_plane_type(plane), xd, block);
896
260M
    scan_order = &vp9_scan_orders[TX_4X4][tx_type];
897
260M
    mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mi->uv_mode;
898
260M
  } else {
899
84.3M
    mode = plane == 0 ? mi->mode : mi->uv_mode;
900
84.3M
    if (tx_size == TX_32X32) {
901
3.73M
      scan_order = &vp9_default_scan_orders[TX_32X32];
902
80.6M
    } else {
903
80.6M
      tx_type = get_tx_type(get_plane_type(plane), xd);
904
80.6M
      scan_order = &vp9_scan_orders[tx_size][tx_type];
905
80.6M
    }
906
84.3M
  }
907
908
344M
  vp9_predict_intra_block(
909
344M
      xd, bwl, tx_size, mode, (x->skip_encode || x->fp_src_pred) ? src : dst,
910
344M
      (x->skip_encode || x->fp_src_pred) ? src_stride : dst_stride, dst,
911
344M
      dst_stride, col, row, plane);
912
913
  // skip block condition should be handled before this is called.
914
344M
  assert(!x->skip_block);
915
916
344M
  if (!x->skip_recode) {
917
344M
    const int tx_size_in_pixels = (1 << tx_size) << 2;
918
344M
#if CONFIG_VP9_HIGHBITDEPTH
919
344M
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
920
0
      vpx_highbd_subtract_block(tx_size_in_pixels, tx_size_in_pixels, src_diff,
921
0
                                diff_stride, src, src_stride, dst, dst_stride,
922
0
                                xd->bd);
923
344M
    } else {
924
344M
      vpx_subtract_block(tx_size_in_pixels, tx_size_in_pixels, src_diff,
925
344M
                         diff_stride, src, src_stride, dst, dst_stride);
926
344M
    }
927
#else
928
    vpx_subtract_block(tx_size_in_pixels, tx_size_in_pixels, src_diff,
929
                       diff_stride, src, src_stride, dst, dst_stride);
930
#endif
931
344M
    enable_trellis_opt = do_trellis_opt(pd, src_diff, diff_stride, row, col,
932
344M
                                        plane_bsize, tx_size, args);
933
344M
  }
934
935
344M
  if (enable_trellis_opt) {
936
25.2M
    a = &args->ta[col];
937
25.2M
    l = &args->tl[row];
938
25.2M
    entropy_ctx = combine_entropy_contexts(*a, *l);
939
25.2M
  }
940
941
344M
#if CONFIG_VP9_HIGHBITDEPTH
942
344M
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
943
0
    uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst);
944
0
    switch (tx_size) {
945
0
      case TX_32X32:
946
0
        if (!x->skip_recode) {
947
0
          highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
948
0
          vpx_highbd_quantize_b_32x32(coeff, p, qcoeff, dqcoeff, pd->dequant,
949
0
                                      eob, scan_order);
950
0
        }
951
0
        if (enable_trellis_opt) {
952
0
          *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
953
0
        }
954
0
        if (!x->skip_encode && *eob) {
955
0
          vp9_highbd_idct32x32_add(dqcoeff, dst16, dst_stride, *eob, xd->bd);
956
0
        }
957
0
        break;
958
0
      case TX_16X16:
959
0
        if (!x->skip_recode) {
960
0
          if (tx_type == DCT_DCT)
961
0
            vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
962
0
          else
963
0
            vp9_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type);
964
0
          vpx_highbd_quantize_b(coeff, 256, p, qcoeff, dqcoeff, pd->dequant,
965
0
                                eob, scan_order);
966
0
        }
967
0
        if (enable_trellis_opt) {
968
0
          *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
969
0
        }
970
0
        if (!x->skip_encode && *eob) {
971
0
          vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst16, dst_stride, *eob,
972
0
                                  xd->bd);
973
0
        }
974
0
        break;
975
0
      case TX_8X8:
976
0
        if (!x->skip_recode) {
977
0
          if (tx_type == DCT_DCT)
978
0
            vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
979
0
          else
980
0
            vp9_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type);
981
0
          vpx_highbd_quantize_b(coeff, 64, p, qcoeff, dqcoeff, pd->dequant, eob,
982
0
                                scan_order);
983
0
        }
984
0
        if (enable_trellis_opt) {
985
0
          *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
986
0
        }
987
0
        if (!x->skip_encode && *eob) {
988
0
          vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst16, dst_stride, *eob,
989
0
                                xd->bd);
990
0
        }
991
0
        break;
992
0
      default:
993
0
        assert(tx_size == TX_4X4);
994
0
        if (!x->skip_recode) {
995
0
          if (tx_type != DCT_DCT)
996
0
            vp9_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type);
997
0
          else
998
0
            x->fwd_txfm4x4(src_diff, coeff, diff_stride);
999
0
          vpx_highbd_quantize_b(coeff, 16, p, qcoeff, dqcoeff, pd->dequant, eob,
1000
0
                                scan_order);
1001
0
        }
1002
0
        if (enable_trellis_opt) {
1003
0
          *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
1004
0
        }
1005
0
        if (!x->skip_encode && *eob) {
1006
0
          if (tx_type == DCT_DCT) {
1007
            // this is like vp9_short_idct4x4 but has a special case around
1008
            // eob<=1 which is significant (not just an optimization) for the
1009
            // lossless case.
1010
0
            x->highbd_inv_txfm_add(dqcoeff, dst16, dst_stride, *eob, xd->bd);
1011
0
          } else {
1012
0
            vp9_highbd_iht4x4_16_add(dqcoeff, dst16, dst_stride, tx_type,
1013
0
                                     xd->bd);
1014
0
          }
1015
0
        }
1016
0
        break;
1017
0
    }
1018
0
    if (*eob) *(args->skip) = 0;
1019
0
    return;
1020
0
  }
1021
344M
#endif  // CONFIG_VP9_HIGHBITDEPTH
1022
1023
344M
  switch (tx_size) {
1024
3.73M
    case TX_32X32:
1025
3.73M
      if (!x->skip_recode) {
1026
3.73M
        fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
1027
3.73M
        vpx_quantize_b_32x32(coeff, p, qcoeff, dqcoeff, pd->dequant, eob,
1028
3.73M
                             scan_order);
1029
3.73M
      }
1030
3.73M
      if (enable_trellis_opt) {
1031
513k
        *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
1032
513k
      }
1033
3.73M
      if (!x->skip_encode && *eob)
1034
2.68M
        vp9_idct32x32_add(dqcoeff, dst, dst_stride, *eob);
1035
3.73M
      break;
1036
14.1M
    case TX_16X16:
1037
14.1M
      if (!x->skip_recode) {
1038
14.1M
        vp9_fht16x16(src_diff, coeff, diff_stride, tx_type);
1039
14.1M
        vpx_quantize_b(coeff, 256, p, qcoeff, dqcoeff, pd->dequant, eob,
1040
14.1M
                       scan_order);
1041
14.1M
      }
1042
14.1M
      if (enable_trellis_opt) {
1043
987k
        *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
1044
987k
      }
1045
14.1M
      if (!x->skip_encode && *eob)
1046
11.7M
        vp9_iht16x16_add(tx_type, dqcoeff, dst, dst_stride, *eob);
1047
14.1M
      break;
1048
66.5M
    case TX_8X8:
1049
66.5M
      if (!x->skip_recode) {
1050
66.5M
        vp9_fht8x8(src_diff, coeff, diff_stride, tx_type);
1051
66.5M
        vpx_quantize_b(coeff, 64, p, qcoeff, dqcoeff, pd->dequant, eob,
1052
66.5M
                       scan_order);
1053
66.5M
      }
1054
66.5M
      if (enable_trellis_opt) {
1055
4.25M
        *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
1056
4.25M
      }
1057
66.5M
      if (!x->skip_encode && *eob)
1058
54.7M
        vp9_iht8x8_add(tx_type, dqcoeff, dst, dst_stride, *eob);
1059
66.5M
      break;
1060
260M
    default:
1061
260M
      assert(tx_size == TX_4X4);
1062
260M
      if (!x->skip_recode) {
1063
260M
        if (tx_type != DCT_DCT)
1064
22.6M
          vp9_fht4x4(src_diff, coeff, diff_stride, tx_type);
1065
237M
        else
1066
237M
          x->fwd_txfm4x4(src_diff, coeff, diff_stride);
1067
260M
        vpx_quantize_b(coeff, 16, p, qcoeff, dqcoeff, pd->dequant, eob,
1068
260M
                       scan_order);
1069
260M
      }
1070
260M
      if (enable_trellis_opt) {
1071
19.4M
        *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
1072
19.4M
      }
1073
260M
      if (!x->skip_encode && *eob) {
1074
202M
        if (tx_type == DCT_DCT)
1075
          // this is like vp9_short_idct4x4 but has a special case around eob<=1
1076
          // which is significant (not just an optimization) for the lossless
1077
          // case.
1078
184M
          x->inv_txfm_add(dqcoeff, dst, dst_stride, *eob);
1079
18.2M
        else
1080
18.2M
          vp9_iht4x4_16_add(dqcoeff, dst, dst_stride, tx_type);
1081
202M
      }
1082
260M
      break;
1083
344M
  }
1084
344M
  if (*eob) *(args->skip) = 0;
1085
344M
}
1086
1087
void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane,
1088
18.2M
                                  int enable_trellis_opt) {
1089
18.2M
  const MACROBLOCKD *const xd = &x->e_mbd;
1090
18.2M
  struct optimize_ctx ctx;
1091
#if CONFIG_MISMATCH_DEBUG
1092
  // TODO(angiebird): make mismatch_debug support intra mode
1093
  struct encode_b_args arg = {
1094
    x,
1095
    enable_trellis_opt,
1096
    0.0,   // trellis_opt_thresh
1097
    NULL,  // &sse_calc_done
1098
    NULL,  // &sse
1099
    ctx.ta[plane],
1100
    ctx.tl[plane],
1101
    &xd->mi[0]->skip,
1102
    0,  // mi_row
1103
    0,  // mi_col
1104
    0   // output_enabled
1105
  };
1106
#else
1107
18.2M
  struct encode_b_args arg = { x,
1108
18.2M
                               enable_trellis_opt,
1109
18.2M
                               0.0,   // trellis_opt_thresh
1110
18.2M
                               NULL,  // &sse_calc_done
1111
18.2M
                               NULL,  // &sse
1112
18.2M
                               ctx.ta[plane],
1113
18.2M
                               ctx.tl[plane],
1114
18.2M
                               &xd->mi[0]->skip };
1115
18.2M
#endif
1116
1117
18.2M
  if (enable_trellis_opt && x->optimize &&
1118
0
      (!x->skip_recode || !x->skip_optimize)) {
1119
0
    const struct macroblockd_plane *const pd = &xd->plane[plane];
1120
0
    const TX_SIZE tx_size =
1121
0
        plane ? get_uv_tx_size(xd->mi[0], pd) : xd->mi[0]->tx_size;
1122
0
    vp9_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane], ctx.tl[plane]);
1123
18.2M
  } else {
1124
18.2M
    arg.enable_trellis_opt = 0;
1125
18.2M
  }
1126
1127
18.2M
  vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
1128
18.2M
                                         vp9_encode_block_intra, &arg);
1129
18.2M
}