Coverage Report

Created: 2026-05-16 06:41

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/svt-av1/Source/Lib/Codec/full_loop.c
Line
Count
Source
1
/*
2
* Copyright(c) 2019 Intel Corporation
3
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
4
*
5
* This source code is subject to the terms of the BSD 3-Clause Clear License and
6
* the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear License
7
* was not distributed with this source code in the LICENSE file, you can
8
* obtain it at https://www.aomedia.org/license. If the Alliance for Open
9
* Media Patent License 1.0 was not distributed with this source code in the
10
* PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license.
11
*/
12
13
#include "definitions.h"
14
#include "full_loop.h"
15
#include "pcs.h"
16
#include "rd_cost.h"
17
#include "aom_dsp_rtcd.h"
18
#include "sequence_control_set.h"
19
#include "utility.h"
20
#include "ac_bias.h"
21
22
// Lookup table holding one small integer per TX_SIZES_ALL slot; every entry is 0, 1 or 2.
// NOTE(review): presumably indexed by TxSize and used as a log2 scaling shift — confirm
// against the enum order in definitions.h before relying on the row grouping below.
const int av1_get_tx_scale_tab[TX_SIZES_ALL] = {
    0, 0, 0, 1, 2, // entries 0..4
    0, 0, 0, 0, // entries 5..8
    1, 1, 2, 2, // entries 9..12
    0, 0, 0, 0, // entries 13..16
    1, 1, // entries 17..18
};
23
24
// Forward declarations of kernels whose definitions are not in this part of the file.

// Residual kernel: takes an input plane and a prediction plane (each with offset and
// stride), a destination residual buffer, a high-bit-depth flag and the area size.
void svt_aom_residual_kernel(uint8_t* input, uint32_t input_offset, uint32_t input_stride,
                             uint8_t* pred, uint32_t pred_offset, uint32_t pred_stride,
                             int16_t* residual, uint32_t residual_offset, uint32_t residual_stride,
                             bool hbd, uint32_t area_width, uint32_t area_height);

// SSIM-based spatial full-distortion kernel: takes input and reconstruction planes
// (each with offset and stride), the area size, a high-bit-depth flag and an AC-bias
// factor; returns a 64-bit distortion value.
uint64_t svt_spatial_full_distortion_ssim_kernel(uint8_t* input, uint32_t input_offset, uint32_t input_stride,
                                                 uint8_t* recon, int32_t recon_offset, uint32_t recon_stride,
                                                 uint32_t area_width, uint32_t area_height,
                                                 bool hbd, double ac_bias);
30
31
/*
 * Reference C dead-zone ("b") quantizer.
 *
 * qcoeff_ptr and dqcoeff_ptr are zero-filled for n_coeffs entries. Trailing
 * coefficients (walking the scan order backwards) that lie strictly inside the
 * zero bin are trimmed; every remaining coefficient whose weighted magnitude
 * reaches the zero bin is then quantized and dequantized.
 *
 * The zbin/round/quant/quant_shift/dequant tables are indexed with [rc != 0]:
 * entry 0 applies to coefficient position 0, entry 1 to every other position.
 * qm_ptr / iqm_ptr may be NULL, in which case a flat weight of
 * (1 << AOM_QM_BITS) is used. iscan is not used.
 *
 * On return *eob_ptr holds one plus the last scan index that produced a
 * non-zero level, or 0 when no level is non-zero.
 */
void svt_aom_quantize_b_c(const TranLow* coeff_ptr, intptr_t n_coeffs, const int16_t* zbin_ptr,
                          const int16_t* round_ptr, const int16_t* quant_ptr, const int16_t* quant_shift_ptr,
                          TranLow* qcoeff_ptr, TranLow* dqcoeff_ptr, const int16_t* dequant_ptr, uint16_t* eob_ptr,
                          const int16_t* scan, const int16_t* iscan, const QmVal* qm_ptr, const QmVal* iqm_ptr,
                          const int32_t log_scale) {
    (void)iscan;

    // Zero-bin thresholds (and their negations) after applying log_scale.
    const int32_t zbins[2]  = {ROUND_POWER_OF_TWO(zbin_ptr[0], log_scale), ROUND_POWER_OF_TWO(zbin_ptr[1], log_scale)};
    const int32_t nzbins[2] = {-zbins[0], -zbins[1]};

    memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
    memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));

    // Pre-scan pass: shrink the active range from the end of the scan while the
    // weighted coefficient stays strictly inside (-zbin, +zbin).
    intptr_t active_count = n_coeffs;
    while (active_count > 0) {
        const int32_t rc       = scan[active_count - 1];
        const QmVal   wt       = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
        const int32_t weighted = coeff_ptr[rc] * wt;

        if (weighted >= (zbins[rc != 0] * (1 << AOM_QM_BITS)) || weighted <= (nzbins[rc != 0] * (1 << AOM_QM_BITS))) {
            break;
        }
        active_count--;
    }

    // Quantization pass: only scan positions below active_count can be non-zero.
    // Note: active_count can be zero.
    intptr_t last_nz = -1;
    for (intptr_t pos = 0; pos < active_count; pos++) {
        const int32_t rc        = scan[pos];
        const int32_t coeff     = coeff_ptr[rc];
        const int     sign      = coeff < 0 ? -1 : 0;
        const int32_t magnitude = (coeff ^ sign) - sign; // |coeff| via two's-complement trick
        const QmVal   wt        = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);

        // Coefficients inside the zero bin keep the zero written by memset.
        if (magnitude * wt < (zbins[rc != 0] << AOM_QM_BITS)) {
            continue;
        }

        int64_t tmp = clamp(magnitude + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale), INT16_MIN, INT16_MAX);
        tmp *= wt;
        const int32_t level = (int32_t)(((((tmp * quant_ptr[rc != 0]) >> 16) + tmp) * quant_shift_ptr[rc != 0]) >>
                                        (16 - log_scale + AOM_QM_BITS)); // quantization
        qcoeff_ptr[rc]      = (level ^ sign) - sign;

        const int32_t iwt       = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
        const int32_t dequant   = (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
        const TranLow abs_recon = (level * dequant) >> log_scale;
        dqcoeff_ptr[rc]         = (TranLow)((abs_recon ^ sign) - sign);

        if (level) {
            last_nz = pos;
        }
    }
    *eob_ptr = (uint16_t)(last_nz + 1);
}
84
85
/*
 * Reference C dead-zone ("b") quantizer, high-bit-depth variant.
 *
 * qcoeff_ptr and dqcoeff_ptr are zero-filled for n_coeffs entries. A pre-scan
 * pass records the scan indices of all coefficients whose weighted value lies
 * on or outside the zero bin; only those are quantized and dequantized.
 *
 * The zbin/round/quant/quant_shift/dequant tables are indexed with [rc != 0]:
 * entry 0 applies to coefficient position 0, entry 1 to every other position.
 * qm_ptr / iqm_ptr may be NULL, in which case a flat weight of
 * (1 << AOM_QM_BITS) is used. iscan is not used.
 *
 * n_coeffs must not exceed the capacity of the on-stack index buffer
 * (4096 entries); this is asserted in debug builds.
 *
 * On return *eob_ptr holds one plus the last scan index that produced a
 * non-zero level, or 0 when no level is non-zero.
 */
void svt_aom_highbd_quantize_b_c(const TranLow* coeff_ptr, intptr_t n_coeffs, const int16_t* zbin_ptr,
                                 const int16_t* round_ptr, const int16_t* quant_ptr, const int16_t* quant_shift_ptr,
                                 TranLow* qcoeff_ptr, TranLow* dqcoeff_ptr, const int16_t* dequant_ptr,
                                 uint16_t* eob_ptr, const int16_t* scan, const int16_t* iscan, const QmVal* qm_ptr,
                                 const QmVal* iqm_ptr, const int32_t log_scale) {
    intptr_t eob = -1;
    (void)iscan;

    // Scan indices of the coefficients selected by the pre-scan pass.
    intptr_t idx_arr[4096];
    int      idx = 0;

    // The pre-scan may store up to n_coeffs entries in idx_arr; make the
    // capacity contract explicit instead of silently overrunning the stack.
    assert(n_coeffs >= 0 && n_coeffs <= (intptr_t)(sizeof(idx_arr) / sizeof(idx_arr[0])));

    memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
    memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));

    // Zero-bin thresholds (and their negations) after applying log_scale.
    const int32_t zbins[2]  = {ROUND_POWER_OF_TWO(zbin_ptr[0], log_scale), ROUND_POWER_OF_TWO(zbin_ptr[1], log_scale)};
    const int32_t nzbins[2] = {zbins[0] * -1, zbins[1] * -1};

    // Pre-scan pass
    for (intptr_t i = 0; i < n_coeffs; i++) {
        const int32_t rc    = scan[i];
        const QmVal   wt    = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
        const int32_t coeff = coeff_ptr[rc] * wt;

        // If the coefficient is out of the base ZBIN range, keep it for
        // quantization.
        if (coeff >= (zbins[rc != 0] * (1 << AOM_QM_BITS)) || coeff <= (nzbins[rc != 0] * (1 << AOM_QM_BITS))) {
            idx_arr[idx++] = i;
        }
    }

    // Quantization pass: only process the coefficients selected in
    // pre-scan pass. Note: idx can be zero.
    for (int i = 0; i < idx; i++) {
        const int32_t rc          = scan[idx_arr[i]];
        const int32_t coeff       = coeff_ptr[rc];
        const int     coeff_sign  = coeff < 0 ? -1 : 0;
        const QmVal   wt          = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
        const QmVal   iwt         = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
        const int32_t abs_coeff   = (coeff ^ coeff_sign) - coeff_sign; // |coeff| via two's-complement trick
        const int64_t tmp1        = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale);
        const int64_t tmpw        = tmp1 * wt;
        const int64_t tmp2        = ((tmpw * quant_ptr[rc != 0]) >> 16) + tmpw;
        const int32_t abs_qcoeff  = (int32_t)((tmp2 * quant_shift_ptr[rc != 0]) >> (16 - log_scale + AOM_QM_BITS));
        qcoeff_ptr[rc]            = (TranLow)((abs_qcoeff ^ coeff_sign) - coeff_sign);
        int32_t       dequant     = (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
        const TranLow abs_dqcoeff = (abs_qcoeff * dequant) >> log_scale;
        dqcoeff_ptr[rc]           = (TranLow)((abs_dqcoeff ^ coeff_sign) - coeff_sign);
        if (abs_qcoeff) {
            // idx_arr is filled in increasing scan order, so the last hit wins.
            eob = idx_arr[i];
        }
    }

    *eob_ptr = (uint16_t)(eob + 1);
}
137
138
#if CONFIG_ENABLE_HIGH_BIT_DEPTH
139
void svt_av1_highbd_quantize_b_facade(const TranLow* coeff_ptr, intptr_t n_coeffs, const MacroblockPlane* p,
140
                                      TranLow* qcoeff_ptr, TranLow* dqcoeff_ptr, uint16_t* eob_ptr, const ScanOrder* sc,
141
0
                                      const QuantParam* qparam) {
142
0
    const QmVal* qm_ptr  = qparam->qmatrix;
143
0
    const QmVal* iqm_ptr = qparam->iqmatrix;
144
0
    if (qm_ptr || iqm_ptr) {
145
0
        svt_av1_highbd_quantize_b_qm(coeff_ptr,
146
0
                                     n_coeffs,
147
0
                                     p->zbin_qtx,
148
0
                                     p->round_qtx,
149
0
                                     p->quant_qtx,
150
0
                                     p->quant_shift_qtx,
151
0
                                     qcoeff_ptr,
152
0
                                     dqcoeff_ptr,
153
0
                                     p->dequant_qtx,
154
0
                                     eob_ptr,
155
0
                                     sc->scan,
156
0
                                     sc->iscan,
157
0
                                     qm_ptr,
158
0
                                     iqm_ptr,
159
0
                                     qparam->log_scale);
160
0
    } else {
161
0
        svt_aom_highbd_quantize_b(coeff_ptr,
162
0
                                  n_coeffs,
163
0
                                  p->zbin_qtx,
164
0
                                  p->round_qtx,
165
0
                                  p->quant_qtx,
166
0
                                  p->quant_shift_qtx,
167
0
                                  qcoeff_ptr,
168
0
                                  dqcoeff_ptr,
169
0
                                  p->dequant_qtx,
170
0
                                  eob_ptr,
171
0
                                  sc->scan,
172
0
                                  sc->iscan,
173
0
                                  NULL,
174
0
                                  NULL,
175
0
                                  qparam->log_scale);
176
0
    }
177
0
    assert(qparam->log_scale <= 2);
178
0
}
179
#endif
180
181
static void av1_quantize_b_facade_ii(const TranLow* coeff_ptr, intptr_t n_coeffs, const MacroblockPlane* p,
182
                                     TranLow* qcoeff_ptr, TranLow* dqcoeff_ptr, uint16_t* eob_ptr, const ScanOrder* sc,
183
1.19M
                                     const QuantParam* qparam) {
184
1.19M
    const QmVal* qm_ptr  = qparam->qmatrix;
185
1.19M
    const QmVal* iqm_ptr = qparam->iqmatrix;
186
1.19M
    if (qm_ptr || iqm_ptr) {
187
0
        svt_av1_quantize_b_qm(coeff_ptr,
188
0
                              n_coeffs,
189
0
                              p->zbin_qtx,
190
0
                              p->round_qtx,
191
0
                              p->quant_qtx,
192
0
                              p->quant_shift_qtx,
193
0
                              qcoeff_ptr,
194
0
                              dqcoeff_ptr,
195
0
                              p->dequant_qtx,
196
0
                              eob_ptr,
197
0
                              sc->scan,
198
0
                              sc->iscan,
199
0
                              qm_ptr,
200
0
                              iqm_ptr,
201
0
                              qparam->log_scale);
202
1.19M
    } else {
203
1.19M
        svt_aom_quantize_b(coeff_ptr,
204
1.19M
                           n_coeffs,
205
1.19M
                           p->zbin_qtx,
206
1.19M
                           p->round_qtx,
207
1.19M
                           p->quant_qtx,
208
1.19M
                           p->quant_shift_qtx,
209
1.19M
                           qcoeff_ptr,
210
1.19M
                           dqcoeff_ptr,
211
1.19M
                           p->dequant_qtx,
212
1.19M
                           eob_ptr,
213
1.19M
                           sc->scan,
214
1.19M
                           sc->iscan,
215
1.19M
                           NULL,
216
1.19M
                           NULL,
217
1.19M
                           qparam->log_scale);
218
1.19M
    }
219
1.19M
    assert(qparam->log_scale <= 2);
220
1.19M
}
221
222
/*
 * Shared C implementation behind the "fp" quantizer entry points in this file.
 *
 * qcoeff_ptr and dqcoeff_ptr are zero-filled for n_coeffs entries, then every
 * coefficient is visited in scan order. A coefficient is quantized only when
 * its magnitude passes a threshold derived from dequant_ptr[rc != 0]; the
 * rounded magnitude is clamped to the int16 range before being multiplied by
 * quant_ptr[rc != 0].
 *
 * When both qm_ptr and iqm_ptr are NULL the unweighted path is used; otherwise
 * the per-position weights are applied, with a NULL matrix replaced by the
 * flat value (1 << AOM_QM_BITS).
 *
 * zbin_ptr, quant_shift_ptr and iscan are not used. On return *eob_ptr holds
 * one plus the last scan index that produced a non-zero level (0 if none).
 */
static void quantize_fp_helper_c(const TranLow* coeff_ptr, intptr_t n_coeffs, const int16_t* zbin_ptr,
                                 const int16_t* round_ptr, const int16_t* quant_ptr, const int16_t* quant_shift_ptr,
                                 TranLow* qcoeff_ptr, TranLow* dqcoeff_ptr, const int16_t* dequant_ptr,
                                 uint16_t* eob_ptr, const int16_t* scan, const int16_t* iscan, const QmVal* qm_ptr,
                                 const QmVal* iqm_ptr, int log_scale) {
    (void)zbin_ptr;
    (void)quant_shift_ptr;
    (void)iscan;

    // Rounding offsets for position 0 and for all other positions, after log_scale.
    const int rounding[2] = {ROUND_POWER_OF_TWO(round_ptr[0], log_scale), ROUND_POWER_OF_TWO(round_ptr[1], log_scale)};
    int       last_nz     = -1;

    memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
    memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));

    if (qm_ptr != NULL || iqm_ptr != NULL) {
        // Weighted path: at least one quantization matrix was supplied.
        for (int pos = 0; pos < n_coeffs; pos++) {
            const int   rc      = scan[pos];
            const int   coeff   = coeff_ptr[rc];
            const QmVal wt      = qm_ptr ? qm_ptr[rc] : (1 << AOM_QM_BITS);
            const QmVal iwt     = iqm_ptr ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
            const int   dequant = (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
            const int   sign    = coeff < 0 ? -1 : 0;
            int64_t     mag     = (coeff ^ sign) - sign; // |coeff| via two's-complement trick

            // Below the threshold the zero written by memset is kept.
            if (mag * wt < (dequant_ptr[rc != 0] << (AOM_QM_BITS - (1 + log_scale)))) {
                continue;
            }

            mag += rounding[rc != 0];
            mag = clamp64(mag, INT16_MIN, INT16_MAX);

            const int level = (int)((mag * wt * quant_ptr[rc != 0]) >> (16 - log_scale + AOM_QM_BITS));
            qcoeff_ptr[rc]  = (level ^ sign) - sign;

            const TranLow abs_recon = (level * dequant) >> log_scale;
            dqcoeff_ptr[rc]         = (abs_recon ^ sign) - sign;

            if (level) {
                last_nz = pos;
            }
        }
    } else {
        // Unweighted path: no quantization matrices.
        for (int pos = 0; pos < n_coeffs; pos++) {
            const int     rc     = scan[pos];
            const int32_t thresh = (int32_t)(dequant_ptr[rc != 0]);
            const int     coeff  = coeff_ptr[rc];
            const int     sign   = coeff < 0 ? -1 : 0;
            const int64_t mag    = (coeff ^ sign) - sign; // |coeff| via two's-complement trick

            // Below the threshold the zero written by memset is kept.
            if ((mag << (1 + log_scale)) < thresh) {
                continue;
            }

            const int64_t rounded = clamp64(mag + rounding[rc != 0], INT16_MIN, INT16_MAX);
            const int     level   = (int)((rounded * quant_ptr[rc != 0]) >> (16 - log_scale));
            if (!level) {
                continue;
            }

            qcoeff_ptr[rc]          = (level ^ sign) - sign;
            const TranLow abs_recon = (level * dequant_ptr[rc != 0]) >> log_scale;
            dqcoeff_ptr[rc]         = (abs_recon ^ sign) - sign;
            last_nz                 = pos;
        }
    }
    *eob_ptr = last_nz + 1;
}
285
286
/*
 * C "fp" quantizer without quantization matrices, log_scale fixed to 0.
 * Forwards every argument unchanged to quantize_fp_helper_c().
 */
void svt_av1_quantize_fp_c(const TranLow* coeff_ptr, intptr_t n_coeffs, const int16_t* zbin_ptr,
                           const int16_t* round_ptr, const int16_t* quant_ptr, const int16_t* quant_shift_ptr,
                           TranLow* qcoeff_ptr, TranLow* dqcoeff_ptr, const int16_t* dequant_ptr, uint16_t* eob_ptr,
                           const int16_t* scan, const int16_t* iscan) {
    quantize_fp_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr,
                         qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
                         NULL, // qm_ptr
                         NULL, // iqm_ptr
                         0); // log_scale
}
306
307
/*
 * C "fp" quantizer with caller-supplied quantization matrices and log_scale.
 * Forwards every argument unchanged to quantize_fp_helper_c().
 */
void svt_av1_quantize_fp_qm_c(const TranLow* coeff_ptr, intptr_t n_coeffs, const int16_t* zbin_ptr,
                              const int16_t* round_ptr, const int16_t* quant_ptr, const int16_t* quant_shift_ptr,
                              TranLow* qcoeff_ptr, TranLow* dqcoeff_ptr, const int16_t* dequant_ptr, uint16_t* eob_ptr,
                              const int16_t* scan, const int16_t* iscan, const QmVal* qm_ptr, const QmVal* iqm_ptr,
                              int16_t log_scale) {
    quantize_fp_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr,
                         qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
                         qm_ptr, iqm_ptr, log_scale);
}
328
329
/*
 * Shared C implementation behind the high-bit-depth "fp" quantizer entry
 * points in this file.
 *
 * Every coefficient is visited in scan order. A coefficient is quantized only
 * when its magnitude passes a threshold derived from dequant_ptr[rc != 0];
 * otherwise its qcoeff/dqcoeff entries are written as 0. No memset is done up
 * front: each visited position is written explicitly.
 *
 * When both qm_ptr and iqm_ptr are NULL the unweighted path is used; otherwise
 * the per-position weights are applied, with a NULL matrix replaced by the
 * flat value (1 << AOM_QM_BITS).
 *
 * zbin_ptr, quant_shift_ptr and iscan are not used. On return *eob_ptr holds
 * one plus the last scan index that produced a non-zero level (0 if none).
 */
static void highbd_quantize_fp_helper_c(const TranLow* coeff_ptr, intptr_t count, const int16_t* zbin_ptr,
                                        const int16_t* round_ptr, const int16_t* quant_ptr,
                                        const int16_t* quant_shift_ptr, TranLow* qcoeff_ptr, TranLow* dqcoeff_ptr,
                                        const int16_t* dequant_ptr, uint16_t* eob_ptr, const int16_t* scan,
                                        const int16_t* iscan, const QmVal* qm_ptr, const QmVal* iqm_ptr,
                                        int16_t log_scale) {
    (void)zbin_ptr;
    (void)quant_shift_ptr;
    (void)iscan;

    const int shift   = 16 - log_scale;
    int       last_nz = -1;

    if (!qm_ptr && !iqm_ptr) {
        // Unweighted path: no quantization matrices.
        const int scaled_round[2] = {
            ROUND_POWER_OF_TWO(round_ptr[0], log_scale),
            ROUND_POWER_OF_TWO(round_ptr[1], log_scale),
        };
        for (int pos = 0; pos < count; pos++) {
            const int rc    = scan[pos];
            const int coeff = coeff_ptr[rc];
            const int is_ac = (rc != 0);
            const int sign  = coeff < 0 ? -1 : 0;
            const int mag   = (coeff ^ sign) - sign; // |coeff| via two's-complement trick
            const int round = scaled_round[is_ac];

            if ((mag << (1 + log_scale)) < dequant_ptr[is_ac]) {
                qcoeff_ptr[rc]  = 0;
                dqcoeff_ptr[rc] = 0;
                continue;
            }

            const int     quant     = quant_ptr[is_ac];
            const int     dequant   = dequant_ptr[is_ac];
            const int64_t rounded   = (int64_t)mag + round;
            const int     level     = (int)((rounded * quant) >> shift);
            const TranLow abs_recon = (level * dequant) >> log_scale;

            qcoeff_ptr[rc]  = (TranLow)((level ^ sign) - sign);
            dqcoeff_ptr[rc] = (TranLow)((abs_recon ^ sign) - sign);
            if (level) {
                last_nz = pos;
            }
        }
    } else {
        // Weighted path: at least one quantization matrix was supplied.
        for (int pos = 0; pos < count; pos++) {
            const int     rc      = scan[pos];
            const int     coeff   = coeff_ptr[rc];
            const QmVal   wt      = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
            const QmVal   iwt     = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
            const int     dequant = (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
            const int     sign    = coeff < 0 ? -1 : 0;
            const int64_t mag     = (coeff ^ sign) - sign; // |coeff| via two's-complement trick

            if (mag * wt < (dequant_ptr[rc != 0] << (AOM_QM_BITS - (1 + log_scale)))) {
                qcoeff_ptr[rc]  = 0;
                dqcoeff_ptr[rc] = 0;
                continue;
            }

            const int64_t rounded   = mag + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale);
            const int     level     = (int)((rounded * quant_ptr[rc != 0] * wt) >> (shift + AOM_QM_BITS));
            const TranLow abs_recon = (level * dequant) >> log_scale;

            qcoeff_ptr[rc]  = (TranLow)((level ^ sign) - sign);
            dqcoeff_ptr[rc] = (TranLow)((abs_recon ^ sign) - sign);
            if (level) {
                last_nz = pos;
            }
        }
    }
    *eob_ptr = last_nz + 1;
}
398
399
/*
 * C high-bit-depth "fp" quantizer without quantization matrices.
 * Forwards every argument, plus NULL for both matrices, to
 * highbd_quantize_fp_helper_c().
 */
void svt_av1_highbd_quantize_fp_c(const TranLow* coeff_ptr, intptr_t count, const int16_t* zbin_ptr,
                                  const int16_t* round_ptr, const int16_t* quant_ptr, const int16_t* quant_shift_ptr,
                                  TranLow* qcoeff_ptr, TranLow* dqcoeff_ptr, const int16_t* dequant_ptr,
                                  uint16_t* eob_ptr, const int16_t* scan, const int16_t* iscan, int16_t log_scale) {
    highbd_quantize_fp_helper_c(coeff_ptr, count, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr,
                                qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
                                NULL, // qm_ptr
                                NULL, // iqm_ptr
                                log_scale);
}
419
420
/*
 * C "fp" quantizer without quantization matrices, log_scale fixed to 1.
 * Forwards every argument unchanged to quantize_fp_helper_c().
 */
void svt_av1_quantize_fp_32x32_c(const TranLow* coeff_ptr, intptr_t n_coeffs, const int16_t* zbin_ptr,
                                 const int16_t* round_ptr, const int16_t* quant_ptr, const int16_t* quant_shift_ptr,
                                 TranLow* qcoeff_ptr, TranLow* dqcoeff_ptr, const int16_t* dequant_ptr,
                                 uint16_t* eob_ptr, const int16_t* scan, const int16_t* iscan) {
    quantize_fp_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr,
                         qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
                         NULL, // qm_ptr
                         NULL, // iqm_ptr
                         1); // log_scale
}
440
441
/*
 * C "fp" quantizer without quantization matrices, log_scale fixed to 2.
 * Forwards every argument unchanged to quantize_fp_helper_c().
 */
void svt_av1_quantize_fp_64x64_c(const TranLow* coeff_ptr, intptr_t n_coeffs, const int16_t* zbin_ptr,
                                 const int16_t* round_ptr, const int16_t* quant_ptr, const int16_t* quant_shift_ptr,
                                 TranLow* qcoeff_ptr, TranLow* dqcoeff_ptr, const int16_t* dequant_ptr,
                                 uint16_t* eob_ptr, const int16_t* scan, const int16_t* iscan) {
    quantize_fp_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr,
                         qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
                         NULL, // qm_ptr
                         NULL, // iqm_ptr
                         2); // log_scale
}
461
462
void svt_av1_quantize_fp_facade(const TranLow* coeff_ptr, intptr_t n_coeffs, const MacroblockPlane* p,
463
                                TranLow* qcoeff_ptr, TranLow* dqcoeff_ptr, uint16_t* eob_ptr, const ScanOrder* sc,
464
22.6k
                                const QuantParam* qparam) {
465
22.6k
    const QmVal* qm_ptr  = qparam->qmatrix;
466
22.6k
    const QmVal* iqm_ptr = qparam->iqmatrix;
467
468
22.6k
    if (qm_ptr || iqm_ptr) {
469
0
        svt_av1_quantize_fp_qm(coeff_ptr,
470
0
                               n_coeffs,
471
0
                               p->zbin_qtx,
472
0
                               p->round_fp_qtx,
473
0
                               p->quant_fp_qtx,
474
0
                               p->quant_shift_qtx,
475
0
                               qcoeff_ptr,
476
0
                               dqcoeff_ptr,
477
0
                               p->dequant_qtx,
478
0
                               eob_ptr,
479
0
                               sc->scan,
480
0
                               sc->iscan,
481
0
                               qm_ptr,
482
0
                               iqm_ptr,
483
0
                               qparam->log_scale);
484
22.6k
    } else {
485
22.6k
        switch (qparam->log_scale) {
486
13.7k
        case 0:
487
13.7k
            svt_av1_quantize_fp(coeff_ptr,
488
13.7k
                                n_coeffs,
489
13.7k
                                p->zbin_qtx,
490
13.7k
                                p->round_fp_qtx,
491
13.7k
                                p->quant_fp_qtx,
492
13.7k
                                p->quant_shift_qtx,
493
13.7k
                                qcoeff_ptr,
494
13.7k
                                dqcoeff_ptr,
495
13.7k
                                p->dequant_qtx,
496
13.7k
                                eob_ptr,
497
13.7k
                                sc->scan,
498
13.7k
                                sc->iscan);
499
13.7k
            break;
500
6.09k
        case 1:
501
6.09k
            svt_av1_quantize_fp_32x32(coeff_ptr,
502
6.09k
                                      n_coeffs,
503
6.09k
                                      p->zbin_qtx,
504
6.09k
                                      p->round_fp_qtx,
505
6.09k
                                      p->quant_fp_qtx,
506
6.09k
                                      p->quant_shift_qtx,
507
6.09k
                                      qcoeff_ptr,
508
6.09k
                                      dqcoeff_ptr,
509
6.09k
                                      p->dequant_qtx,
510
6.09k
                                      eob_ptr,
511
6.09k
                                      sc->scan,
512
6.09k
                                      sc->iscan);
513
6.09k
            break;
514
2.77k
        case 2:
515
2.77k
            svt_av1_quantize_fp_64x64(coeff_ptr,
516
2.77k
                                      n_coeffs,
517
2.77k
                                      p->zbin_qtx,
518
2.77k
                                      p->round_fp_qtx,
519
2.77k
                                      p->quant_fp_qtx,
520
2.77k
                                      p->quant_shift_qtx,
521
2.77k
                                      qcoeff_ptr,
522
2.77k
                                      dqcoeff_ptr,
523
2.77k
                                      p->dequant_qtx,
524
2.77k
                                      eob_ptr,
525
2.77k
                                      sc->scan,
526
2.77k
                                      sc->iscan);
527
2.77k
            break;
528
0
        default:
529
0
            assert(0);
530
22.6k
        }
531
22.6k
    }
532
22.6k
}
533
534
#if CONFIG_ENABLE_HIGH_BIT_DEPTH
535
void svt_av1_highbd_quantize_fp_facade(const TranLow* coeff_ptr, intptr_t n_coeffs, const MacroblockPlane* p,
536
                                       TranLow* qcoeff_ptr, TranLow* dqcoeff_ptr, uint16_t* eob_ptr,
537
0
                                       const ScanOrder* sc, const QuantParam* qparam) {
538
0
    const QmVal* qm_ptr  = qparam->qmatrix;
539
0
    const QmVal* iqm_ptr = qparam->iqmatrix;
540
0
    if (qm_ptr != NULL && iqm_ptr != NULL) {
541
0
        svt_av1_highbd_quantize_fp_qm(coeff_ptr,
542
0
                                      n_coeffs,
543
0
                                      p->zbin_qtx,
544
0
                                      p->round_fp_qtx,
545
0
                                      p->quant_fp_qtx,
546
0
                                      p->quant_shift_qtx,
547
0
                                      qcoeff_ptr,
548
0
                                      dqcoeff_ptr,
549
0
                                      p->dequant_qtx,
550
0
                                      eob_ptr,
551
0
                                      sc->scan,
552
0
                                      sc->iscan,
553
0
                                      qm_ptr,
554
0
                                      iqm_ptr,
555
0
                                      qparam->log_scale);
556
0
    } else {
557
0
        svt_av1_highbd_quantize_fp(coeff_ptr,
558
0
                                   n_coeffs,
559
0
                                   p->zbin_qtx,
560
0
                                   p->round_fp_qtx,
561
0
                                   p->quant_fp_qtx,
562
0
                                   p->quant_shift_qtx,
563
0
                                   qcoeff_ptr,
564
0
                                   dqcoeff_ptr,
565
0
                                   p->dequant_qtx,
566
0
                                   eob_ptr,
567
0
                                   sc->scan,
568
0
                                   sc->iscan,
569
0
                                   qparam->log_scale);
570
0
    }
571
0
}
572
#endif
573
574
/*
 * C implementation of the quant-matrix high bit-depth "fp" quantizer.
 * It forwards every argument, unchanged and in the same order, to
 * highbd_quantize_fp_helper_c().
 */
void svt_av1_highbd_quantize_fp_qm_c(const TranLow* coeff_ptr, intptr_t count, const int16_t* zbin_ptr,
                                     const int16_t* round_ptr, const int16_t* quant_ptr, const int16_t* quant_shift_ptr,
                                     TranLow* qcoeff_ptr, TranLow* dqcoeff_ptr, const int16_t* dequant_ptr,
                                     uint16_t* eob_ptr, const int16_t* scan, const int16_t* iscan, const QmVal* qm_ptr,
                                     const QmVal* iqm_ptr, int16_t log_scale) {
    highbd_quantize_fp_helper_c(coeff_ptr, count,
                                zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr,
                                qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
                                eob_ptr, scan, iscan,
                                qm_ptr, iqm_ptr,
                                log_scale);
}
595
596
/*
 * Returns the base-level context for the coefficient at scan position scan_idx.
 *
 * For any coefficient other than the last one (is_last == 0) the context is
 * delegated to get_lower_levels_ctx(). For the last coefficient the context
 * depends only on where scan_idx falls relative to (height << bwl):
 *   0 at scan position 0, 1 up to 1/8 of that value, 2 up to 1/4, otherwise 3.
 */
static INLINE int get_lower_levels_ctx_general(int is_last, int scan_idx, int bwl, int height, const uint8_t* levels,
                                               int coeff_idx, TxSize tx_size, TxClass tx_class) {
    if (!is_last) {
        return get_lower_levels_ctx(levels, coeff_idx, bwl, tx_size, tx_class);
    }
    if (scan_idx == 0) {
        return 0;
    }

    // height << bwl: presumably the coefficient count of the block (width == 1 << bwl) - TODO confirm.
    const int area = height << bwl;
    if (scan_idx <= (area >> 3)) {
        return 1;
    }
    return (scan_idx <= (area >> 2)) ? 2 : 3;
}
612
613
17.4k
/*
 * Rate of the Golomb-coded tail of an absolute level.
 *
 * Levels below 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE have no tail and cost 0.
 * Otherwise the cost is av1_cost_literal(2 * length - 1), where length is
 * get_msb(r) + 1 for r = abs_qc - COEFF_BASE_RANGE - NUM_BASE_LEVELS.
 */
static INLINE int32_t get_golomb_cost(int32_t abs_qc) {
    if (abs_qc < 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE) {
        return 0;
    }
    const int32_t tail     = abs_qc - COEFF_BASE_RANGE - NUM_BASE_LEVELS;
    const int32_t num_bits = get_msb(tail) + 1;
    return av1_cost_literal(2 * num_bits - 1);
}
621
622
17.4k
/*
 * Rate of the part of a level above NUM_BASE_LEVELS: the coeff_lps entry
 * selected by min(level - 1 - NUM_BASE_LEVELS, COEFF_BASE_RANGE) plus the
 * Golomb tail cost of the level.
 * The caller must pass level > NUM_BASE_LEVELS so the table index is not negative.
 */
static INLINE int get_br_cost(TranLow level, const int* coeff_lps) {
    const int lps_idx  = AOMMIN(level - 1 - NUM_BASE_LEVELS, COEFF_BASE_RANGE);
    const int lps_rate = coeff_lps[lps_idx];
    return lps_rate + get_golomb_cost(level);
}
626
627
static INLINE int get_coeff_cost_general(int is_last, int ci, TranLow abs_qc, int sign, int coeff_ctx, int dc_sign_ctx,
628
                                         const LvMapCoeffCost* txb_costs, int bwl, TxClass tx_class,
629
17.4k
                                         const uint8_t* levels) {
630
17.4k
    int cost = 0;
631
17.4k
    if (is_last) {
632
17.4k
        cost += txb_costs->base_eob_cost[coeff_ctx][AOMMIN(abs_qc, 3) - 1];
633
17.4k
    } else {
634
0
        cost += txb_costs->base_cost[coeff_ctx][AOMMIN(abs_qc, 3)];
635
0
    }
636
17.4k
    if (abs_qc != 0) {
637
17.4k
        if (ci == 0) {
638
17.4k
            cost += txb_costs->dc_sign_cost[dc_sign_ctx][sign];
639
17.4k
        } else {
640
0
            cost += av1_cost_literal(1);
641
0
        }
642
17.4k
        if (abs_qc > NUM_BASE_LEVELS) {
643
17.4k
            int br_ctx;
644
17.4k
            if (is_last) {
645
17.4k
                br_ctx = get_br_ctx_eob(ci, bwl, tx_class);
646
17.4k
            } else {
647
0
                br_ctx = get_br_ctx(levels, ci, bwl, tx_class);
648
0
            }
649
17.4k
            cost += get_br_cost(abs_qc, txb_costs->lps_cost[br_ctx]);
650
17.4k
        }
651
17.4k
    }
652
17.4k
    return cost;
653
17.4k
}
654
655
29.4k
/*
 * Squared error between a transform coefficient and its dequantized value,
 * with the difference multiplied by (1 << shift) before squaring.
 * The subtraction is done in 64 bits.
 */
static INLINE int64_t get_coeff_dist(TranLow tcoeff, TranLow dqcoeff, int shift) {
    const int64_t scaled_err = ((int64_t)tcoeff - dqcoeff) * (int64_t)(1lu << shift);
    return SQR(scaled_err);
}
658
659
8.75k
/*
 * Computes the signed quantized and dequantized values obtained by lowering
 * the absolute level abs_qc by one.
 *
 *   *qc_low  = +/-(abs_qc - 1)
 *   *dqc_low = +/-(((abs_qc - 1) * dqv) >> shift)
 *
 * sign is 1 for a negative coefficient and 0 otherwise; (-sign ^ v) + sign
 * negates v exactly when sign is 1, which the asserts double-check.
 */
static INLINE void get_qc_dqc_low(TranLow abs_qc, int sign, int dqv, int shift, TranLow* qc_low, TranLow* dqc_low) {
    const TranLow lowered_abs_qc  = abs_qc - 1;
    const TranLow lowered_abs_dqc = (lowered_abs_qc * dqv) >> shift;

    *qc_low = (-sign ^ lowered_abs_qc) + sign;
    assert((sign ? -lowered_abs_qc : lowered_abs_qc) == *qc_low);

    *dqc_low = (-sign ^ lowered_abs_dqc) + sign;
    assert((sign ? -lowered_abs_dqc : lowered_abs_dqc) == *dqc_low);
}
667
668
/*
 * Lookup tables used by get_br_cost_with_diff() for
 * r = level - COEFF_BASE_RANGE - NUM_BASE_LEVELS, 0 <= r < 32.
 *
 * golomb_bits_cost[r] is the tail cost for r, in multiples of 512
 * (presumably the cost of one literal bit - TODO confirm against av1_cost_literal).
 * golomb_cost_diff[r] is golomb_bits_cost[r] - golomb_bits_cost[r - 1], i.e. the
 * rate saved when r is lowered by one; it is non-zero only at r = 1 and at powers of two.
 */
static const int golomb_bits_cost[32] = {
    0,                                                                      // r = 0
    512,                                                                    // r = 1
    512 * 3, 512 * 3,                                                       // r = 2..3
    512 * 5, 512 * 5, 512 * 5, 512 * 5,                                     // r = 4..7
    512 * 7, 512 * 7, 512 * 7, 512 * 7, 512 * 7, 512 * 7, 512 * 7, 512 * 7, // r = 8..15
    512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, // r = 16..23
    512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, // r = 24..31
};
static const int golomb_cost_diff[32] = {
    0,                                                    // r = 0
    512,                                                  // r = 1
    512 * 2, 0,                                           // r = 2..3
    512 * 2, 0, 0, 0,                                     // r = 4..7
    512 * 2, 0, 0, 0, 0, 0, 0, 0,                         // r = 8..15
    512 * 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // r = 16..31
};
674
675
0
/*
 * Same rate as get_br_cost(level, coeff_lps), and additionally adds to *diff
 * the rate that would be saved by lowering level by one.
 *
 * The saving has two sources:
 *   - while level is at or below the start of the Golomb tail, the entry
 *     coeff_lps[base_range + COEFF_BASE_RANGE + 1] (presumably a precomputed
 *     difference stored behind the costs - TODO confirm against the table setup);
 *   - at or above that point, the change in Golomb tail cost: table-driven
 *     for r < 32, otherwise 1024 when r is a power of two and 0 if not.
 *
 * *diff is accumulated into, never reset; the caller initialises it.
 */
static INLINE int get_br_cost_with_diff(TranLow level, const int* coeff_lps, int* diff) {
    const int golomb_start = COEFF_BASE_RANGE + 1 + NUM_BASE_LEVELS;
    const int base_range   = AOMMIN(level - 1 - NUM_BASE_LEVELS, COEFF_BASE_RANGE);
    int       tail_cost    = 0;

    if (level <= golomb_start) {
        *diff += coeff_lps[base_range + COEFF_BASE_RANGE + 1];
    }

    if (level >= golomb_start) {
        const int r = level - COEFF_BASE_RANGE - NUM_BASE_LEVELS;
        if (r >= 32) {
            // Beyond the tables: the tail grows only when r crosses a power of two.
            tail_cost = get_golomb_cost(level);
            *diff += ((r & (r - 1)) == 0) ? 1024 : 0;
        } else {
            tail_cost = golomb_bits_cost[r];
            *diff += golomb_cost_diff[r];
        }
    }

    return coeff_lps[base_range] + tail_cost;
}
695
696
static AOM_FORCE_INLINE int get_two_coeff_cost_simple(int ci, TranLow abs_qc, int coeff_ctx,
697
                                                      const LvMapCoeffCost* txb_costs, int bwl, TxClass tx_class,
698
0
                                                      const uint8_t* levels, int* cost_low) {
699
    // this simple version assumes the coeff's scan_idx is not DC (scan_idx != 0)
700
    // and not the last (scan_idx != eob - 1)
701
0
    assert(ci > 0);
702
    //assert(abs_qc + 4 < 4);
703
0
    int cost = txb_costs->base_cost[coeff_ctx][AOMMIN(abs_qc, 3)];
704
0
    int diff = 0;
705
0
    if (abs_qc <= 3) {
706
0
        diff = txb_costs->base_cost[coeff_ctx][abs_qc + 4];
707
0
    }
708
0
    if (abs_qc) {
709
0
        cost += av1_cost_literal(1);
710
0
        if (abs_qc > NUM_BASE_LEVELS) {
711
0
            const int br_ctx      = get_br_ctx(levels, ci, bwl, tx_class);
712
0
            int       brcost_diff = 0;
713
0
            cost += get_br_cost_with_diff(abs_qc, txb_costs->lps_cost[br_ctx], &brcost_diff);
714
0
            diff += brcost_diff;
715
0
        }
716
0
    }
717
0
    *cost_low = cost - diff;
718
719
0
    return cost;
720
0
}
721
722
static INLINE int get_coeff_cost_eob(int ci, TranLow abs_qc, int sign, int coeff_ctx, int dc_sign_ctx,
723
1.57k
                                     const LvMapCoeffCost* txb_costs, int bwl, TxClass tx_class) {
724
1.57k
    int cost = 0;
725
1.57k
    cost += txb_costs->base_eob_cost[coeff_ctx][AOMMIN(abs_qc, 3) - 1];
726
1.57k
    if (abs_qc != 0) {
727
1.57k
        if (ci == 0) {
728
1.57k
            cost += txb_costs->dc_sign_cost[dc_sign_ctx][sign];
729
1.57k
        } else {
730
0
            cost += av1_cost_literal(1);
731
0
        }
732
1.57k
        if (abs_qc > NUM_BASE_LEVELS) {
733
0
            int br_ctx;
734
0
            br_ctx = get_br_ctx_eob(ci, bwl, tx_class);
735
0
            cost += get_br_cost(abs_qc, txb_costs->lps_cost[br_ctx]);
736
0
        }
737
1.57k
    }
738
1.57k
    return cost;
739
1.57k
}
740
741
8.75k
static INLINE int get_dqv(const int16_t* dequant, int coeff_idx, const QmVal* iqm_ptr) {
742
8.75k
    int dqv = dequant[!!coeff_idx];
743
8.75k
    if (iqm_ptr != NULL) {
744
0
        dqv = ((iqm_ptr[coeff_idx] * dqv) + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
745
0
    }
746
8.75k
    return dqv;
747
8.75k
}
748
749
static AOM_FORCE_INLINE void update_coeff_eob(int* accu_rate, int64_t* accu_dist, uint16_t* eob, int* nz_num,
750
                                              int* nz_ci, int si, TxSize tx_size, TxClass tx_class, int bwl, int height,
751
                                              int dc_sign_ctx, int64_t rdmult, int shift, const int16_t* dequant,
752
                                              const int16_t* scan, const LvMapEobCost* txb_eob_costs,
753
                                              const LvMapCoeffCost* txb_costs, const TranLow* tcoeff, TranLow* qcoeff,
754
0
                                              TranLow* dqcoeff, uint8_t* levels, int sharpness, const QmVal* iqm_ptr) {
755
0
    assert(si != *eob - 1);
756
0
    const int     ci        = scan[si];
757
0
    const int     dqv       = get_dqv(dequant, ci, iqm_ptr);
758
0
    const TranLow qc        = qcoeff[ci];
759
0
    const int     coeff_ctx = get_lower_levels_ctx(levels, ci, bwl, tx_size, tx_class);
760
0
    if (qc == 0) {
761
0
        *accu_rate += txb_costs->base_cost[coeff_ctx][0];
762
0
    } else {
763
0
        int           lower_level = 0;
764
0
        const TranLow abs_qc      = abs(qc);
765
0
        const TranLow tqc         = tcoeff[ci];
766
0
        const TranLow dqc         = dqcoeff[ci];
767
0
        const int     sign        = (qc < 0) ? 1 : 0;
768
0
        const int64_t dist0       = get_coeff_dist(tqc, 0, shift);
769
0
        int64_t       dist        = get_coeff_dist(tqc, dqc, shift) - dist0;
770
0
        int           rate        = get_coeff_cost_general(
771
0
            0, ci, abs_qc, sign, coeff_ctx, dc_sign_ctx, txb_costs, bwl, tx_class, levels);
772
0
        int64_t rd = RDCOST(rdmult, *accu_rate + rate, *accu_dist + dist);
773
774
0
        TranLow qc_low, dqc_low;
775
0
        TranLow abs_qc_low;
776
0
        int64_t dist_low, rd_low;
777
0
        int     rate_low;
778
0
        if (abs_qc == 1) {
779
0
            abs_qc_low = 0;
780
0
            dqc_low = qc_low = 0;
781
0
            dist_low         = 0;
782
0
            rate_low         = txb_costs->base_cost[coeff_ctx][0];
783
0
            rd_low           = RDCOST(rdmult, *accu_rate + rate_low, *accu_dist);
784
0
        } else {
785
0
            get_qc_dqc_low(abs_qc, sign, dqv, shift, &qc_low, &dqc_low);
786
0
            abs_qc_low = abs_qc - 1;
787
0
            dist_low   = get_coeff_dist(tqc, dqc_low, shift) - dist0;
788
0
            rate_low   = get_coeff_cost_general(
789
0
                0, ci, abs_qc_low, sign, coeff_ctx, dc_sign_ctx, txb_costs, bwl, tx_class, levels);
790
0
            rd_low = RDCOST(rdmult, *accu_rate + rate_low, *accu_dist + dist_low);
791
0
        }
792
793
0
        int       lower_level_new_eob = 0;
794
0
        const int new_eob             = si + 1;
795
0
        const int coeff_ctx_new_eob   = get_lower_levels_ctx_eob(bwl, height, si);
796
0
        const int new_eob_cost        = get_eob_cost(new_eob, txb_eob_costs, txb_costs, tx_class);
797
0
        int       rate_coeff_eob      = new_eob_cost +
798
0
            get_coeff_cost_eob(ci, abs_qc, sign, coeff_ctx_new_eob, dc_sign_ctx, txb_costs, bwl, tx_class);
799
0
        int64_t dist_new_eob = dist;
800
0
        int64_t rd_new_eob   = RDCOST(rdmult, rate_coeff_eob, dist_new_eob);
801
802
0
        if (abs_qc_low > 0) {
803
0
            const int rate_coeff_eob_low = new_eob_cost +
804
0
                get_coeff_cost_eob(ci, abs_qc_low, sign, coeff_ctx_new_eob, dc_sign_ctx, txb_costs, bwl, tx_class);
805
0
            const int64_t dist_new_eob_low = dist_low;
806
0
            const int64_t rd_new_eob_low   = RDCOST(rdmult, rate_coeff_eob_low, dist_new_eob_low);
807
0
            if (rd_new_eob_low < rd_new_eob) {
808
0
                lower_level_new_eob = 1;
809
0
                rd_new_eob          = rd_new_eob_low;
810
0
                rate_coeff_eob      = rate_coeff_eob_low;
811
0
                dist_new_eob        = dist_new_eob_low;
812
0
            }
813
0
        }
814
815
0
        if (rd_low < rd) {
816
0
            lower_level = 1;
817
0
            rd          = rd_low;
818
0
            rate        = rate_low;
819
0
            dist        = dist_low;
820
0
        }
821
822
0
        if (sharpness == 0 && rd_new_eob < rd) {
823
0
            for (int ni = 0; ni < *nz_num; ++ni) {
824
0
                int last_ci                          = nz_ci[ni];
825
0
                levels[get_padded_idx(last_ci, bwl)] = 0;
826
0
                qcoeff[last_ci]                      = 0;
827
0
                dqcoeff[last_ci]                     = 0;
828
0
            }
829
0
            *eob        = new_eob;
830
0
            *nz_num     = 0;
831
0
            *accu_rate  = rate_coeff_eob;
832
0
            *accu_dist  = dist_new_eob;
833
0
            lower_level = lower_level_new_eob;
834
0
        } else {
835
0
            *accu_rate += rate;
836
0
            *accu_dist += dist;
837
0
        }
838
839
0
        if (lower_level) {
840
0
            qcoeff[ci]                      = qc_low;
841
0
            dqcoeff[ci]                     = dqc_low;
842
0
            levels[get_padded_idx(ci, bwl)] = AOMMIN(abs_qc_low, INT8_MAX);
843
0
        }
844
0
        if (qcoeff[ci]) {
845
0
            nz_ci[*nz_num] = ci;
846
0
            ++*nz_num;
847
0
        }
848
0
    }
849
0
}
850
851
static INLINE void update_coeff_general(int* accu_rate, int64_t* accu_dist, int si, int eob, TxSize tx_size,
852
                                        TxClass tx_class, int bwl, int height, int64_t rdmult, int shift,
853
                                        int dc_sign_ctx, const int16_t* dequant, const int16_t* scan,
854
                                        const LvMapCoeffCost* txb_costs, const TranLow* tcoeff, TranLow* qcoeff,
855
8.75k
                                        TranLow* dqcoeff, uint8_t* levels, const QmVal* iqm_ptr) {
856
8.75k
    const int     ci        = scan[si];
857
8.75k
    const int     dqv       = get_dqv(dequant, ci, iqm_ptr);
858
8.75k
    const TranLow qc        = qcoeff[ci];
859
8.75k
    const int     is_last   = si == (eob - 1);
860
8.75k
    const int     coeff_ctx = get_lower_levels_ctx_general(is_last, si, bwl, height, levels, ci, tx_size, tx_class);
861
8.75k
    if (qc == 0) {
862
0
        *accu_rate += txb_costs->base_cost[coeff_ctx][0];
863
8.75k
    } else {
864
8.75k
        const int     sign   = (qc < 0) ? 1 : 0;
865
8.75k
        const TranLow abs_qc = abs(qc);
866
8.75k
        const TranLow tqc    = tcoeff[ci];
867
8.75k
        const TranLow dqc    = dqcoeff[ci];
868
8.75k
        const int64_t dist   = get_coeff_dist(tqc, dqc, shift);
869
8.75k
        const int64_t dist0  = get_coeff_dist(tqc, 0, shift);
870
8.75k
        const int     rate   = get_coeff_cost_general(
871
8.75k
            is_last, ci, abs_qc, sign, coeff_ctx, dc_sign_ctx, txb_costs, bwl, tx_class, levels);
872
8.75k
        const int64_t rd = RDCOST(rdmult, rate, dist);
873
874
8.75k
        TranLow qc_low, dqc_low;
875
8.75k
        TranLow abs_qc_low;
876
8.75k
        int64_t dist_low, rd_low;
877
8.75k
        int     rate_low;
878
8.75k
        if (abs_qc == 1) {
879
0
            abs_qc_low = qc_low = dqc_low = 0;
880
0
            dist_low                      = dist0;
881
0
            rate_low                      = txb_costs->base_cost[coeff_ctx][0];
882
8.75k
        } else {
883
8.75k
            get_qc_dqc_low(abs_qc, sign, dqv, shift, &qc_low, &dqc_low);
884
8.75k
            abs_qc_low = abs_qc - 1;
885
8.75k
            dist_low   = get_coeff_dist(tqc, dqc_low, shift);
886
8.75k
            rate_low   = get_coeff_cost_general(
887
8.75k
                is_last, ci, abs_qc_low, sign, coeff_ctx, dc_sign_ctx, txb_costs, bwl, tx_class, levels);
888
8.75k
        }
889
890
8.75k
        rd_low = RDCOST(rdmult, rate_low, dist_low);
891
8.75k
        if (rd_low < rd) {
892
80
            qcoeff[ci]                      = qc_low;
893
80
            dqcoeff[ci]                     = dqc_low;
894
80
            levels[get_padded_idx(ci, bwl)] = AOMMIN(abs_qc_low, INT8_MAX);
895
80
            *accu_rate += rate_low;
896
80
            *accu_dist += dist_low - dist0;
897
8.67k
        } else {
898
8.67k
            *accu_rate += rate;
899
8.67k
            *accu_dist += dist - dist0;
900
8.67k
        }
901
8.75k
    }
902
8.75k
}
903
904
/*
 * Lightweight keep-or-lower decision for a coefficient that is neither at scan
 * position 0 nor the last one of the block (both asserted).
 *
 * Only the rate is accumulated. The level is lowered by one when that gives a
 * smaller rate-distortion cost; the comparison is skipped, and the level kept,
 * when the dequantized magnitude is already below the original magnitude.
 */
static AOM_FORCE_INLINE void update_coeff_simple(int* accu_rate, int si, int eob, TxSize tx_size, TxClass tx_class,
                                                 int bwl, int64_t rdmult, int shift, const int16_t* dequant,
                                                 const int16_t* scan, const LvMapCoeffCost* txb_costs,
                                                 const TranLow* tcoeff, TranLow* qcoeff, TranLow* dqcoeff,
                                                 uint8_t* levels, const QmVal* iqm_ptr) {
    (void)eob; // eob is only referenced by the assert below
    assert(si != eob - 1);
    assert(si > 0);

    const int     ci        = scan[si];
    const int     dqv       = get_dqv(dequant, ci, iqm_ptr);
    const TranLow qc        = qcoeff[ci];
    const int     coeff_ctx = get_lower_levels_ctx(levels, ci, bwl, tx_size, tx_class);

    if (qc == 0) {
        *accu_rate += txb_costs->base_cost[coeff_ctx][0];
        return;
    }

    const TranLow abs_qc   = abs(qc);
    const TranLow abs_tqc  = abs(tcoeff[ci]);
    const TranLow abs_dqc  = abs(dqcoeff[ci]);
    int           rate_low = 0;
    const int rate = get_two_coeff_cost_simple(ci, abs_qc, coeff_ctx, txb_costs, bwl, tx_class, levels, &rate_low);

    // Reconstruction already undershoots the source: keep the level as is.
    if (abs_dqc < abs_tqc) {
        *accu_rate += rate;
        return;
    }

    const int64_t dist = get_coeff_dist(abs_tqc, abs_dqc, shift);
    const int64_t rd   = RDCOST(rdmult, rate, dist);

    const TranLow abs_qc_low  = abs_qc - 1;
    const TranLow abs_dqc_low = (abs_qc_low * dqv) >> shift;
    const int64_t dist_low    = get_coeff_dist(abs_tqc, abs_dqc_low, shift);
    const int64_t rd_low      = RDCOST(rdmult, rate_low, dist_low);

    if (rd_low >= rd) {
        *accu_rate += rate;
        return;
    }

    // Lowered level wins: store it with the original sign restored.
    const int sign                  = qc < 0;
    qcoeff[ci]                      = (-sign ^ abs_qc_low) + sign;
    dqcoeff[ci]                     = (-sign ^ abs_dqc_low) + sign;
    levels[get_padded_idx(ci, bwl)] = AOMMIN(abs_qc_low, INT8_MAX);
    *accu_rate += rate_low;
}
950
951
/*
 * Final check of whether dropping every remaining coefficient is cheaper
 * than coding them.
 *
 * Compares the cost of the accumulated rate plus non_skip_cost at accu_dist
 * against the cost of skip_cost at zero distortion. If skipping is strictly
 * cheaper and sharpness == 0, the coefficients listed in nz_ci[0 .. nz_num)
 * are zeroed in qcoeff and dqcoeff, *accu_rate is reset to 0 and *eob to 0.
 */
static INLINE void update_skip(int* accu_rate, int64_t accu_dist, uint16_t* eob, int nz_num, int* nz_ci, int64_t rdmult,
                               int skip_cost, int non_skip_cost, TranLow* qcoeff, TranLow* dqcoeff, int sharpness) {
    const int64_t rd_coded   = RDCOST(rdmult, *accu_rate + non_skip_cost, accu_dist);
    const int64_t rd_skipped = RDCOST(rdmult, skip_cost, 0);
    if (sharpness != 0 || rd_skipped >= rd_coded) {
        return;
    }

    // The levels buffer is left untouched: this is the last step, so it is not read again.
    for (int k = 0; k < nz_num; ++k) {
        const int ci = nz_ci[k];
        qcoeff[ci]   = 0;
        dqcoeff[ci]  = 0;
    }
    *accu_rate = 0;
    *eob       = 0;
}
967
968
// Adaptive-quantization mode identifiers; enumerators are numbered consecutively from 0.
enum {
    NO_AQ = 0,
    VARIANCE_AQ,
    COMPLEXITY_AQ,
    CYCLIC_REFRESH_AQ,
    AQ_MODE_COUNT // Number of modes; must stay the last enumerator
} UENUM1BYTE(AQ_MODE);
975
976
// Delta-Q mode identifiers; enumerators are numbered consecutively from 0.
enum {
    NO_DELTA_Q = 0,
    DELTA_Q_ONLY,
    DELTA_Q_LF,
    DELTAQ_MODE_COUNT // Number of modes; must stay the last enumerator
} UENUM1BYTE(DELTAQ_MODE);
982
983
// These numbers are empirically obtained.
984
#if TUNE_CHROMA_SSIM
985
static const int plane_rd_mult[2][REF_TYPES][PLANE_TYPES] = {{
986
                                                                 {17, 13},
987
                                                                 {16, 10},
988
                                                             },
989
                                                             {
990
                                                                 {17, 13},
991
                                                                 {16, 10},
992
                                                             }};
993
#else
994
static const int plane_rd_mult[2][REF_TYPES][PLANE_TYPES] = {{{17, 20}, {16, 20}},
995
                                                             {
996
                                                                 {17, 13},
997
                                                                 {16, 10},
998
                                                             }};
999
#endif
1000
1001
/*
1002
 * Reduce the number of non-zero quantized coefficients before getting to the main/complex RDOQ stage
1003
 * (it performs an early check of whether to zero out each of the non-zero quantized coefficients,
1004
 * and updates the quantized coeffs if it is determined it can be zeroed out).
1005
 */
1006
static INLINE void update_coeff_eob_fast(uint16_t* eob, int shift, const int16_t* dequant_ptr, const int16_t* scan,
1007
0
                                         const TranLow* coeff_ptr, TranLow* qcoeff_ptr, TranLow* dqcoeff_ptr) {
1008
0
    int       eob_out = *eob;
1009
0
    const int zbin[2] = {dequant_ptr[0] + ROUND_POWER_OF_TWO(dequant_ptr[0] * 70, 7),
1010
0
                         dequant_ptr[1] + ROUND_POWER_OF_TWO(dequant_ptr[1] * 70, 7)};
1011
0
    for (int i = *eob - 1; i >= 0; i--) {
1012
0
        const int rc         = scan[i];
1013
0
        const int qcoeff     = qcoeff_ptr[rc];
1014
0
        const int coeff      = coeff_ptr[rc];
1015
0
        const int coeff_sign = -(coeff < 0);
1016
0
        int64_t   abs_coeff  = (coeff ^ coeff_sign) - coeff_sign;
1017
0
        if (((abs_coeff << (1 + shift)) < zbin[rc != 0]) || (qcoeff == 0)) {
1018
0
            eob_out--;
1019
0
            qcoeff_ptr[rc]  = 0;
1020
0
            dqcoeff_ptr[rc] = 0;
1021
0
        } else {
1022
0
            break;
1023
0
        }
1024
0
    }
1025
0
    *eob = eob_out;
1026
0
}
1027
1028
/*
 * Runs the fast end-of-block trimming pass (update_coeff_eob_fast) on one
 * transform block, using the scan order selected by tx_size / tx_type, the
 * transform scale of tx_size and the dequantizer table of plane p.
 * qcoeff_ptr, dqcoeff_ptr and *eob are updated in place.
 */
static void svt_fast_optimize_b(const TranLow* coeff_ptr, const MacroblockPlane* p, TranLow* qcoeff_ptr,
                                TranLow* dqcoeff_ptr, uint16_t* eob, TxSize tx_size, TxType tx_type) {
    const ScanOrder* const scan_order = get_scan_order(tx_size, tx_type);
    const int              tx_scale   = av1_get_tx_scale_tab[tx_size];

    update_coeff_eob_fast(eob, tx_scale, p->dequant_qtx, scan_order->scan, coeff_ptr, qcoeff_ptr, dqcoeff_ptr);
}
1037
1038
static void svt_av1_optimize_b(PictureControlSet* pcs, ModeDecisionContext* ctx, int16_t txb_skip_context,
1039
                               int16_t dc_sign_context, const TranLow* coeff_ptr, const MacroblockPlane* p,
1040
                               TranLow* qcoeff_ptr, TranLow* dqcoeff_ptr, uint16_t* eob, const QuantParam* qparam,
1041
                               TxSize tx_size, TxType tx_type, bool is_inter, uint8_t use_sharpness,
1042
10.3k
                               uint8_t delta_q_present, uint8_t picture_qp, uint32_t lambda, int plane) {
1043
10.3k
    SequenceControlSet*    scs        = pcs->scs;
1044
10.3k
    bool                   allintra   = scs->allintra;
1045
10.3k
    bool                   rtc        = scs->static_config.rtc;
1046
10.3k
    int                    sharpness  = 0; // No Sharpness
1047
10.3k
    const ScanOrder* const scan_order = get_scan_order(tx_size, tx_type);
1048
10.3k
    const int16_t*         scan       = scan_order->scan;
1049
10.3k
    const int              shift      = av1_get_tx_scale_tab[tx_size];
1050
10.3k
    const PlaneType        plane_type = plane;
1051
10.3k
    const TxSize           txs_ctx    = get_txsize_entropy_ctx(tx_size);
1052
10.3k
    const TxClass          tx_class   = tx_type_to_class[tx_type];
1053
10.3k
    const int              bwl        = get_txb_bwl(tx_size);
1054
10.3k
    const int              width      = get_txb_wide(tx_size);
1055
10.3k
    const int              height     = get_txb_high(tx_size);
1056
10.3k
    assert(width == (1 << bwl));
1057
10.3k
    assert(txs_ctx < TX_SIZES);
1058
10.3k
    const LvMapCoeffCost* txb_costs      = &ctx->md_rate_est_ctx->coeff_fac_bits[txs_ctx][plane_type];
1059
10.3k
    const int             eob_multi_size = txsize_log2_minus4[tx_size];
1060
10.3k
    const LvMapEobCost*   txb_eob_costs  = &ctx->md_rate_est_ctx->eob_frac_bits[eob_multi_size][plane_type];
1061
10.3k
    const int             non_skip_cost  = txb_costs->txb_skip_cost[txb_skip_context][0];
1062
10.3k
    const int             skip_cost      = txb_costs->txb_skip_cost[txb_skip_context][1];
1063
10.3k
    const int             eob_cost       = get_eob_cost(*eob, txb_eob_costs, txb_costs, tx_class);
1064
10.3k
    int                   rweight        = 100;
1065
10.3k
    const int32_t         sharpness_val  = CLIP3(0, 7, pcs->scs->static_config.sharpness);
1066
10.3k
    const int             rshift         = MAX(2, (int)sharpness_val);
1067
10.3k
    if (use_sharpness && delta_q_present && plane == 0) {
1068
0
        int diff = ctx->sb_ptr->qindex - quantizer_to_qindex[picture_qp];
1069
0
        if (diff < 0) {
1070
0
            sharpness = 1;
1071
0
            rweight   = 0;
1072
0
        }
1073
0
    }
1074
10.3k
    const int64_t rdmult =
1075
10.3k
        (((((int64_t)lambda * plane_rd_mult[allintra || rtc][is_inter][plane_type]) * rweight) / 100) + 2) >> rshift;
1076
10.3k
    uint8_t        levels_buf[TX_PAD_2D];
1077
10.3k
    uint8_t* const levels = set_levels(levels_buf, width);
1078
1079
10.3k
    if (*eob > 1) {
1080
0
        svt_av1_txb_init_levels(qcoeff_ptr, width, height, levels);
1081
0
    }
1082
10.3k
    int accu_rate = eob_cost;
1083
1084
10.3k
    int64_t       accu_dist  = 0;
1085
10.3k
    int           si         = *eob - 1;
1086
10.3k
    const int     ci         = scan[si];
1087
10.3k
    const TranLow qc         = qcoeff_ptr[ci];
1088
10.3k
    const TranLow abs_qc     = abs(qc);
1089
10.3k
    const int     sign       = qc < 0;
1090
10.3k
    const int     max_nz_num = 4;
1091
10.3k
    int           nz_num     = 1;
1092
10.3k
    int           nz_ci[5]   = {ci, 0, 0, 0, 0};
1093
10.3k
    if (abs_qc >= 2) {
1094
8.75k
        update_coeff_general(&accu_rate,
1095
8.75k
                             &accu_dist,
1096
8.75k
                             si,
1097
8.75k
                             *eob,
1098
8.75k
                             tx_size,
1099
8.75k
                             tx_class,
1100
8.75k
                             bwl,
1101
8.75k
                             height,
1102
8.75k
                             rdmult,
1103
8.75k
                             shift,
1104
8.75k
                             dc_sign_context,
1105
8.75k
                             p->dequant_qtx,
1106
8.75k
                             scan,
1107
8.75k
                             txb_costs,
1108
8.75k
                             coeff_ptr,
1109
8.75k
                             qcoeff_ptr,
1110
8.75k
                             dqcoeff_ptr,
1111
8.75k
                             levels,
1112
8.75k
                             qparam->iqmatrix);
1113
8.75k
        --si;
1114
8.75k
    } else {
1115
1.57k
        assert(abs_qc == 1);
1116
1.57k
        const int coeff_ctx = get_lower_levels_ctx_eob(bwl, height, si);
1117
1.57k
        accu_rate += get_coeff_cost_eob(ci, abs_qc, sign, coeff_ctx, dc_sign_context, txb_costs, bwl, tx_class);
1118
1119
1.57k
        const TranLow tqc   = coeff_ptr[ci];
1120
1.57k
        const TranLow dqc   = dqcoeff_ptr[ci];
1121
1.57k
        const int64_t dist  = get_coeff_dist(tqc, dqc, shift);
1122
1.57k
        const int64_t dist0 = get_coeff_dist(tqc, 0, shift);
1123
1.57k
        accu_dist += dist - dist0;
1124
1.57k
        --si;
1125
1.57k
    }
1126
10.3k
#define UPDATE_COEFF_EOB_CASE(tx_class_literal)         \
1127
10.3k
    case tx_class_literal:                              \
1128
10.3k
        for (; si >= 0 && nz_num <= max_nz_num; --si) { \
1129
0
            update_coeff_eob(&accu_rate,                \
1130
0
                             &accu_dist,                \
1131
0
                             eob,                       \
1132
0
                             &nz_num,                   \
1133
0
                             nz_ci,                     \
1134
0
                             si,                        \
1135
0
                             tx_size,                   \
1136
0
                             tx_class_literal,          \
1137
0
                             bwl,                       \
1138
0
                             height,                    \
1139
0
                             dc_sign_context,           \
1140
0
                             rdmult,                    \
1141
0
                             shift,                     \
1142
0
                             p->dequant_qtx,            \
1143
0
                             scan,                      \
1144
0
                             txb_eob_costs,             \
1145
0
                             txb_costs,                 \
1146
0
                             coeff_ptr,                 \
1147
0
                             qcoeff_ptr,                \
1148
0
                             dqcoeff_ptr,               \
1149
0
                             levels,                    \
1150
0
                             sharpness,                 \
1151
0
                             qparam->iqmatrix);         \
1152
0
        }                                               \
1153
10.3k
        break;
1154
10.3k
    switch (tx_class) {
1155
10.3k
        UPDATE_COEFF_EOB_CASE(TX_CLASS_2D);
1156
0
        UPDATE_COEFF_EOB_CASE(TX_CLASS_HORIZ);
1157
0
        UPDATE_COEFF_EOB_CASE(TX_CLASS_VERT);
1158
0
#undef UPDATE_COEFF_EOB_CASE
1159
0
    default:
1160
0
        assert(false);
1161
10.3k
    }
1162
1163
10.3k
    if (si == -1 && nz_num <= max_nz_num) {
1164
10.3k
        update_skip(&accu_rate,
1165
10.3k
                    accu_dist,
1166
10.3k
                    eob,
1167
10.3k
                    nz_num,
1168
10.3k
                    nz_ci,
1169
10.3k
                    rdmult,
1170
10.3k
                    skip_cost,
1171
10.3k
                    non_skip_cost,
1172
10.3k
                    qcoeff_ptr,
1173
10.3k
                    dqcoeff_ptr,
1174
10.3k
                    sharpness);
1175
10.3k
    }
1176
1177
10.3k
    int si_end = 1; // default: full RDOQ
1178
10.3k
    if (ctx->rdoq_ctrls.cut_off_num) {
1179
10.3k
        const int cut_off_coeff = AOMMAX((width * height) >> 7,
1180
10.3k
                                         (*eob * ctx->rdoq_ctrls.cut_off_num) / ctx->rdoq_ctrls.cut_off_denum);
1181
10.3k
        si_end                  = AOMMAX(1, *eob - cut_off_coeff);
1182
10.3k
    }
1183
10.3k
#define UPDATE_COEFF_SIMPLE_CASE(tx_class_literal) \
1184
10.3k
    case tx_class_literal:                         \
1185
10.3k
        for (; si >= si_end; --si) {               \
1186
0
            update_coeff_simple(&accu_rate,        \
1187
0
                                si,                \
1188
0
                                *eob,              \
1189
0
                                tx_size,           \
1190
0
                                tx_class_literal,  \
1191
0
                                bwl,               \
1192
0
                                rdmult,            \
1193
0
                                shift,             \
1194
0
                                p->dequant_qtx,    \
1195
0
                                scan,              \
1196
0
                                txb_costs,         \
1197
0
                                coeff_ptr,         \
1198
0
                                qcoeff_ptr,        \
1199
0
                                dqcoeff_ptr,       \
1200
0
                                levels,            \
1201
0
                                qparam->iqmatrix); \
1202
0
        }                                          \
1203
10.3k
        break;
1204
10.3k
    switch (tx_class) {
1205
10.3k
        UPDATE_COEFF_SIMPLE_CASE(TX_CLASS_2D);
1206
0
        UPDATE_COEFF_SIMPLE_CASE(TX_CLASS_HORIZ);
1207
0
        UPDATE_COEFF_SIMPLE_CASE(TX_CLASS_VERT);
1208
0
#undef UPDATE_COEFF_SIMPLE_CASE
1209
0
    default:
1210
0
        assert(false);
1211
10.3k
    }
1212
1213
    // DC position
1214
10.3k
    if (si == 0) {
1215
        // no need to update accu_dist because it's not used after this point
1216
0
        int64_t dummy_dist = 0;
1217
0
        update_coeff_general(&accu_rate,
1218
0
                             &dummy_dist,
1219
0
                             si,
1220
0
                             *eob,
1221
0
                             tx_size,
1222
0
                             tx_class,
1223
0
                             bwl,
1224
0
                             height,
1225
0
                             rdmult,
1226
0
                             shift,
1227
0
                             dc_sign_context,
1228
0
                             p->dequant_qtx,
1229
0
                             scan,
1230
0
                             txb_costs,
1231
0
                             coeff_ptr,
1232
0
                             qcoeff_ptr,
1233
0
                             dqcoeff_ptr,
1234
0
                             levels,
1235
0
                             qparam->iqmatrix);
1236
0
    }
1237
10.3k
}
1238
1239
1.22M
// Maps a transform size that has a 64-sample dimension to the size with that
// dimension reduced to 32:
//   TX_64X64, TX_64X32, TX_32X64 -> TX_32X32
//   TX_64X16                     -> TX_32X16
//   TX_16X64                     -> TX_16X32
// Every other transform size is returned unchanged.
static INLINE TxSize aom_av1_get_adjusted_tx_size(TxSize tx_size) {
1240
1.22M
    switch (tx_size) {
1241
3.21k
    case TX_64X64:
1242
5.55k
    case TX_64X32:
1243
5.55k
    case TX_32X64:
1244
5.55k
        return TX_32X32;
1245
0
    case TX_64X16:
1246
0
        return TX_32X16;
1247
0
    case TX_16X64:
1248
0
        return TX_16X32;
1249
1.22M
    default:
1250
1.22M
        return tx_size;
1251
1.22M
    }
1252
1.22M
}
1253
1254
// Quantize + inverse-quantize path without RDOQ.
//
// Quantizes `coeff` into `quant_coeff`, writes the dequantized values to
// `recon_coeff` and the end-of-block position to `*eob`. `qindex` is used
// directly as the index into the encoder's quantizer tables (no delta-q,
// segmentation or chroma offset is applied here). The quantization matrices
// are always looked up for PLANE_Y; when both matrix pointers are NULL the
// plain quantizer is called, otherwise the *_qm variant is called.
// When CONFIG_ENABLE_HIGH_BIT_DEPTH is off, `bit_depth` is unused and the
// 8-bit quantizer is always taken.
void svt_aom_quantize_inv_quantize_light(PictureControlSet* pcs, int32_t* coeff, int32_t* quant_coeff,
1255
                                         int32_t* recon_coeff, uint32_t qindex, TxSize txsize, uint16_t* eob,
1256
7.54k
                                         uint32_t bit_depth, TxType tx_type) {
1257
7.54k
    EncodeContext* enc_ctx = pcs->scs->enc_ctx;
1258
1259
7.54k
    uint32_t q_index = qindex;
1260
1261
7.54k
    const ScanOrder* const scan_order = get_scan_order(txsize, tx_type);
1262
1263
7.54k
    const int32_t n_coeffs = av1_get_max_eob(txsize);
1264
1265
7.54k
    // Quant-matrix level: the frame-header luma level when the transform is 2D
    // and quant matrices are in use, otherwise the last level (NUM_QM_LEVELS - 1).
    int32_t qmatrix_level = (IS_2D_TRANSFORM(tx_type) && pcs->ppcs->frm_hdr.quantization_params.using_qmatrix)
1266
1267
7.54k
        ? pcs->ppcs->frm_hdr.quantization_params.qm[PLANE_Y]
1268
1269
7.54k
        : NUM_QM_LEVELS - 1;
1270
1271
7.54k
    // The matrix tables are indexed with the adjusted (64 -> 32) transform size.
    TxSize adjusted_tx_size = aom_av1_get_adjusted_tx_size(txsize);
1272
1273
7.54k
    const QmVal* q_matrix = pcs->ppcs->gqmatrix[qmatrix_level][PLANE_Y][adjusted_tx_size];
1274
1275
7.54k
    const QmVal* iq_matrix = pcs->ppcs->giqmatrix[qmatrix_level][PLANE_Y][adjusted_tx_size];
1276
1277
7.54k
    if (q_matrix == NULL && iq_matrix == NULL) {
1278
7.54k
#if CONFIG_ENABLE_HIGH_BIT_DEPTH
1279
7.54k
        // NOTE(review): this call pairs the y_* quantizer tables with
        // deq_bd.v_dequant_qtx, while the 8-bit call below pairs the v_*
        // quantizer tables with deq_8bit.y_dequant_qtx. The matrices above are
        // PLANE_Y -- confirm that the mixed y/v table selection is intended.
        if (bit_depth > EB_EIGHT_BIT) {
1280
0
            svt_aom_highbd_quantize_b((TranLow*)coeff,
1281
0
                                      n_coeffs,
1282
0
                                      enc_ctx->quants_bd.y_zbin[q_index],
1283
0
                                      enc_ctx->quants_bd.y_round[q_index],
1284
0
                                      enc_ctx->quants_bd.y_quant[q_index],
1285
0
                                      enc_ctx->quants_bd.y_quant_shift[q_index],
1286
0
                                      quant_coeff,
1287
0
                                      (TranLow*)recon_coeff,
1288
0
                                      enc_ctx->deq_bd.v_dequant_qtx[q_index],
1289
0
                                      eob,
1290
0
                                      scan_order->scan,
1291
0
                                      scan_order->iscan,
1292
0
                                      q_matrix,
1293
0
                                      iq_matrix,
1294
0
                                      av1_get_tx_scale_tab[txsize]);
1295
0
        } else
1296
#else
1297
        UNUSED(bit_depth);
1298
#endif
1299
7.54k
        {
1300
7.54k
            svt_aom_quantize_b((TranLow*)coeff,
1301
7.54k
                               n_coeffs,
1302
7.54k
                               enc_ctx->quants_8bit.v_zbin[q_index],
1303
7.54k
                               enc_ctx->quants_8bit.v_round[q_index],
1304
7.54k
                               enc_ctx->quants_8bit.v_quant[q_index],
1305
7.54k
                               enc_ctx->quants_8bit.v_quant_shift[q_index],
1306
7.54k
                               quant_coeff,
1307
7.54k
                               (TranLow*)recon_coeff,
1308
7.54k
                               enc_ctx->deq_8bit.y_dequant_qtx[q_index],
1309
7.54k
                               eob,
1310
7.54k
                               scan_order->scan,
1311
7.54k
                               scan_order->iscan,
1312
7.54k
                               q_matrix,
1313
7.54k
                               iq_matrix,
1314
7.54k
                               av1_get_tx_scale_tab[txsize]);
1315
7.54k
        }
1316
7.54k
    } else {
1317
0
        // At least one quant matrix is present: same table selection as above,
        // but through the *_qm quantizers.
#if CONFIG_ENABLE_HIGH_BIT_DEPTH
1318
0
        if (bit_depth > EB_EIGHT_BIT) {
1319
0
            svt_av1_highbd_quantize_b_qm((TranLow*)coeff,
1320
0
                                         n_coeffs,
1321
0
                                         enc_ctx->quants_bd.y_zbin[q_index],
1322
0
                                         enc_ctx->quants_bd.y_round[q_index],
1323
0
                                         enc_ctx->quants_bd.y_quant[q_index],
1324
0
                                         enc_ctx->quants_bd.y_quant_shift[q_index],
1325
0
                                         quant_coeff,
1326
0
                                         (TranLow*)recon_coeff,
1327
0
                                         enc_ctx->deq_bd.v_dequant_qtx[q_index],
1328
0
                                         eob,
1329
0
                                         scan_order->scan,
1330
0
                                         scan_order->iscan,
1331
0
                                         q_matrix,
1332
0
                                         iq_matrix,
1333
0
                                         av1_get_tx_scale_tab[txsize]);
1334
0
        } else
1335
0
#endif
1336
0
        {
1337
0
            svt_av1_quantize_b_qm((TranLow*)coeff,
1338
0
                                  n_coeffs,
1339
0
                                  enc_ctx->quants_8bit.v_zbin[q_index],
1340
0
                                  enc_ctx->quants_8bit.v_round[q_index],
1341
0
                                  enc_ctx->quants_8bit.v_quant[q_index],
1342
0
                                  enc_ctx->quants_8bit.v_quant_shift[q_index],
1343
0
                                  quant_coeff,
1344
0
                                  (TranLow*)recon_coeff,
1345
0
                                  enc_ctx->deq_8bit.y_dequant_qtx[q_index],
1346
0
                                  eob,
1347
0
                                  scan_order->scan,
1348
0
                                  scan_order->iscan,
1349
0
                                  q_matrix,
1350
0
                                  iq_matrix,
1351
0
                                  av1_get_tx_scale_tab[txsize]);
1352
0
        }
1353
0
    }
1354
7.54k
}
1355
1356
// See av1_get_txb_entropy_context in libaom
1357
0
// Computes the cumulative coefficient level of a transform block.
//
// scan        : scan-order table mapping scan index -> coefficient position.
// quant_coeff : quantized coefficients, indexed by position.
// eob         : pointer to the end-of-block count; only read, never written.
//
// Sums |quant_coeff| over the first *eob scan positions, capped at
// COEFF_CONTEXT_MASK, then hands the sum and the DC coefficient
// (quant_coeff[0]) to set_dc_sign(). The result is truncated to uint8_t.
uint8_t svt_av1_compute_cul_level_c(const int16_t* const scan, const int32_t* const quant_coeff, uint16_t* eob) {
1358
0
    int32_t cul_level = 0;
1359
0
    for (int32_t c = 0; c < *eob; ++c) {
1360
0
        const int16_t pos   = scan[c];
1361
0
        const int32_t v     = quant_coeff[pos];
1362
0
        int32_t       level = ABS(v);
1363
0
        cul_level += level;
1364
        // Early exit the loop if cul_level reaches COEFF_CONTEXT_MASK
1365
0
        if (cul_level >= COEFF_CONTEXT_MASK) {
1366
0
            break;
1367
0
        }
1368
0
    }
1369
1370
0
    // The loop may overshoot the cap on its last addition; clamp it back.
    cul_level = AOMMIN(COEFF_CONTEXT_MASK, cul_level);
1371
    // DC value
1372
0
    // set_dc_sign() is defined elsewhere; presumably it folds the sign of the
    // DC coefficient into cul_level -- verify against its definition.
    set_dc_sign(&cul_level, quant_coeff[0]);
1373
0
    return (uint8_t)cul_level;
1374
0
}
1375
1376
#if OPT_COEFF_SHAVING
1377
1378
// Retract EOB by removing trailing low-magnitude coefficients separated by zero gaps,
1379
// then compute energy on the reduced block and optionally zero it entirely if energy is low.
1380
// Returns the updated EOB (0 = block became skip).
1381
// quant_buf / recon_buf : quantized and dequantized coefficients, indexed by
//                         position; zeroed in place for every shaved coefficient.
// eob                   : current end-of-block count (in scan order).
// tx_size / tx_type     : select the scan order used to walk the block.
// ctrls                 : supplies level_threshold, zero_gap_threshold and
//                         skip_energy_threshold.
// Note: the "energy" used in phase 2 is the sum of absolute quantized levels,
// not a sum of squares.
static INLINE uint16_t shave_coeff(int32_t* quant_buf, int32_t* recon_buf, uint16_t eob, TxSize tx_size, TxType tx_type,
1382
0
                                   const CoeffShavingCtrls* ctrls) {
1383
0
    const int16_t* const scan = get_scan_order(tx_size, tx_type)->scan;
1384
1385
0
    const int level_th = ctrls->level_threshold;
1386
0
    const int gap_th   = ctrls->zero_gap_threshold;
1387
1388
0
    int updated_eob = (int)eob;
1389
1390
    // -------------------------
1391
    // Phase 1: EOB retraction
1392
    // -------------------------
1393
0
    // Each iteration looks at the last coefficient in scan order and drops it
    // only if it is small (<= level_th) AND separated from the previous
    // non-zero coefficient by at least gap_th zeros. The loop never removes
    // the final remaining coefficient (updated_eob stays >= 1).
    while (updated_eob > 1) {
1394
0
        const int     last_pos = scan[updated_eob - 1];
1395
0
        const int32_t val      = quant_buf[last_pos];
1396
0
        const int32_t abs_val  = (val >= 0) ? val : -val;
1397
1398
0
        if (abs_val > level_th) {
1399
0
            break;
1400
0
        }
1401
1402
        // Find previous non-zero coefficient
1403
0
        int next_nz = updated_eob - 2;
1404
0
        while (next_nz >= 0 && quant_buf[scan[next_nz]] == 0) {
1405
0
            --next_nz;
1406
0
        }
1407
1408
0
        // No earlier non-zero coefficient: keep the trailing one.
        if (next_nz < 0) {
1409
0
            break;
1410
0
        }
1411
1412
        // Gap check
1413
0
        // gap = number of zero scan positions strictly between the two
        // non-zero coefficients.
        const int gap = (updated_eob - 1) - next_nz - 1;
1414
0
        if (gap < gap_th) {
1415
0
            break;
1416
0
        }
1417
1418
        // Zero trailing coefficient
1419
0
        quant_buf[last_pos] = 0;
1420
0
        recon_buf[last_pos] = 0;
1421
1422
0
        // The previous non-zero coefficient becomes the new last coefficient.
        updated_eob = next_nz + 1;
1423
0
    }
1424
1425
    // -------------------------
1426
    // Phase 2: energy check (post-shaving)
1427
    // -------------------------
1428
0
    // Disabled when skip_energy_threshold is <= 0.
    const int skip_th = ctrls->skip_energy_threshold;
1429
0
    if (skip_th > 0 && updated_eob > 0) {
1430
0
        int32_t total_energy = 0;
1431
1432
0
        for (int c = 0; c < updated_eob; ++c) {
1433
0
            const int32_t v = quant_buf[scan[c]];
1434
0
            total_energy += (v >= 0) ? v : -v;
1435
1436
0
            // Stop summing once the threshold is exceeded; the exact total is
            // not needed beyond that point.
            if (total_energy > skip_th) {
1437
0
                break;
1438
0
            }
1439
0
        }
1440
1441
0
        if (total_energy <= skip_th) {
1442
            // Zero entire block
1443
0
            for (int c = 0; c < updated_eob; ++c) {
1444
0
                const int pos  = scan[c];
1445
0
                quant_buf[pos] = 0;
1446
0
                recon_buf[pos] = 0;
1447
0
            }
1448
0
            return 0;
1449
0
        }
1450
0
    }
1451
1452
0
    return (uint16_t)updated_eob;
1453
0
}
1454
1455
#endif
1456
1457
// Quantizes one transform block, optionally refines it with RDOQ, and returns
// the cumulative coefficient level.
//
// pcs, ctx               : picture- and mode-decision-level state (quantizer
//                          tables, frame header, RDOQ / shaving / rate-estimation
//                          controls).
// coeff                  : input transform coefficients.
// quant_coeff            : output quantized coefficients.
// recon_coeff            : output dequantized coefficients.
// qindex                 : block q-index; only used when the frame header has
//                          delta_q_present set, otherwise base_q_idx is used.
// segmentation_qp_offset : added to the q-index (result clipped to 0..255) when
//                          non-zero.
// txsize, tx_type        : transform size / type (select scan order, scale and
//                          quant matrix).
// eob                    : output end-of-block count; also updated by RDOQ and
//                          coefficient shaving.
// component_type         : COMPONENT_LUMA / COMPONENT_CHROMA_CB / other (treated
//                          as the V plane).
// bit_depth              : selects the 8-bit or high-bit-depth quantizer tables.
// txb_skip_context, dc_sign_context, lambda : forwarded to svt_av1_optimize_b().
// pred_mode              : only used to derive is_inter (pred_mode >= NEARESTMV).
// is_encode_pass         : true in the encode pass; affects RDOQ gating and the
//                          16-bit pipeline check.
//
// Returns 0 when ctx->rate_est_ctrls.update_skip_ctx_dc_sign_ctx is not set,
// otherwise the value computed by svt_av1_compute_cul_level().
uint8_t svt_aom_quantize_inv_quantize(PictureControlSet* pcs, ModeDecisionContext* ctx, int32_t* coeff,
1458
                                      int32_t* quant_coeff, int32_t* recon_coeff, uint32_t qindex,
1459
                                      int32_t segmentation_qp_offset, TxSize txsize, uint16_t* eob,
1460
                                      uint32_t component_type, uint32_t bit_depth, TxType tx_type,
1461
                                      int16_t txb_skip_context, int16_t dc_sign_context, PredictionMode pred_mode,
1462
1.21M
                                      uint32_t lambda, bool is_encode_pass) {
1463
1.21M
    SequenceControlSet* scs     = pcs->scs;
1464
1.21M
    EncodeContext*      enc_ctx = scs->enc_ctx;
1465
1.21M
    int32_t             plane   = component_type == COMPONENT_LUMA ? PLANE_Y
1466
1.21M
                                                                   : (component_type == COMPONENT_CHROMA_CB ? PLANE_U : PLANE_V);
1467
1468
1.21M
    // Quant-matrix level: the per-plane frame-header level when the transform is
    // 2D and quant matrices are in use, otherwise the last level.
    int32_t qmatrix_level = (IS_2D_TRANSFORM(tx_type) && pcs->ppcs->frm_hdr.quantization_params.using_qmatrix)
1469
1.21M
        ? pcs->ppcs->frm_hdr.quantization_params.qm[plane]
1470
1.21M
        : NUM_QM_LEVELS - 1;
1471
1472
1.21M
    TxSize          adjusted_tx_size = aom_av1_get_adjusted_tx_size(txsize);
1473
1.21M
    MacroblockPlane candidate_plane;
1474
1.21M
    const QmVal*    q_matrix  = pcs->ppcs->gqmatrix[qmatrix_level][plane][adjusted_tx_size];
1475
1.21M
    const QmVal*    iq_matrix = pcs->ppcs->giqmatrix[qmatrix_level][plane][adjusted_tx_size];
1476
1.21M
    int32_t         q_index   = pcs->ppcs->frm_hdr.delta_q_params.delta_q_present
1477
1.21M
                  ? qindex
1478
1.21M
                  : pcs->ppcs->frm_hdr.quantization_params.base_q_idx;
1479
1.21M
    if (segmentation_qp_offset != 0) {
1480
0
        q_index = CLIP3(0, 255, q_index + segmentation_qp_offset);
1481
0
    }
1482
1.21M
    // Chroma: apply the per-plane DC delta-q from the frame header and re-clip.
    if (component_type != COMPONENT_LUMA) {
1483
256k
        const int8_t offset = (component_type == COMPONENT_CHROMA_CB)
1484
256k
            ? pcs->ppcs->frm_hdr.quantization_params.delta_q_dc[1] // we are assuming delta_q_ac == delta_q_dc
1485
256k
            : pcs->ppcs->frm_hdr.quantization_params.delta_q_dc[2];
1486
256k
        q_index += offset;
1487
256k
        q_index = (uint32_t)CLIP3(0, 255, (int32_t)q_index);
1488
256k
    }
1489
1.22M
    // Fill candidate_plane with the quantizer / dequantizer table rows for the
    // selected q_index, plane and bit depth (8-bit tables vs. *_bd tables).
    if (bit_depth == EB_EIGHT_BIT) {
1490
1.22M
        if (component_type == COMPONENT_LUMA) {
1491
965k
            candidate_plane.quant_qtx       = enc_ctx->quants_8bit.y_quant[q_index];
1492
965k
            candidate_plane.quant_fp_qtx    = enc_ctx->quants_8bit.y_quant_fp[q_index];
1493
965k
            candidate_plane.round_fp_qtx    = enc_ctx->quants_8bit.y_round_fp[q_index];
1494
965k
            candidate_plane.quant_shift_qtx = enc_ctx->quants_8bit.y_quant_shift[q_index];
1495
965k
            candidate_plane.zbin_qtx        = enc_ctx->quants_8bit.y_zbin[q_index];
1496
965k
            candidate_plane.round_qtx       = enc_ctx->quants_8bit.y_round[q_index];
1497
965k
            candidate_plane.dequant_qtx     = enc_ctx->deq_8bit.y_dequant_qtx[q_index];
1498
965k
        } else if (component_type == COMPONENT_CHROMA_CB) {
1499
128k
            candidate_plane.quant_qtx       = enc_ctx->quants_8bit.u_quant[q_index];
1500
128k
            candidate_plane.quant_fp_qtx    = enc_ctx->quants_8bit.u_quant_fp[q_index];
1501
128k
            candidate_plane.round_fp_qtx    = enc_ctx->quants_8bit.u_round_fp[q_index];
1502
128k
            candidate_plane.quant_shift_qtx = enc_ctx->quants_8bit.u_quant_shift[q_index];
1503
128k
            candidate_plane.zbin_qtx        = enc_ctx->quants_8bit.u_zbin[q_index];
1504
128k
            candidate_plane.round_qtx       = enc_ctx->quants_8bit.u_round[q_index];
1505
128k
            candidate_plane.dequant_qtx     = enc_ctx->deq_8bit.u_dequant_qtx[q_index];
1506
128k
        }
1507
1508
127k
        else {
1509
127k
            candidate_plane.quant_qtx       = enc_ctx->quants_8bit.v_quant[q_index];
1510
127k
            candidate_plane.quant_fp_qtx    = enc_ctx->quants_8bit.v_quant_fp[q_index];
1511
127k
            candidate_plane.round_fp_qtx    = enc_ctx->quants_8bit.v_round_fp[q_index];
1512
127k
            candidate_plane.quant_shift_qtx = enc_ctx->quants_8bit.v_quant_shift[q_index];
1513
127k
            candidate_plane.zbin_qtx        = enc_ctx->quants_8bit.v_zbin[q_index];
1514
127k
            candidate_plane.round_qtx       = enc_ctx->quants_8bit.v_round[q_index];
1515
127k
            candidate_plane.dequant_qtx     = enc_ctx->deq_8bit.v_dequant_qtx[q_index];
1516
127k
        }
1517
18.4E
    } else {
1518
18.4E
        if (component_type == COMPONENT_LUMA) {
1519
0
            candidate_plane.quant_qtx       = enc_ctx->quants_bd.y_quant[q_index];
1520
0
            candidate_plane.quant_fp_qtx    = enc_ctx->quants_bd.y_quant_fp[q_index];
1521
0
            candidate_plane.round_fp_qtx    = enc_ctx->quants_bd.y_round_fp[q_index];
1522
0
            candidate_plane.quant_shift_qtx = enc_ctx->quants_bd.y_quant_shift[q_index];
1523
0
            candidate_plane.zbin_qtx        = enc_ctx->quants_bd.y_zbin[q_index];
1524
0
            candidate_plane.round_qtx       = enc_ctx->quants_bd.y_round[q_index];
1525
0
            candidate_plane.dequant_qtx     = enc_ctx->deq_bd.y_dequant_qtx[q_index];
1526
0
        }
1527
1528
18.4E
        else if (component_type == COMPONENT_CHROMA_CB) {
1529
0
            candidate_plane.quant_qtx       = enc_ctx->quants_bd.u_quant[q_index];
1530
0
            candidate_plane.quant_fp_qtx    = enc_ctx->quants_bd.u_quant_fp[q_index];
1531
0
            candidate_plane.round_fp_qtx    = enc_ctx->quants_bd.u_round_fp[q_index];
1532
0
            candidate_plane.quant_shift_qtx = enc_ctx->quants_bd.u_quant_shift[q_index];
1533
0
            candidate_plane.zbin_qtx        = enc_ctx->quants_bd.u_zbin[q_index];
1534
0
            candidate_plane.round_qtx       = enc_ctx->quants_bd.u_round[q_index];
1535
0
            candidate_plane.dequant_qtx     = enc_ctx->deq_bd.u_dequant_qtx[q_index];
1536
0
        }
1537
1538
18.4E
        else {
1539
18.4E
            candidate_plane.quant_qtx       = enc_ctx->quants_bd.v_quant[q_index];
1540
18.4E
            candidate_plane.quant_fp_qtx    = enc_ctx->quants_bd.v_quant_fp[q_index];
1541
18.4E
            candidate_plane.round_fp_qtx    = enc_ctx->quants_bd.v_round_fp[q_index];
1542
18.4E
            candidate_plane.quant_shift_qtx = enc_ctx->quants_bd.v_quant_shift[q_index];
1543
18.4E
            candidate_plane.zbin_qtx        = enc_ctx->quants_bd.v_zbin[q_index];
1544
18.4E
            candidate_plane.round_qtx       = enc_ctx->quants_bd.v_round[q_index];
1545
18.4E
            candidate_plane.dequant_qtx     = enc_ctx->deq_bd.v_dequant_qtx[q_index];
1546
18.4E
        }
1547
18.4E
    }
1548
1549
1.21M
    const ScanOrder* const scan_order = get_scan_order(txsize, tx_type);
1550
1551
1.21M
    const int32_t n_coeffs = av1_get_max_eob(txsize);
1552
1553
1.21M
    QuantParam qparam;
1554
1555
1.21M
    qparam.log_scale = av1_get_tx_scale_tab[txsize];
1556
1.21M
    qparam.tx_size   = txsize;
1557
1.21M
    qparam.qmatrix   = q_matrix;
1558
1.21M
    qparam.iqmatrix  = iq_matrix;
1559
1560
1.21M
    bool is_inter = (pred_mode >= NEARESTMV);
1561
1.21M
    bool perform_rdoq;
1562
1563
    // If rdoq_level is specified in the command line instruction, set perform_rdoq accordingly.
1564
1.21M
    // RDOQ requires: a non-lossless segment, RDOQ enabled in ctx, and either the
    // encode pass or ctx->mds_do_rdoq being set.
    perform_rdoq = !svt_av1_is_lossless_segment(pcs, ctx->blk_ptr->segment_id) &&
1565
22.6k
        ((ctx->mds_do_rdoq || is_encode_pass) && ctx->rdoq_ctrls.enabled);
1566
1.22M
    // Outside the encode pass, RDOQ can additionally be restricted to DCT_DCT
    // transforms and/or to luma.
    if (!is_encode_pass) {
1567
1.22M
        if ((ctx->rdoq_ctrls.dct_dct_only && tx_type != DCT_DCT) ||
1568
1.22M
            (ctx->rdoq_ctrls.skip_uv && component_type != COMPONENT_LUMA)) {
1569
0
            perform_rdoq = 0;
1570
0
        }
1571
1.22M
    }
1572
1.21M
    // Initial quantization: the *_fp facade when RDOQ will follow, the *_b
    // facade otherwise. The high-bit-depth variants are taken for bit_depth > 8
    // or for the encode pass of a 16-bit pipeline.
    if (perform_rdoq) {
1573
22.6k
#if CONFIG_ENABLE_HIGH_BIT_DEPTH
1574
22.6k
        if ((bit_depth > EB_EIGHT_BIT) || (is_encode_pass && scs->is_16bit_pipeline)) {
1575
0
            svt_av1_highbd_quantize_fp_facade((TranLow*)coeff,
1576
0
                                              n_coeffs,
1577
0
                                              &candidate_plane,
1578
0
                                              quant_coeff,
1579
0
                                              (TranLow*)recon_coeff,
1580
0
                                              eob,
1581
0
                                              scan_order,
1582
0
                                              &qparam);
1583
0
        } else
1584
22.6k
#endif
1585
22.6k
        {
1586
22.6k
            svt_av1_quantize_fp_facade((TranLow*)coeff,
1587
22.6k
                                       n_coeffs,
1588
22.6k
                                       &candidate_plane,
1589
22.6k
                                       quant_coeff,
1590
22.6k
                                       (TranLow*)recon_coeff,
1591
22.6k
                                       eob,
1592
22.6k
                                       scan_order,
1593
22.6k
                                       &qparam);
1594
22.6k
        }
1595
1.19M
    } else {
1596
1.19M
#if CONFIG_ENABLE_HIGH_BIT_DEPTH
1597
1.19M
        if ((bit_depth > EB_EIGHT_BIT) || (is_encode_pass && scs->is_16bit_pipeline)) {
1598
0
            svt_av1_highbd_quantize_b_facade((TranLow*)coeff,
1599
0
                                             n_coeffs,
1600
0
                                             &candidate_plane,
1601
0
                                             quant_coeff,
1602
0
                                             (TranLow*)recon_coeff,
1603
0
                                             eob,
1604
0
                                             scan_order,
1605
0
                                             &qparam);
1606
0
        } else
1607
1.19M
#endif
1608
1.19M
        {
1609
1.19M
            av1_quantize_b_facade_ii((TranLow*)coeff,
1610
1.19M
                                     n_coeffs,
1611
1.19M
                                     &candidate_plane,
1612
1.19M
                                     quant_coeff,
1613
1.19M
                                     (TranLow*)recon_coeff,
1614
1.19M
                                     eob,
1615
1.19M
                                     scan_order,
1616
1.19M
                                     &qparam);
1617
1.19M
        }
1618
1.19M
    }
1619
1.21M
    // Density gate: eob_perc is *eob as a percentage of the block's coefficient
    // count. At or above eob_th RDOQ is cancelled and the block is re-quantized
    // with the *_b facade; at or above eob_fast_th the fast optimizer is run
    // first. Note that the fast optimizer does not clear perform_rdoq, so the
    // full optimizer below still runs afterwards if *eob is non-zero.
    if (perform_rdoq && *eob != 0) {
1620
10.3k
        int width    = tx_size_wide[txsize];
1621
10.3k
        int height   = tx_size_high[txsize];
1622
10.3k
        int eob_perc = (*eob) * 100 / (width * height);
1623
10.3k
        if (eob_perc >= ctx->rdoq_ctrls.eob_th) {
1624
0
            perform_rdoq = 0;
1625
0
        }
1626
10.3k
        if (perform_rdoq && (eob_perc >= ctx->rdoq_ctrls.eob_fast_th)) {
1627
0
            svt_fast_optimize_b(
1628
0
                (TranLow*)coeff, &candidate_plane, quant_coeff, (TranLow*)recon_coeff, eob, txsize, tx_type);
1629
0
        }
1630
10.3k
        if (perform_rdoq == 0) {
1631
0
#if CONFIG_ENABLE_HIGH_BIT_DEPTH
1632
0
            if ((bit_depth > EB_EIGHT_BIT) || (is_encode_pass && scs->is_16bit_pipeline)) {
1633
0
                svt_av1_highbd_quantize_b_facade((TranLow*)coeff,
1634
0
                                                 n_coeffs,
1635
0
                                                 &candidate_plane,
1636
0
                                                 quant_coeff,
1637
0
                                                 (TranLow*)recon_coeff,
1638
0
                                                 eob,
1639
0
                                                 scan_order,
1640
0
                                                 &qparam);
1641
0
            } else
1642
0
#endif
1643
0
            {
1644
0
                av1_quantize_b_facade_ii((TranLow*)coeff,
1645
0
                                         n_coeffs,
1646
0
                                         &candidate_plane,
1647
0
                                         quant_coeff,
1648
0
                                         (TranLow*)recon_coeff,
1649
0
                                         eob,
1650
0
                                         scan_order,
1651
0
                                         &qparam);
1652
0
            }
1653
0
        }
1654
10.3k
    }
1655
1.21M
    if (perform_rdoq && *eob != 0) {
1656
        // Perform rdoq
1657
10.3k
        // The final argument is 0 for luma and 1 for either chroma component.
        svt_av1_optimize_b(pcs,
1658
10.3k
                           ctx,
1659
10.3k
                           txb_skip_context,
1660
10.3k
                           dc_sign_context,
1661
10.3k
                           (TranLow*)coeff,
1662
10.3k
                           &candidate_plane,
1663
10.3k
                           quant_coeff,
1664
10.3k
                           (TranLow*)recon_coeff,
1665
10.3k
                           eob,
1666
10.3k
                           &qparam,
1667
10.3k
                           txsize,
1668
10.3k
                           tx_type,
1669
10.3k
                           is_inter,
1670
10.3k
                           scs->vq_ctrls.sharpness_ctrls.rdoq,
1671
10.3k
                           pcs->ppcs->frm_hdr.delta_q_params.delta_q_present,
1672
10.3k
                           pcs->ppcs->picture_qp,
1673
10.3k
                           lambda,
1674
10.3k
                           (component_type == COMPONENT_LUMA) ? 0 : 1);
1675
10.3k
    }
1676
1677
1.21M
#if OPT_COEFF_SHAVING
1678
    // Apply coefficient shaving for luma after all quantization/RDOQ is complete.
1679
    // This catches all luma quantize paths (light PD1, regular TX, encode pass)
1680
    // in a single place.
1681
1.21M
    if (component_type == COMPONENT_LUMA && ctx->coeff_shaving_ctrls.enabled && *eob > 1) {
1682
0
        *eob = shave_coeff(quant_coeff, recon_coeff, *eob, txsize, tx_type, &ctx->coeff_shaving_ctrls);
1683
0
    }
1684
1.21M
#endif
1685
1686
1.21M
    // The cumulative level is only computed when the rate-estimation controls
    // ask for skip / DC-sign context updates.
    if (!ctx->rate_est_ctrls.update_skip_ctx_dc_sign_ctx) {
1687
1.19M
        return 0;
1688
1.19M
    }
1689
1690
    // Derive cul_level
1691
20.4k
    return svt_av1_compute_cul_level(scan_order->scan, quant_coeff, eob);
1692
1.21M
}
1693
1694
// Dispatches the inverse transform + reconstruction to the high-bit-depth or
// the 8-bit implementation, depending on `hbd`.
//
// pred_buffer / rec_buffer : prediction input and reconstruction output;
//                            indexed as uint16_t samples when hbd is true,
//                            as bytes otherwise.
// pred_offset / rec_offset : sample offsets into those buffers.
// rec_coeff_buffer         : dequantized coefficients, read from
//                            rec_coeff_buffer + coeff_offset.
// txsize, transform_type, component_type, eob : forwarded unchanged.
// The last argument of both callees is the lossless flag of the current
// block's segment (ctx->blk_ptr->segment_id).
void svt_aom_inv_transform_recon_wrapper(PictureControlSet* pcs, ModeDecisionContext* ctx, uint8_t* pred_buffer,
1695
                                         uint32_t pred_offset, uint32_t pred_stride, uint8_t* rec_buffer,
1696
                                         uint32_t rec_offset, uint32_t rec_stride, int32_t* rec_coeff_buffer,
1697
                                         uint32_t coeff_offset, bool hbd, TxSize txsize, TxType transform_type,
1698
37.2k
                                         PlaneType component_type, uint32_t eob) {
1699
37.2k
    if (hbd) {
1700
0
        // The high-bit-depth path always passes EB_TEN_BIT as the bit depth.
        svt_aom_inv_transform_recon(rec_coeff_buffer + coeff_offset,
1701
0
                                    CONVERT_TO_BYTEPTR(((uint16_t*)pred_buffer) + pred_offset),
1702
0
                                    pred_stride,
1703
0
                                    CONVERT_TO_BYTEPTR(((uint16_t*)rec_buffer) + rec_offset),
1704
0
                                    rec_stride,
1705
0
                                    txsize,
1706
0
                                    EB_TEN_BIT,
1707
0
                                    transform_type,
1708
0
                                    component_type,
1709
0
                                    eob,
1710
0
                                    svt_av1_is_lossless_segment(pcs, ctx->blk_ptr->segment_id));
1711
37.2k
    } else {
1712
37.2k
        svt_aom_inv_transform_recon8bit(rec_coeff_buffer + coeff_offset,
1713
37.2k
                                        pred_buffer + pred_offset,
1714
37.2k
                                        pred_stride,
1715
37.2k
                                        rec_buffer + rec_offset,
1716
37.2k
                                        rec_stride,
1717
37.2k
                                        txsize,
1718
37.2k
                                        transform_type,
1719
37.2k
                                        component_type,
1720
37.2k
                                        eob,
1721
37.2k
                                        svt_av1_is_lossless_segment(pcs, ctx->blk_ptr->segment_id));
1722
37.2k
    }
1723
37.2k
}
1724
1725
/*
1726
  tx path for light PD1 chroma
1727
*/
1728
/*
 * Chroma transform path for light PD1.
 *
 * For the plane(s) selected by component_type this routine, in order:
 *   1. computes the residual (input - prediction) over bwidth_uv x bheight_uv,
 *   2. runs the forward transform with a single UV transform size (tx_size_uv),
 *   3. quantizes / inverse-quantizes the coefficients (index 0 of quant_dc / eob),
 *   4. measures distortion between transform and reconstructed coefficients and
 *      scales it down by chroma_shift,
 *   5. records whether the plane has any non-zero coefficient.
 * It finishes by calling svt_aom_txb_estimate_coeff_bits for the chroma planes.
 *
 * Parameters:
 *   pcs, ctx                 - forwarded to the transform / quantization / rate helpers;
 *                              ctx also supplies lambda, block geometry and shortcut controls.
 *   cand_bf                  - candidate buffer; its pred, residual, quant, rec_coeff, eob,
 *                              quant_dc, u_has_coeff and v_has_coeff members are read/written here.
 *   input_pic                - source picture; u_buffer/u_stride and v_buffer/v_stride are read.
 *   input_cb_origin_in_index - offset into the input chroma buffers (used for both Cb and Cr).
 *   blk_chroma_origin_index  - offset into the pred and residual chroma buffers (both planes).
 *   component_type           - COMPONENT_CHROMA processes both planes; COMPONENT_CHROMA_CB or
 *                              COMPONENT_CHROMA_CR processes only that plane.
 *   chroma_qindex            - passed to svt_aom_quantize_inv_quantize for both planes.
 *   cb_full_distortion,
 *   cr_full_distortion       - passed to svt_aom_picture_full_distortion32_bits_single; the
 *                              DIST_CALC_RESIDUAL and DIST_CALC_PREDICTION entries are then
 *                              rescaled in place. Only touched for a processed plane.
 *   cb_coeff_bits,
 *   cr_coeff_bits            - forwarded to svt_aom_txb_estimate_coeff_bits (presumably outputs;
 *                              confirm against that helper).
 */
void svt_aom_full_loop_chroma_light_pd1(PictureControlSet* pcs, ModeDecisionContext* ctx,
1729
                                        ModeDecisionCandidateBuffer* cand_bf, EbPictureBufferDesc* input_pic,
1730
                                        uint32_t input_cb_origin_in_index, uint32_t blk_chroma_origin_index,
1731
                                        COMPONENT_TYPE component_type, uint32_t chroma_qindex,
1732
                                        uint64_t cb_full_distortion[DIST_CALC_TOTAL],
1733
                                        uint64_t cr_full_distortion[DIST_CALC_TOTAL], uint64_t* cb_coeff_bits,
1734
0
                                        uint64_t* cr_coeff_bits) {
1735
0
    // Lambda follows the bit depth used by mode decision (10-bit vs 8-bit entry).
    uint32_t     full_lambda  = ctx->hbd_md ? ctx->full_lambda_md[EB_10_BIT_MD] : ctx->full_lambda_md[EB_8_BIT_MD];
1736
0
    // One transform size is used for the whole chroma block
    // (the 1, 1 arguments are presumably the x/y subsampling flags - TODO confirm).
    const TxSize tx_size_uv   = av1_get_max_uv_txsize(ctx->blk_geom->bsize, 1, 1);
1737
0
    const int    tx_width_uv  = tx_size_wide[tx_size_uv];
1738
0
    const int    tx_height_uv = tx_size_high[tx_size_uv];
1739
1740
0
    // Start from the configured partial-frequency shape; it may be overridden to N4 below.
    TxCoeffShape pf_shape = ctx->pf_ctrls.pf_shape;
1741
    // If Cb component not detected as complex, can use TX shortcuts
1742
0
    if (ctx->use_tx_shortcuts_mds3 &&
1743
0
        (ctx->chroma_complexity == COMPONENT_LUMA || ctx->chroma_complexity == COMPONENT_CHROMA_CR)) {
1744
0
        pf_shape = N4_SHAPE;
1745
0
    } else {
1746
0
        uint8_t use_pfn4_cond = 0;
1747
0
        if (ctx->lpd1_tx_ctrls.use_uv_shortcuts_on_y_coeffs &&
1748
0
            (ctx->chroma_complexity == COMPONENT_LUMA || ctx->chroma_complexity == COMPONENT_CHROMA_CR)) {
1749
0
            // Threshold is the transform area in units of 16x16; the shortcut is taken when the
            // candidate's non-zero coefficient count is below it or the block has no coefficients.
            const uint16_t th = ((tx_width_uv >> 4) * (tx_height_uv >> 4));
1750
0
            use_pfn4_cond     = (cand_bf->cnt_nz_coeff < th) || !cand_bf->block_has_coeff ? 1 : 0;
1751
0
        }
1752
0
        if (use_pfn4_cond) {
1753
0
            pf_shape = N4_SHAPE;
1754
0
        }
1755
0
    }
1756
0
    assert(tx_size_uv < TX_SIZES_ALL);
1757
0
    // Shift applied to the coefficient-domain distortion; depends on the per-size scale table.
    const int32_t chroma_shift = (MAX_TX_SCALE - av1_get_tx_scale_tab[tx_size_uv]) * 2;
1758
0
    uint32_t      bwidth       = tx_width_uv;
1759
0
    uint32_t      bheight      = tx_height_uv;
1760
0
    // With a non-default shape only a reduced region is measured: width is shifted down with a
    // floor of 4, height is shifted down without a floor.
    if (pf_shape) {
1761
0
        bwidth  = MAX((bwidth >> pf_shape), 4);
1762
0
        bheight = (bheight >> pf_shape);
1763
0
    }
1764
0
    if (component_type == COMPONENT_CHROMA || component_type == COMPONENT_CHROMA_CB) {
1765
0
        // Cb Residual
        svt_aom_residual_kernel(input_pic->u_buffer,
1766
0
                                input_cb_origin_in_index,
1767
0
                                input_pic->u_stride,
1768
0
                                cand_bf->pred->u_buffer,
1769
0
                                blk_chroma_origin_index,
1770
0
                                cand_bf->pred->u_stride,
1771
0
                                (int16_t*)cand_bf->residual->u_buffer,
1772
0
                                blk_chroma_origin_index,
1773
0
                                cand_bf->residual->u_stride,
1774
0
                                ctx->hbd_md,
1775
0
                                ctx->blk_geom->bwidth_uv,
1776
0
                                ctx->blk_geom->bheight_uv);
1777
1778
        // Cb Transform
1779
0
        // Coefficients are written starting at offset 0 of the context's Cb coefficient buffer.
        svt_aom_estimate_transform(pcs,
1780
0
                                   ctx,
1781
0
                                   &(((int16_t*)cand_bf->residual->u_buffer)[blk_chroma_origin_index]),
1782
0
                                   cand_bf->residual->u_stride,
1783
0
                                   &(((int32_t*)ctx->tx_coeffs->u_buffer)[0]),
1784
0
                                   NOT_USED_VALUE,
1785
0
                                   tx_size_uv,
1786
0
                                   &ctx->three_quad_energy,
1787
0
                                   ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT,
1788
0
                                   cand_bf->cand->transform_type_uv,
1789
0
                                   PLANE_TYPE_UV,
1790
0
                                   pf_shape);
1791
0
        // Cb Quantization / inverse quantization; the return value and eob go to index 0.
        cand_bf->quant_dc.u[0] = svt_aom_quantize_inv_quantize(pcs,
1792
0
                                                               ctx,
1793
0
                                                               &(((int32_t*)ctx->tx_coeffs->u_buffer)[0]),
1794
0
                                                               &(((int32_t*)cand_bf->quant->u_buffer)[0]),
1795
0
                                                               &(((int32_t*)cand_bf->rec_coeff->u_buffer)[0]),
1796
0
                                                               chroma_qindex,
1797
0
                                                               0,
1798
0
                                                               tx_size_uv,
1799
0
                                                               &cand_bf->eob.u[0],
1800
0
                                                               COMPONENT_CHROMA_CB,
1801
0
                                                               ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT,
1802
0
                                                               cand_bf->cand->transform_type_uv,
1803
0
                                                               0,
1804
0
                                                               0,
1805
0
                                                               cand_bf->cand->block_mi.mode,
1806
0
                                                               full_lambda,
1807
0
                                                               false);
1808
1809
0
        // Cb distortion between transform coefficients and reconstructed coefficients,
        // measured over the (possibly reduced) bwidth x bheight region.
        svt_aom_picture_full_distortion32_bits_single(&(((int32_t*)ctx->tx_coeffs->u_buffer)[0]),
1810
0
                                                      &(((int32_t*)cand_bf->rec_coeff->u_buffer)[0]),
1811
0
                                                      tx_width_uv,
1812
0
                                                      bwidth,
1813
0
                                                      bheight,
1814
0
                                                      cb_full_distortion,
1815
0
                                                      cand_bf->eob.u[0]);
1816
0
        cb_full_distortion[DIST_CALC_RESIDUAL]   = RIGHT_SIGNED_SHIFT(cb_full_distortion[DIST_CALC_RESIDUAL],
1817
0
                                                                    chroma_shift);
1818
0
        cb_full_distortion[DIST_CALC_PREDICTION] = RIGHT_SIGNED_SHIFT(cb_full_distortion[DIST_CALC_PREDICTION],
1819
0
                                                                      chroma_shift);
1820
0
        cand_bf->u_has_coeff                     = (cand_bf->eob.u[0] > 0);
1821
0
    }
1822
1823
0
    // Re-derive the shape for Cr: same logic as for Cb, but the shortcut now applies when the
    // complexity is COMPONENT_LUMA or COMPONENT_CHROMA_CB.
    pf_shape = ctx->pf_ctrls.pf_shape;
1824
    // If Cr component not detected as complex, can use TX shortcuts
1825
0
    if (ctx->use_tx_shortcuts_mds3 &&
1826
0
        (ctx->chroma_complexity == COMPONENT_LUMA || ctx->chroma_complexity == COMPONENT_CHROMA_CB)) {
1827
0
        pf_shape = N4_SHAPE;
1828
0
    } else {
1829
0
        uint8_t use_pfn4_cond = 0;
1830
0
        if (ctx->lpd1_tx_ctrls.use_uv_shortcuts_on_y_coeffs &&
1831
0
            (ctx->chroma_complexity == COMPONENT_LUMA || ctx->chroma_complexity == COMPONENT_CHROMA_CB)) {
1832
0
            const uint16_t th = ((tx_width_uv >> 4) * (tx_height_uv >> 4));
1833
0
            use_pfn4_cond     = (cand_bf->cnt_nz_coeff < th) || !cand_bf->block_has_coeff ? 1 : 0;
1834
0
        }
1835
0
        if (use_pfn4_cond) {
1836
0
            pf_shape = N4_SHAPE;
1837
0
        }
1838
0
    }
1839
0
    // Reset the measured region to the full transform size before applying the Cr shape.
    bwidth  = tx_width_uv;
1840
0
    bheight = tx_height_uv;
1841
0
    if (pf_shape) {
1842
0
        bwidth  = MAX((bwidth >> pf_shape), 4);
1843
0
        bheight = (bheight >> pf_shape);
1844
0
    }
1845
1846
0
    if (component_type == COMPONENT_CHROMA || component_type == COMPONENT_CHROMA_CR) {
1847
        //Cr Residual
1848
0
        // The input offset parameter is named for Cb but is used for the Cr plane as well.
        svt_aom_residual_kernel(input_pic->v_buffer,
1849
0
                                input_cb_origin_in_index,
1850
0
                                input_pic->v_stride,
1851
0
                                cand_bf->pred->v_buffer,
1852
0
                                blk_chroma_origin_index,
1853
0
                                cand_bf->pred->v_stride,
1854
0
                                (int16_t*)cand_bf->residual->v_buffer,
1855
0
                                blk_chroma_origin_index,
1856
0
                                cand_bf->residual->v_stride,
1857
0
                                ctx->hbd_md,
1858
0
                                ctx->blk_geom->bwidth_uv,
1859
0
                                ctx->blk_geom->bheight_uv);
1860
        // Cr Transform
1861
0
        svt_aom_estimate_transform(pcs,
1862
0
                                   ctx,
1863
0
                                   &(((int16_t*)cand_bf->residual->v_buffer)[blk_chroma_origin_index]),
1864
0
                                   cand_bf->residual->v_stride,
1865
0
                                   &(((int32_t*)ctx->tx_coeffs->v_buffer)[0]),
1866
0
                                   NOT_USED_VALUE,
1867
0
                                   tx_size_uv,
1868
0
                                   &ctx->three_quad_energy,
1869
0
                                   ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT,
1870
0
                                   cand_bf->cand->transform_type_uv,
1871
0
                                   PLANE_TYPE_UV,
1872
0
                                   pf_shape);
1873
0
        // Cr Quantization / inverse quantization; the return value and eob go to index 0.
        cand_bf->quant_dc.v[0] = svt_aom_quantize_inv_quantize(pcs,
1874
0
                                                               ctx,
1875
0
                                                               &(((int32_t*)ctx->tx_coeffs->v_buffer)[0]),
1876
0
                                                               &(((int32_t*)cand_bf->quant->v_buffer)[0]),
1877
0
                                                               &(((int32_t*)cand_bf->rec_coeff->v_buffer)[0]),
1878
0
                                                               chroma_qindex,
1879
0
                                                               0,
1880
0
                                                               tx_size_uv,
1881
0
                                                               &cand_bf->eob.v[0],
1882
0
                                                               COMPONENT_CHROMA_CR,
1883
0
                                                               ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT,
1884
0
                                                               cand_bf->cand->transform_type_uv,
1885
0
                                                               0,
1886
0
                                                               0,
1887
0
                                                               cand_bf->cand->block_mi.mode,
1888
0
                                                               full_lambda,
1889
0
                                                               false);
1890
1891
0
        // Cr distortion between transform coefficients and reconstructed coefficients.
        svt_aom_picture_full_distortion32_bits_single(&(((int32_t*)ctx->tx_coeffs->v_buffer)[0]),
1892
0
                                                      &(((int32_t*)cand_bf->rec_coeff->v_buffer)[0]),
1893
0
                                                      tx_width_uv,
1894
0
                                                      bwidth,
1895
0
                                                      bheight,
1896
0
                                                      cr_full_distortion,
1897
0
                                                      cand_bf->eob.v[0]);
1898
1899
0
        cr_full_distortion[DIST_CALC_RESIDUAL]   = RIGHT_SIGNED_SHIFT(cr_full_distortion[DIST_CALC_RESIDUAL],
1900
0
                                                                    chroma_shift);
1901
0
        cr_full_distortion[DIST_CALC_PREDICTION] = RIGHT_SIGNED_SHIFT(cr_full_distortion[DIST_CALC_PREDICTION],
1902
0
                                                                      chroma_shift);
1903
0
        cand_bf->v_has_coeff                     = (cand_bf->eob.v[0] > 0);
1904
0
    }
1905
1906
    //CHROMA-ONLY
1907
0
    // Both eob.u[0] and eob.v[0] are passed even when only one plane was processed above.
    // NOTE(review): presumably the callee uses component_type to ignore the other plane - confirm.
    svt_aom_txb_estimate_coeff_bits(ctx,
1908
0
                                    0,
1909
0
                                    NULL,
1910
0
                                    pcs,
1911
0
                                    cand_bf,
1912
0
                                    NOT_USED_VALUE,
1913
0
                                    0,
1914
0
                                    cand_bf->quant,
1915
0
                                    NOT_USED_VALUE,
1916
0
                                    cand_bf->eob.u[0],
1917
0
                                    cand_bf->eob.v[0],
1918
0
                                    NOT_USED_VALUE,
1919
0
                                    cb_coeff_bits,
1920
0
                                    cr_coeff_bits,
1921
0
                                    NOT_USED_VALUE,
1922
0
                                    tx_size_uv,
1923
0
                                    NOT_USED_VALUE,
1924
0
                                    cand_bf->cand->transform_type_uv,
1925
0
                                    component_type);
1926
0
}
1927
1928
/****************************************
1929
 ************  Full loop ****************
1930
****************************************/
1931
void svt_aom_full_loop_uv(PictureControlSet* pcs, ModeDecisionContext* ctx, ModeDecisionCandidateBuffer* cand_bf,
1932
                          EbPictureBufferDesc* input_pic, COMPONENT_TYPE component_type, uint32_t chroma_qindex,
1933
                          uint64_t cb_full_distortion[DIST_TOTAL][DIST_CALC_TOTAL],
1934
                          uint64_t cr_full_distortion[DIST_TOTAL][DIST_CALC_TOTAL], uint64_t* cb_coeff_bits,
1935
127k
                          uint64_t* cr_coeff_bits, bool is_full_loop) {
1936
127k
    EbSpatialFullDistType spatial_full_dist_type_fun = ctx->hbd_md ? svt_full_distortion_kernel16_bits
1937
127k
                                                                   : svt_spatial_full_distortion_kernel;
1938
127k
    EB_ALIGN(16) uint64_t txb_full_distortion[DIST_TOTAL][3][DIST_CALC_TOTAL];
1939
127k
    const SsimLevel       ssim_level = ctx->tune_ssim_level;
1940
127k
    if (ssim_level > SSIM_LVL_0) {
1941
0
        assert(ctx->pd_pass == PD_PASS_1);
1942
0
        assert(ctx->md_stage == MD_STAGE_3);
1943
0
    }
1944
127k
    cand_bf->u_has_coeff = 0;
1945
127k
    cand_bf->v_has_coeff = 0;
1946
127k
    int16_t* chroma_residual_ptr;
1947
127k
    uint32_t full_lambda = ctx->hbd_md ? ctx->full_lambda_md[EB_10_BIT_MD] : ctx->full_lambda_md[EB_8_BIT_MD];
1948
1949
127k
    ctx->three_quad_energy = 0;
1950
1951
127k
    const double effective_ac_bias = get_effective_ac_bias(
1952
127k
        pcs->scs->static_config.ac_bias, pcs->slice_type == I_SLICE, pcs->temporal_layer_index);
1953
127k
    const uint8_t tx_depth     = cand_bf->cand->block_mi.tx_depth;
1954
127k
    const TxSize  tx_size      = av1_get_tx_size(ctx->blk_geom->bsize, tx_depth, PLANE_TYPE_Y);
1955
127k
    const TxSize  tx_size_uv   = av1_get_max_uv_txsize(ctx->blk_geom->bsize, 1, 1);
1956
127k
    const int     tx_width_uv  = tx_size_wide[tx_size_uv];
1957
127k
    const int     tx_height_uv = tx_size_high[tx_size_uv];
1958
127k
    const bool    is_inter = (is_inter_mode(cand_bf->cand->block_mi.mode) || cand_bf->cand->block_mi.use_intrabc) ? true
1959
127k
                                                                                                                  : false;
1960
127k
    const int     tu_count = tx_depth ? 1 : tx_blocks_per_depth[ctx->blk_geom->bsize][tx_depth]; //NM: 128x128 exeption
1961
127k
    uint32_t      txb_1d_offset = 0;
1962
1963
127k
    int txb_itr = 0;
1964
127k
    do {
1965
127k
        const uint32_t txb_origin_x        = tx_org[ctx->blk_geom->bsize][is_inter][tx_depth][txb_itr].x;
1966
127k
        const uint32_t txb_origin_y        = tx_org[ctx->blk_geom->bsize][is_inter][tx_depth][txb_itr].y;
1967
127k
        int32_t        cropped_tx_width_uv = MIN(
1968
127k
            (uint32_t)tx_width_uv, (pcs->ppcs->aligned_width >> 1) - ((ROUND_UV(ctx->blk_org_x + txb_origin_x)) >> 1));
1969
127k
        int32_t cropped_tx_height_uv = MIN(
1970
127k
            (uint32_t)tx_height_uv,
1971
127k
            (pcs->ppcs->aligned_height >> 1) - ((ROUND_UV(ctx->blk_org_y + txb_origin_y)) >> 1));
1972
127k
        uint32_t tu_cb_origin_index = (ROUND_UV(txb_origin_x) +
1973
127k
                                       (ROUND_UV(txb_origin_y) * cand_bf->residual->u_stride)) >>
1974
127k
            1;
1975
127k
        uint32_t tu_cr_origin_index = (ROUND_UV(txb_origin_x) +
1976
127k
                                       (ROUND_UV(txb_origin_y) * cand_bf->residual->v_stride)) >>
1977
127k
            1;
1978
127k
        TxCoeffShape pf_shape = ctx->pf_ctrls.pf_shape;
1979
127k
        if (ctx->md_stage == MD_STAGE_3 && ctx->use_tx_shortcuts_mds3 && ctx->chroma_complexity == COMPONENT_LUMA) {
1980
0
            pf_shape = N4_SHAPE;
1981
0
        }
1982
        // for chroma path, use luma coeff info to make shortcut decisions (available even if MDS1 is skipped)
1983
127k
        else if (ctx->tx_shortcut_ctrls.apply_pf_on_coeffs && ctx->md_stage == MD_STAGE_3 &&
1984
0
                 ctx->chroma_complexity == COMPONENT_LUMA) {
1985
0
            uint8_t use_pfn4_cond = 0;
1986
1987
0
            const uint16_t th = (tx_width_uv >> 4) * (tx_height_uv >> 4);
1988
0
            use_pfn4_cond     = (cand_bf->cnt_nz_coeff < th) || !cand_bf->block_has_coeff ? 1 : 0;
1989
1990
0
            if (use_pfn4_cond) {
1991
0
                pf_shape = N4_SHAPE;
1992
0
            }
1993
0
        }
1994
        //    This function replaces the previous Intra Chroma mode if the LM fast
1995
        //    cost is better.
1996
        //    *Note - this might require that we have inv transform in the loop
1997
127k
        if (component_type == COMPONENT_CHROMA_CB || component_type == COMPONENT_CHROMA ||
1998
127k
            component_type == COMPONENT_ALL) {
1999
127k
            ctx->cb_txb_skip_context = 0;
2000
127k
            ctx->cb_dc_sign_context  = 0;
2001
127k
            if (ctx->rate_est_ctrls.update_skip_ctx_dc_sign_ctx) {
2002
0
                svt_aom_get_txb_ctx(pcs,
2003
0
                                    COMPONENT_CHROMA,
2004
0
                                    ctx->cb_dc_sign_level_coeff_na,
2005
0
                                    ROUND_UV(ctx->blk_org_x + txb_origin_x) >> 1,
2006
0
                                    ROUND_UV(ctx->blk_org_y + txb_origin_y) >> 1,
2007
0
                                    ctx->blk_geom->bsize_uv,
2008
0
                                    tx_size_uv,
2009
0
                                    &ctx->cb_txb_skip_context,
2010
0
                                    &ctx->cb_dc_sign_context);
2011
0
            }
2012
            // Configure the Chroma Residual Ptr
2013
2014
127k
            chroma_residual_ptr = &(((int16_t*)cand_bf->residual->u_buffer)[tu_cb_origin_index]);
2015
2016
            // Cb Transform
2017
127k
            svt_aom_estimate_transform(pcs,
2018
127k
                                       ctx,
2019
127k
                                       chroma_residual_ptr,
2020
127k
                                       cand_bf->residual->u_stride,
2021
127k
                                       &(((int32_t*)ctx->tx_coeffs->u_buffer)[txb_1d_offset]),
2022
127k
                                       NOT_USED_VALUE,
2023
127k
                                       tx_size_uv,
2024
127k
                                       &ctx->three_quad_energy,
2025
127k
                                       ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT,
2026
127k
                                       cand_bf->cand->transform_type_uv,
2027
127k
                                       PLANE_TYPE_UV,
2028
127k
                                       pf_shape);
2029
2030
127k
            int32_t seg_qp               = pcs->ppcs->frm_hdr.segmentation_params.segmentation_enabled
2031
127k
                              ? pcs->ppcs->frm_hdr.segmentation_params.feature_data[ctx->blk_ptr->segment_id][SEG_LVL_ALT_Q]
2032
127k
                              : 0;
2033
127k
            cand_bf->quant_dc.u[txb_itr] = svt_aom_quantize_inv_quantize(
2034
127k
                pcs,
2035
127k
                ctx,
2036
127k
                &(((int32_t*)ctx->tx_coeffs->u_buffer)[txb_1d_offset]),
2037
127k
                &(((int32_t*)cand_bf->quant->u_buffer)[txb_1d_offset]),
2038
127k
                &(((int32_t*)cand_bf->rec_coeff->u_buffer)[txb_1d_offset]),
2039
127k
                chroma_qindex,
2040
127k
                seg_qp,
2041
127k
                tx_size_uv,
2042
127k
                &cand_bf->eob.u[txb_itr],
2043
127k
                COMPONENT_CHROMA_CB,
2044
127k
                ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT,
2045
127k
                cand_bf->cand->transform_type_uv,
2046
127k
                ctx->cb_txb_skip_context,
2047
127k
                ctx->cb_dc_sign_context,
2048
127k
                cand_bf->cand->block_mi.mode,
2049
127k
                full_lambda,
2050
127k
                false);
2051
2052
127k
            if (is_full_loop && ctx->mds_do_spatial_sse) {
2053
127k
                uint32_t cb_has_coeff = cand_bf->eob.u[txb_itr] > 0;
2054
2055
127k
                if (cb_has_coeff) {
2056
5.47k
                    svt_aom_inv_transform_recon_wrapper(pcs,
2057
5.47k
                                                        ctx,
2058
5.47k
                                                        cand_bf->pred->u_buffer,
2059
5.47k
                                                        tu_cb_origin_index,
2060
5.47k
                                                        cand_bf->pred->u_stride,
2061
5.47k
                                                        cand_bf->recon->u_buffer,
2062
5.47k
                                                        tu_cb_origin_index,
2063
5.47k
                                                        cand_bf->recon->u_stride,
2064
5.47k
                                                        (int32_t*)cand_bf->rec_coeff->u_buffer,
2065
5.47k
                                                        txb_1d_offset,
2066
5.47k
                                                        ctx->hbd_md,
2067
5.47k
                                                        tx_size_uv,
2068
5.47k
                                                        cand_bf->cand->transform_type_uv,
2069
5.47k
                                                        PLANE_TYPE_UV,
2070
5.47k
                                                        (uint32_t)cand_bf->eob.u[txb_itr]);
2071
121k
                } else {
2072
121k
                    svt_av1_picture_copy_cb(cand_bf->pred,
2073
121k
                                            tu_cb_origin_index,
2074
121k
                                            cand_bf->recon,
2075
121k
                                            tu_cb_origin_index,
2076
121k
                                            tx_width_uv,
2077
121k
                                            tx_height_uv,
2078
121k
                                            ctx->hbd_md);
2079
121k
                }
2080
2081
127k
                const uint32_t input_chroma_txb_origin_index = ((ROUND_UV(ctx->blk_org_x + txb_origin_x)) >> 1) +
2082
127k
                    ((ROUND_UV(ctx->blk_org_y + txb_origin_y)) >> 1) * input_pic->u_stride;
2083
127k
                const int32_t txb_uv_origin_index = (ROUND_UV(txb_origin_x) +
2084
127k
                                                     (ROUND_UV(txb_origin_y) * cand_bf->quant->u_stride)) >>
2085
127k
                    1;
2086
2087
128k
                if (ssim_level == SSIM_LVL_1 || ssim_level == SSIM_LVL_3) {
2088
0
                    txb_full_distortion[DIST_SSIM][1][DIST_CALC_PREDICTION] = svt_spatial_full_distortion_ssim_kernel(
2089
0
                        input_pic->u_buffer,
2090
0
                        input_chroma_txb_origin_index,
2091
0
                        input_pic->u_stride,
2092
0
                        cand_bf->pred->u_buffer,
2093
0
                        txb_uv_origin_index,
2094
0
                        cand_bf->pred->u_stride,
2095
0
                        cropped_tx_width_uv,
2096
0
                        cropped_tx_height_uv,
2097
0
                        ctx->hbd_md,
2098
0
                        effective_ac_bias);
2099
2100
0
                    txb_full_distortion[DIST_SSIM][1][DIST_CALC_RESIDUAL] = svt_spatial_full_distortion_ssim_kernel(
2101
0
                        input_pic->u_buffer,
2102
0
                        input_chroma_txb_origin_index,
2103
0
                        input_pic->u_stride,
2104
0
                        cand_bf->recon->u_buffer,
2105
0
                        txb_uv_origin_index,
2106
0
                        cand_bf->recon->u_stride,
2107
0
                        cropped_tx_width_uv,
2108
0
                        cropped_tx_height_uv,
2109
0
                        ctx->hbd_md,
2110
0
                        effective_ac_bias);
2111
2112
0
                    txb_full_distortion[DIST_SSIM][1][DIST_CALC_PREDICTION] <<= 4;
2113
0
                    txb_full_distortion[DIST_SSIM][1][DIST_CALC_RESIDUAL] <<= 4;
2114
0
                }
2115
127k
                txb_full_distortion[DIST_SSD][1][DIST_CALC_PREDICTION] = spatial_full_dist_type_fun(
2116
127k
                    input_pic->u_buffer,
2117
127k
                    input_chroma_txb_origin_index,
2118
127k
                    input_pic->u_stride,
2119
127k
                    cand_bf->pred->u_buffer,
2120
127k
                    txb_uv_origin_index,
2121
127k
                    cand_bf->pred->u_stride,
2122
127k
                    cropped_tx_width_uv,
2123
127k
                    cropped_tx_height_uv);
2124
127k
                if (effective_ac_bias) {
2125
0
                    txb_full_distortion[DIST_SSD][1][DIST_CALC_PREDICTION] += get_svt_psy_full_dist(
2126
0
                        input_pic->u_buffer,
2127
0
                        input_chroma_txb_origin_index,
2128
0
                        input_pic->u_stride,
2129
0
                        cand_bf->pred->u_buffer,
2130
0
                        txb_uv_origin_index,
2131
0
                        cand_bf->pred->u_stride,
2132
0
                        cropped_tx_width_uv,
2133
0
                        cropped_tx_height_uv,
2134
0
                        ctx->hbd_md,
2135
0
                        effective_ac_bias);
2136
0
                }
2137
2138
127k
                txb_full_distortion[DIST_SSD][1][DIST_CALC_RESIDUAL] = spatial_full_dist_type_fun(
2139
127k
                    input_pic->u_buffer,
2140
127k
                    input_chroma_txb_origin_index,
2141
127k
                    input_pic->u_stride,
2142
127k
                    cand_bf->recon->u_buffer,
2143
127k
                    txb_uv_origin_index,
2144
127k
                    cand_bf->recon->u_stride,
2145
127k
                    cropped_tx_width_uv,
2146
127k
                    cropped_tx_height_uv);
2147
127k
                if (effective_ac_bias) {
2148
0
                    txb_full_distortion[DIST_SSD][1][DIST_CALC_RESIDUAL] += get_svt_psy_full_dist(
2149
0
                        input_pic->u_buffer,
2150
0
                        input_chroma_txb_origin_index,
2151
0
                        input_pic->u_stride,
2152
0
                        cand_bf->recon->u_buffer,
2153
0
                        txb_uv_origin_index,
2154
0
                        cand_bf->recon->u_stride,
2155
0
                        cropped_tx_width_uv,
2156
0
                        cropped_tx_height_uv,
2157
0
                        ctx->hbd_md,
2158
0
                        effective_ac_bias);
2159
0
                }
2160
2161
127k
                txb_full_distortion[DIST_SSD][1][DIST_CALC_PREDICTION] <<= 4;
2162
127k
                txb_full_distortion[DIST_SSD][1][DIST_CALC_RESIDUAL] <<= 4;
2163
127k
            } else {
2164
                // *Full Distortion (SSE)
2165
                // *Note - there are known issues with how this distortion metric is currently
2166
                //    calculated.  The amount of scaling between the two arrays is not
2167
                //    equivalent.
2168
169
                uint32_t bwidth  = tx_width_uv;
2169
169
                uint32_t bheight = tx_height_uv;
2170
169
                if (pf_shape) {
2171
0
                    bwidth  = MAX((bwidth >> pf_shape), 4);
2172
0
                    bheight = (bheight >> pf_shape);
2173
0
                }
2174
169
                svt_aom_picture_full_distortion32_bits_single(
2175
169
                    &(((int32_t*)ctx->tx_coeffs->u_buffer)[txb_1d_offset]),
2176
169
                    &(((int32_t*)cand_bf->rec_coeff->u_buffer)[txb_1d_offset]),
2177
169
                    tx_width_uv,
2178
169
                    bwidth,
2179
169
                    bheight,
2180
169
                    txb_full_distortion[DIST_SSD][1],
2181
169
                    cand_bf->eob.u[txb_itr]);
2182
2183
169
                const int32_t chroma_shift = (MAX_TX_SCALE - av1_get_tx_scale_tab[tx_size_uv]) * 2;
2184
169
                txb_full_distortion[DIST_SSD][1][DIST_CALC_RESIDUAL] = RIGHT_SIGNED_SHIFT(
2185
169
                    txb_full_distortion[DIST_SSD][1][DIST_CALC_RESIDUAL], chroma_shift);
2186
169
                txb_full_distortion[DIST_SSD][1][DIST_CALC_PREDICTION] = RIGHT_SIGNED_SHIFT(
2187
169
                    txb_full_distortion[DIST_SSD][1][DIST_CALC_PREDICTION], chroma_shift);
2188
169
            }
2189
127k
            cand_bf->u_has_coeff |= ((cand_bf->eob.u[txb_itr] != 0) << txb_itr);
2190
127k
            cb_full_distortion[DIST_SSIM][DIST_CALC_RESIDUAL] += txb_full_distortion[DIST_SSIM][1][DIST_CALC_RESIDUAL];
2191
127k
            cb_full_distortion[DIST_SSIM][DIST_CALC_PREDICTION] +=
2192
127k
                txb_full_distortion[DIST_SSIM][1][DIST_CALC_PREDICTION];
2193
2194
127k
            cb_full_distortion[DIST_SSD][DIST_CALC_RESIDUAL] += txb_full_distortion[DIST_SSD][1][DIST_CALC_RESIDUAL];
2195
127k
            cb_full_distortion[DIST_SSD][DIST_CALC_PREDICTION] +=
2196
127k
                txb_full_distortion[DIST_SSD][1][DIST_CALC_PREDICTION];
2197
127k
        }
2198
2199
128k
        if (component_type == COMPONENT_CHROMA_CR || component_type == COMPONENT_CHROMA ||
2200
128k
            component_type == COMPONENT_ALL) {
2201
128k
            ctx->cr_txb_skip_context = 0;
2202
128k
            ctx->cr_dc_sign_context  = 0;
2203
128k
            if (ctx->rate_est_ctrls.update_skip_ctx_dc_sign_ctx) {
2204
0
                svt_aom_get_txb_ctx(pcs,
2205
0
                                    COMPONENT_CHROMA,
2206
0
                                    ctx->cr_dc_sign_level_coeff_na,
2207
0
                                    ROUND_UV(ctx->blk_org_x + txb_origin_x) >> 1,
2208
0
                                    ROUND_UV(ctx->blk_org_y + txb_origin_y) >> 1,
2209
0
                                    ctx->blk_geom->bsize_uv,
2210
0
                                    tx_size_uv,
2211
0
                                    &ctx->cr_txb_skip_context,
2212
0
                                    &ctx->cr_dc_sign_context);
2213
0
            }
2214
            // Configure the Chroma Residual Ptr
2215
2216
128k
            chroma_residual_ptr = &(((int16_t*)cand_bf->residual->v_buffer)[tu_cr_origin_index]);
2217
2218
            // Cr Transform
2219
128k
            svt_aom_estimate_transform(pcs,
2220
128k
                                       ctx,
2221
128k
                                       chroma_residual_ptr,
2222
128k
                                       cand_bf->residual->v_stride,
2223
128k
                                       &(((int32_t*)ctx->tx_coeffs->v_buffer)[txb_1d_offset]),
2224
128k
                                       NOT_USED_VALUE,
2225
128k
                                       tx_size_uv,
2226
128k
                                       &ctx->three_quad_energy,
2227
128k
                                       ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT,
2228
128k
                                       cand_bf->cand->transform_type_uv,
2229
128k
                                       PLANE_TYPE_UV,
2230
128k
                                       pf_shape);
2231
128k
            int32_t seg_qp               = pcs->ppcs->frm_hdr.segmentation_params.segmentation_enabled
2232
128k
                              ? pcs->ppcs->frm_hdr.segmentation_params.feature_data[ctx->blk_ptr->segment_id][SEG_LVL_ALT_Q]
2233
128k
                              : 0;
2234
128k
            cand_bf->quant_dc.v[txb_itr] = svt_aom_quantize_inv_quantize(
2235
128k
                pcs,
2236
128k
                ctx,
2237
128k
                &(((int32_t*)ctx->tx_coeffs->v_buffer)[txb_1d_offset]),
2238
128k
                &(((int32_t*)cand_bf->quant->v_buffer)[txb_1d_offset]),
2239
128k
                &(((int32_t*)cand_bf->rec_coeff->v_buffer)[txb_1d_offset]),
2240
128k
                chroma_qindex,
2241
128k
                seg_qp,
2242
128k
                tx_size_uv,
2243
128k
                &cand_bf->eob.v[txb_itr],
2244
128k
                COMPONENT_CHROMA_CR,
2245
128k
                ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT,
2246
128k
                cand_bf->cand->transform_type_uv,
2247
128k
                ctx->cr_txb_skip_context,
2248
128k
                ctx->cr_dc_sign_context,
2249
128k
                cand_bf->cand->block_mi.mode,
2250
128k
                full_lambda,
2251
128k
                false);
2252
128k
            if (is_full_loop && ctx->mds_do_spatial_sse) {
2253
127k
                uint32_t cr_has_coeff = cand_bf->eob.v[txb_itr] > 0;
2254
2255
127k
                if (cr_has_coeff) {
2256
5.46k
                    svt_aom_inv_transform_recon_wrapper(pcs,
2257
5.46k
                                                        ctx,
2258
5.46k
                                                        cand_bf->pred->v_buffer,
2259
5.46k
                                                        tu_cr_origin_index,
2260
5.46k
                                                        cand_bf->pred->v_stride,
2261
5.46k
                                                        cand_bf->recon->v_buffer,
2262
5.46k
                                                        tu_cr_origin_index,
2263
5.46k
                                                        cand_bf->recon->v_stride,
2264
5.46k
                                                        (int32_t*)cand_bf->rec_coeff->v_buffer,
2265
5.46k
                                                        txb_1d_offset,
2266
5.46k
                                                        ctx->hbd_md,
2267
5.46k
                                                        tx_size_uv,
2268
5.46k
                                                        cand_bf->cand->transform_type_uv,
2269
5.46k
                                                        PLANE_TYPE_UV,
2270
5.46k
                                                        (uint32_t)cand_bf->eob.v[txb_itr]);
2271
121k
                } else {
2272
121k
                    svt_av1_picture_copy_cr(cand_bf->pred,
2273
121k
                                            tu_cb_origin_index,
2274
121k
                                            cand_bf->recon,
2275
121k
                                            tu_cb_origin_index,
2276
121k
                                            tx_width_uv,
2277
121k
                                            tx_height_uv,
2278
121k
                                            ctx->hbd_md);
2279
121k
                }
2280
127k
                const uint32_t input_chroma_txb_origin_index = ((ROUND_UV(ctx->blk_org_x + txb_origin_x)) >> 1) +
2281
127k
                    ((ROUND_UV(ctx->blk_org_y + txb_origin_y)) >> 1) * input_pic->v_stride;
2282
127k
                const int32_t txb_uv_origin_index = (ROUND_UV(txb_origin_x) +
2283
127k
                                                     (ROUND_UV(txb_origin_y) * cand_bf->quant->v_stride)) >>
2284
127k
                    1;
2285
2286
128k
                if (ssim_level == SSIM_LVL_1 || ssim_level == SSIM_LVL_3) {
2287
0
                    txb_full_distortion[DIST_SSIM][2][DIST_CALC_PREDICTION] = svt_spatial_full_distortion_ssim_kernel(
2288
0
                        input_pic->v_buffer,
2289
0
                        input_chroma_txb_origin_index,
2290
0
                        input_pic->v_stride,
2291
0
                        cand_bf->pred->v_buffer,
2292
0
                        txb_uv_origin_index,
2293
0
                        cand_bf->pred->v_stride,
2294
0
                        cropped_tx_width_uv,
2295
0
                        cropped_tx_height_uv,
2296
0
                        ctx->hbd_md,
2297
0
                        effective_ac_bias);
2298
2299
0
                    txb_full_distortion[DIST_SSIM][2][DIST_CALC_RESIDUAL] = svt_spatial_full_distortion_ssim_kernel(
2300
0
                        input_pic->v_buffer,
2301
0
                        input_chroma_txb_origin_index,
2302
0
                        input_pic->v_stride,
2303
0
                        cand_bf->recon->v_buffer,
2304
0
                        txb_uv_origin_index,
2305
0
                        cand_bf->recon->v_stride,
2306
0
                        cropped_tx_width_uv,
2307
0
                        cropped_tx_height_uv,
2308
0
                        ctx->hbd_md,
2309
0
                        effective_ac_bias);
2310
2311
0
                    txb_full_distortion[DIST_SSIM][2][DIST_CALC_PREDICTION] <<= 4;
2312
0
                    txb_full_distortion[DIST_SSIM][2][DIST_CALC_RESIDUAL] <<= 4;
2313
0
                }
2314
127k
                txb_full_distortion[DIST_SSD][2][DIST_CALC_PREDICTION] = spatial_full_dist_type_fun(
2315
127k
                    input_pic->v_buffer,
2316
127k
                    input_chroma_txb_origin_index,
2317
127k
                    input_pic->v_stride,
2318
127k
                    cand_bf->pred->v_buffer,
2319
127k
                    txb_uv_origin_index,
2320
127k
                    cand_bf->pred->v_stride,
2321
127k
                    cropped_tx_width_uv,
2322
127k
                    cropped_tx_height_uv);
2323
127k
                if (effective_ac_bias) {
2324
0
                    txb_full_distortion[DIST_SSD][2][DIST_CALC_PREDICTION] += get_svt_psy_full_dist(
2325
0
                        input_pic->v_buffer,
2326
0
                        input_chroma_txb_origin_index,
2327
0
                        input_pic->v_stride,
2328
0
                        cand_bf->pred->v_buffer,
2329
0
                        txb_uv_origin_index,
2330
0
                        cand_bf->pred->v_stride,
2331
0
                        cropped_tx_width_uv,
2332
0
                        cropped_tx_height_uv,
2333
0
                        ctx->hbd_md,
2334
0
                        effective_ac_bias);
2335
0
                }
2336
2337
127k
                txb_full_distortion[DIST_SSD][2][DIST_CALC_RESIDUAL] = spatial_full_dist_type_fun(
2338
127k
                    input_pic->v_buffer,
2339
127k
                    input_chroma_txb_origin_index,
2340
127k
                    input_pic->v_stride,
2341
127k
                    cand_bf->recon->v_buffer,
2342
127k
                    txb_uv_origin_index,
2343
127k
                    cand_bf->recon->v_stride,
2344
127k
                    cropped_tx_width_uv,
2345
127k
                    cropped_tx_height_uv);
2346
127k
                if (effective_ac_bias) {
2347
0
                    txb_full_distortion[DIST_SSD][2][DIST_CALC_RESIDUAL] += get_svt_psy_full_dist(
2348
0
                        input_pic->v_buffer,
2349
0
                        input_chroma_txb_origin_index,
2350
0
                        input_pic->v_stride,
2351
0
                        cand_bf->recon->v_buffer,
2352
0
                        txb_uv_origin_index,
2353
0
                        cand_bf->recon->v_stride,
2354
0
                        cropped_tx_width_uv,
2355
0
                        cropped_tx_height_uv,
2356
0
                        ctx->hbd_md,
2357
0
                        effective_ac_bias);
2358
0
                }
2359
2360
127k
                txb_full_distortion[DIST_SSD][2][DIST_CALC_PREDICTION] <<= 4;
2361
127k
                txb_full_distortion[DIST_SSD][2][DIST_CALC_RESIDUAL] <<= 4;
2362
127k
            } else {
2363
                // *Full Distortion (SSE)
2364
                // *Note - there are known issues with how this distortion metric is currently
2365
                //    calculated.  The amount of scaling between the two arrays is not
2366
                //    equivalent.
2367
913
                uint32_t bwidth  = tx_width_uv;
2368
913
                uint32_t bheight = tx_height_uv;
2369
913
                if (pf_shape) {
2370
0
                    bwidth  = MAX((bwidth >> pf_shape), 4);
2371
0
                    bheight = (bheight >> pf_shape);
2372
0
                }
2373
913
                svt_aom_picture_full_distortion32_bits_single(
2374
913
                    &(((int32_t*)ctx->tx_coeffs->v_buffer)[txb_1d_offset]),
2375
913
                    &(((int32_t*)cand_bf->rec_coeff->v_buffer)[txb_1d_offset]),
2376
913
                    tx_width_uv,
2377
913
                    bwidth,
2378
913
                    bheight,
2379
913
                    txb_full_distortion[DIST_SSD][2],
2380
913
                    cand_bf->eob.v[txb_itr]);
2381
2382
913
                const int32_t chroma_shift = (MAX_TX_SCALE - av1_get_tx_scale_tab[tx_size_uv]) * 2;
2383
913
                txb_full_distortion[DIST_SSD][2][DIST_CALC_RESIDUAL] = RIGHT_SIGNED_SHIFT(
2384
913
                    txb_full_distortion[DIST_SSD][2][DIST_CALC_RESIDUAL], chroma_shift);
2385
913
                txb_full_distortion[DIST_SSD][2][DIST_CALC_PREDICTION] = RIGHT_SIGNED_SHIFT(
2386
913
                    txb_full_distortion[DIST_SSD][2][DIST_CALC_PREDICTION], chroma_shift);
2387
913
            }
2388
128k
            cand_bf->v_has_coeff |= ((cand_bf->eob.v[txb_itr] != 0) << txb_itr);
2389
128k
            cr_full_distortion[DIST_SSIM][DIST_CALC_RESIDUAL] += txb_full_distortion[DIST_SSIM][2][DIST_CALC_RESIDUAL];
2390
128k
            cr_full_distortion[DIST_SSIM][DIST_CALC_PREDICTION] +=
2391
128k
                txb_full_distortion[DIST_SSIM][2][DIST_CALC_PREDICTION];
2392
2393
128k
            cr_full_distortion[DIST_SSD][DIST_CALC_RESIDUAL] += txb_full_distortion[DIST_SSD][2][DIST_CALC_RESIDUAL];
2394
128k
            cr_full_distortion[DIST_SSD][DIST_CALC_PREDICTION] +=
2395
128k
                txb_full_distortion[DIST_SSD][2][DIST_CALC_PREDICTION];
2396
128k
        }
2397
2398
127k
        const uint32_t txb_origin_index = txb_origin_x + txb_origin_y * cand_bf->quant->y_stride;
2399
2400
        // Reset the Bit Costs
2401
127k
        uint64_t y_txb_coeff_bits  = 0;
2402
127k
        uint64_t cb_txb_coeff_bits = 0;
2403
127k
        uint64_t cr_txb_coeff_bits = 0;
2404
2405
        //CHROMA-ONLY
2406
127k
        svt_aom_txb_estimate_coeff_bits(ctx,
2407
127k
                                        0,
2408
127k
                                        NULL,
2409
127k
                                        pcs,
2410
127k
                                        cand_bf,
2411
127k
                                        txb_origin_index,
2412
127k
                                        txb_1d_offset,
2413
127k
                                        cand_bf->quant,
2414
127k
                                        cand_bf->eob.y[txb_itr],
2415
127k
                                        cand_bf->eob.u[txb_itr],
2416
127k
                                        cand_bf->eob.v[txb_itr],
2417
127k
                                        &y_txb_coeff_bits,
2418
127k
                                        &cb_txb_coeff_bits,
2419
127k
                                        &cr_txb_coeff_bits,
2420
127k
                                        tx_size,
2421
127k
                                        tx_size_uv,
2422
127k
                                        cand_bf->cand->transform_type[txb_itr],
2423
127k
                                        cand_bf->cand->transform_type_uv,
2424
127k
                                        component_type);
2425
2426
127k
        *cb_coeff_bits += cb_txb_coeff_bits;
2427
127k
        *cr_coeff_bits += cr_txb_coeff_bits;
2428
127k
        txb_1d_offset += tx_width_uv * tx_height_uv;
2429
2430
127k
        ++txb_itr;
2431
127k
    } while (txb_itr < tu_count);
2432
127k
}
2433
2434
/*
2435
  Check whether MD needs to perform the inverse transform and reconstruction
2436
*/
2437
248k
uint8_t svt_aom_do_md_recon(PictureParentControlSet* pcs, ModeDecisionContext* ctx) {
    // Several of the conditions below only apply while running PD_PASS_1
    const uint8_t in_pd_pass_1 = (ctx->pd_pass == PD_PASS_1);

    // When enc-dec is bypassed, MD has to produce the final recon itself
    const uint8_t md_makes_final_recon = ctx->bypass_encdec && in_pd_pass_1;

    // Recon is needed for intra prediction within the current frame
    const uint8_t wanted_by_intra_pred = !ctx->skip_intra || ctx->inter_intra_comp_ctrls.enabled;

    // Recon is needed for inter prediction of future frames, or because recon is being output
    const uint8_t wanted_by_ref = (pcs->is_ref || pcs->scs->static_config.recon_enabled) && md_makes_final_recon;

    // Recon is needed to derive the DLF levels
    const uint8_t wanted_by_dlf = pcs->dlf_ctrls.enabled;

    // Recon is needed by the CDEF search levels that work on recon samples
    const uint8_t wanted_by_cdef = pcs->cdef_search_ctrls.enabled && !pcs->cdef_search_ctrls.use_qp_strength &&
        !pcs->cdef_search_ctrls.use_reference_cdef_fs;

    // Recon is needed by any restoration search level
    const uint8_t wanted_by_restoration = pcs->enable_restoration;

    // Recon is needed by the stat report to compute the quality metrics
    const uint8_t wanted_by_quality = (pcs->compute_psnr || pcs->compute_ssim) && in_pd_pass_1;

    // A single interested consumer is enough to require the MD recon
    return (wanted_by_intra_pred || wanted_by_ref || wanted_by_dlf || wanted_by_cdef || wanted_by_restoration ||
            wanted_by_quality)
        ? 1
        : 0;
}