Coverage Report

Created: 2026-05-30 06:10

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/svt-av1/Source/Lib/Codec/full_loop.c
Line
Count
Source
1
/*
2
* Copyright(c) 2019 Intel Corporation
3
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
4
*
5
* This source code is subject to the terms of the BSD 3-Clause Clear License and
6
* the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear License
7
* was not distributed with this source code in the LICENSE file, you can
8
* obtain it at https://www.aomedia.org/license. If the Alliance for Open
9
* Media Patent License 1.0 was not distributed with this source code in the
10
* PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license.
11
*/
12
13
#include "definitions.h"
14
#include "full_loop.h"
15
#include "pcs.h"
16
#include "rd_cost.h"
17
#include "aom_dsp_rtcd.h"
18
#include "sequence_control_set.h"
19
#include "utility.h"
20
#include "ac_bias.h"
21
22
// Lookup table with TX_SIZES_ALL entries; every entry is 0, 1 or 2.
// NOTE(review): presumably indexed by transform size and yielding the transform scale
// (the log2 down-shift passed to the quantizers as log_scale) - confirm against callers.
const int av1_get_tx_scale_tab[TX_SIZES_ALL] = {0, 0, 0, 1, 2, 0, 0, 0, 0, 1, 1, 2, 2, 0, 0, 0, 0, 1, 1};
23
24
void     svt_aom_residual_kernel(uint8_t* input, uint32_t input_offset, uint32_t input_stride, uint8_t* pred,
25
                                 uint32_t pred_offset, uint32_t pred_stride, int16_t* residual, uint32_t residual_offset,
26
                                 uint32_t residual_stride, bool hbd, uint32_t area_width, uint32_t area_height);
27
uint64_t svt_spatial_full_distortion_ssim_kernel(uint8_t* input, uint32_t input_offset, uint32_t input_stride,
28
                                                 uint8_t* recon, int32_t recon_offset, uint32_t recon_stride,
29
                                                 uint32_t area_width, uint32_t area_height, bool hbd, double ac_bias);
30
31
// C implementation of the zero-bin ("b") quantizer.
//
// coeff_ptr        transform coefficients, addressed through scan[]
// n_coeffs         number of coefficients to process
// zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr, dequant_ptr
//                  two-entry tables; entry 0 is used for position 0, entry 1 for every other position
// qcoeff_ptr       output: quantized levels (cleared first)
// dqcoeff_ptr      output: dequantized values (cleared first)
// eob_ptr          output: scan index of the last non-zero level plus one, 0 if all levels are zero
// scan             scan order; iscan is unused
// qm_ptr, iqm_ptr  optional forward / inverse weighting matrices; a NULL matrix is treated as a flat
//                  weight of (1 << AOM_QM_BITS)
// log_scale        scale applied to the zbin and round values and to the quantization shifts
void svt_aom_quantize_b_c(const TranLow* coeff_ptr, intptr_t n_coeffs, const int16_t* zbin_ptr,
                          const int16_t* round_ptr, const int16_t* quant_ptr, const int16_t* quant_shift_ptr,
                          TranLow* qcoeff_ptr, TranLow* dqcoeff_ptr, const int16_t* dequant_ptr, uint16_t* eob_ptr,
                          const int16_t* scan, const int16_t* iscan, const QmVal* qm_ptr, const QmVal* iqm_ptr,
                          const int32_t log_scale) {
    (void)iscan;

    const int32_t zbins[2]  = {ROUND_POWER_OF_TWO(zbin_ptr[0], log_scale), ROUND_POWER_OF_TWO(zbin_ptr[1], log_scale)};
    const int32_t nzbins[2] = {-zbins[0], -zbins[1]};

    memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
    memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));

    // Pre-scan pass: walk the scan backwards and drop the trailing run of coefficients that lie
    // strictly inside the zero bin. Everything at scan index >= active_count stays zero.
    intptr_t active_count = n_coeffs;
    while (active_count > 0) {
        const int32_t pos      = scan[active_count - 1];
        const int     is_ac    = pos != 0;
        const QmVal   weight   = qm_ptr != NULL ? qm_ptr[pos] : (1 << AOM_QM_BITS);
        const int32_t weighted = coeff_ptr[pos] * weight;

        if (weighted >= (zbins[is_ac] * (1 << AOM_QM_BITS)) || weighted <= (nzbins[is_ac] * (1 << AOM_QM_BITS))) {
            break;
        }
        active_count--;
    }

    // Quantization pass over the remaining prefix of the scan. Note: active_count can be zero.
    intptr_t last_nonzero = -1;
    for (intptr_t i = 0; i < active_count; i++) {
        const int32_t pos    = scan[i];
        const int     is_ac  = pos != 0;
        const int32_t coeff  = coeff_ptr[pos];
        const int     sign   = coeff < 0 ? -1 : 0; // all-ones mask for negative inputs
        const int32_t mag    = (coeff ^ sign) - sign; // |coeff|
        const QmVal   weight = qm_ptr != NULL ? qm_ptr[pos] : (1 << AOM_QM_BITS);

        // Inside the zero bin: outputs were already cleared by the memsets above.
        if (mag * weight < (zbins[is_ac] << AOM_QM_BITS)) {
            continue;
        }

        const int64_t rounded =
            (int64_t)clamp(mag + ROUND_POWER_OF_TWO(round_ptr[is_ac], log_scale), INT16_MIN, INT16_MAX) * weight;
        const int32_t level = (int32_t)(((((rounded * quant_ptr[is_ac]) >> 16) + rounded) * quant_shift_ptr[is_ac]) >>
                                        (16 - log_scale + AOM_QM_BITS)); // quantization
        qcoeff_ptr[pos]     = (level ^ sign) - sign; // restore the sign

        const int32_t inv_weight = iqm_ptr != NULL ? iqm_ptr[pos] : (1 << AOM_QM_BITS);
        const int32_t dequant    = (dequant_ptr[is_ac] * inv_weight + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
        const TranLow abs_dq     = (level * dequant) >> log_scale;
        dqcoeff_ptr[pos]         = (TranLow)((abs_dq ^ sign) - sign);

        if (level) {
            last_nonzero = i;
        }
    }

    *eob_ptr = (uint16_t)(last_nonzero + 1);
}
84
85
// C implementation of the high-bit-depth zero-bin ("b") quantizer.
//
// Works in two passes: the first collects the scan indices of every coefficient that lies on or
// outside the zero-bin threshold, the second quantizes and dequantizes only those coefficients.
// qcoeff_ptr / dqcoeff_ptr are cleared up front, so skipped coefficients stay zero.
//
// The two-entry tables (zbin, round, quant, quant_shift, dequant) use entry 0 for position 0 and
// entry 1 for every other position. A NULL qm_ptr / iqm_ptr is treated as a flat weight of
// (1 << AOM_QM_BITS). iscan is unused. *eob_ptr receives the scan index of the last non-zero
// level plus one (0 when every level is zero).
//
// The candidate list holds at most 4096 scan indices, so n_coeffs must not exceed that.
void svt_aom_highbd_quantize_b_c(const TranLow* coeff_ptr, intptr_t n_coeffs, const int16_t* zbin_ptr,
                                 const int16_t* round_ptr, const int16_t* quant_ptr, const int16_t* quant_shift_ptr,
                                 TranLow* qcoeff_ptr, TranLow* dqcoeff_ptr, const int16_t* dequant_ptr,
                                 uint16_t* eob_ptr, const int16_t* scan, const int16_t* iscan, const QmVal* qm_ptr,
                                 const QmVal* iqm_ptr, const int32_t log_scale) {
    (void)iscan;

    memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
    memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));

    const int32_t zbins[2]  = {ROUND_POWER_OF_TWO(zbin_ptr[0], log_scale), ROUND_POWER_OF_TWO(zbin_ptr[1], log_scale)};
    const int32_t nzbins[2] = {-zbins[0], -zbins[1]};

    intptr_t candidates[4096];
    int      num_candidates = 0;

    // Pre-scan pass: remember every scan index whose weighted coefficient is not strictly inside
    // the zero bin.
    for (intptr_t i = 0; i < n_coeffs; i++) {
        const int32_t pos      = scan[i];
        const int     is_ac    = pos != 0;
        const QmVal   weight   = qm_ptr != NULL ? qm_ptr[pos] : (1 << AOM_QM_BITS);
        const int32_t weighted = coeff_ptr[pos] * weight;

        if (weighted < (zbins[is_ac] * (1 << AOM_QM_BITS)) && weighted > (nzbins[is_ac] * (1 << AOM_QM_BITS))) {
            continue;
        }
        candidates[num_candidates++] = i;
    }

    // Quantization pass: only the collected candidates are processed. Note: the list can be empty.
    intptr_t last_nonzero = -1;
    for (int k = 0; k < num_candidates; k++) {
        const intptr_t scan_idx   = candidates[k];
        const int32_t  pos        = scan[scan_idx];
        const int      is_ac      = pos != 0;
        const int32_t  coeff      = coeff_ptr[pos];
        const int      sign       = coeff < 0 ? -1 : 0; // all-ones mask for negative inputs
        const int32_t  mag        = (coeff ^ sign) - sign; // |coeff|
        const QmVal    weight     = qm_ptr != NULL ? qm_ptr[pos] : (1 << AOM_QM_BITS);
        const QmVal    inv_weight = iqm_ptr != NULL ? iqm_ptr[pos] : (1 << AOM_QM_BITS);

        const int64_t rounded  = mag + ROUND_POWER_OF_TWO(round_ptr[is_ac], log_scale);
        const int64_t weighted = rounded * weight;
        const int64_t scaled   = ((weighted * quant_ptr[is_ac]) >> 16) + weighted;
        const int32_t level    = (int32_t)((scaled * quant_shift_ptr[is_ac]) >> (16 - log_scale + AOM_QM_BITS));
        qcoeff_ptr[pos]        = (TranLow)((level ^ sign) - sign);

        int32_t       dequant = (dequant_ptr[is_ac] * inv_weight + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
        const TranLow abs_dq  = (level * dequant) >> log_scale;
        dqcoeff_ptr[pos]      = (TranLow)((abs_dq ^ sign) - sign);

        if (level) {
            last_nonzero = scan_idx;
        }
    }

    *eob_ptr = (uint16_t)(last_nonzero + 1);
}
137
138
#if CONFIG_ENABLE_HIGH_BIT_DEPTH
139
void svt_av1_highbd_quantize_b_facade(const TranLow* coeff_ptr, intptr_t n_coeffs, const MacroblockPlane* p,
140
                                      TranLow* qcoeff_ptr, TranLow* dqcoeff_ptr, uint16_t* eob_ptr, const ScanOrder* sc,
141
0
                                      const QuantParam* qparam) {
142
0
    const QmVal* qm_ptr  = qparam->qmatrix;
143
0
    const QmVal* iqm_ptr = qparam->iqmatrix;
144
0
    if (qm_ptr || iqm_ptr) {
145
0
        svt_av1_highbd_quantize_b_qm(coeff_ptr,
146
0
                                     n_coeffs,
147
0
                                     p->zbin_qtx,
148
0
                                     p->round_qtx,
149
0
                                     p->quant_qtx,
150
0
                                     p->quant_shift_qtx,
151
0
                                     qcoeff_ptr,
152
0
                                     dqcoeff_ptr,
153
0
                                     p->dequant_qtx,
154
0
                                     eob_ptr,
155
0
                                     sc->scan,
156
0
                                     sc->iscan,
157
0
                                     qm_ptr,
158
0
                                     iqm_ptr,
159
0
                                     qparam->log_scale);
160
0
    } else {
161
0
        svt_aom_highbd_quantize_b(coeff_ptr,
162
0
                                  n_coeffs,
163
0
                                  p->zbin_qtx,
164
0
                                  p->round_qtx,
165
0
                                  p->quant_qtx,
166
0
                                  p->quant_shift_qtx,
167
0
                                  qcoeff_ptr,
168
0
                                  dqcoeff_ptr,
169
0
                                  p->dequant_qtx,
170
0
                                  eob_ptr,
171
0
                                  sc->scan,
172
0
                                  sc->iscan,
173
0
                                  NULL,
174
0
                                  NULL,
175
0
                                  qparam->log_scale);
176
0
    }
177
0
    assert(qparam->log_scale <= 2);
178
0
}
179
#endif
180
181
static void av1_quantize_b_facade_ii(const TranLow* coeff_ptr, intptr_t n_coeffs, const MacroblockPlane* p,
182
                                     TranLow* qcoeff_ptr, TranLow* dqcoeff_ptr, uint16_t* eob_ptr, const ScanOrder* sc,
183
1.38M
                                     const QuantParam* qparam) {
184
1.38M
    const QmVal* qm_ptr  = qparam->qmatrix;
185
1.38M
    const QmVal* iqm_ptr = qparam->iqmatrix;
186
1.38M
    if (qm_ptr || iqm_ptr) {
187
0
        svt_av1_quantize_b_qm(coeff_ptr,
188
0
                              n_coeffs,
189
0
                              p->zbin_qtx,
190
0
                              p->round_qtx,
191
0
                              p->quant_qtx,
192
0
                              p->quant_shift_qtx,
193
0
                              qcoeff_ptr,
194
0
                              dqcoeff_ptr,
195
0
                              p->dequant_qtx,
196
0
                              eob_ptr,
197
0
                              sc->scan,
198
0
                              sc->iscan,
199
0
                              qm_ptr,
200
0
                              iqm_ptr,
201
0
                              qparam->log_scale);
202
1.38M
    } else {
203
1.38M
        svt_aom_quantize_b(coeff_ptr,
204
1.38M
                           n_coeffs,
205
1.38M
                           p->zbin_qtx,
206
1.38M
                           p->round_qtx,
207
1.38M
                           p->quant_qtx,
208
1.38M
                           p->quant_shift_qtx,
209
1.38M
                           qcoeff_ptr,
210
1.38M
                           dqcoeff_ptr,
211
1.38M
                           p->dequant_qtx,
212
1.38M
                           eob_ptr,
213
1.38M
                           sc->scan,
214
1.38M
                           sc->iscan,
215
1.38M
                           NULL,
216
1.38M
                           NULL,
217
1.38M
                           qparam->log_scale);
218
1.38M
    }
219
1.38M
    assert(qparam->log_scale <= 2);
220
1.38M
}
221
222
// Shared C implementation behind the "fp" quantizers.
//
// Clears qcoeff_ptr / dqcoeff_ptr, then visits all n_coeffs coefficients in scan order. A
// coefficient is quantized only when its (optionally weighted) magnitude reaches a threshold
// derived from dequant_ptr and log_scale; otherwise its outputs stay zero. *eob_ptr receives the
// scan index of the last non-zero level plus one (0 when every level is zero).
//
// round_ptr, quant_ptr and dequant_ptr are two-entry tables: entry 0 is used for position 0 and
// entry 1 for every other position. zbin_ptr, quant_shift_ptr and iscan are unused. A NULL
// qm_ptr / iqm_ptr is treated as a flat weight of (1 << AOM_QM_BITS); when both are NULL the
// matrix-free loop is used.
static void quantize_fp_helper_c(const TranLow* coeff_ptr, intptr_t n_coeffs, const int16_t* zbin_ptr,
                                 const int16_t* round_ptr, const int16_t* quant_ptr, const int16_t* quant_shift_ptr,
                                 TranLow* qcoeff_ptr, TranLow* dqcoeff_ptr, const int16_t* dequant_ptr,
                                 uint16_t* eob_ptr, const int16_t* scan, const int16_t* iscan, const QmVal* qm_ptr,
                                 const QmVal* iqm_ptr, int log_scale) {
    (void)zbin_ptr;
    (void)quant_shift_ptr;
    (void)iscan;

    const int rounding[2] = {ROUND_POWER_OF_TWO(round_ptr[0], log_scale), ROUND_POWER_OF_TWO(round_ptr[1], log_scale)};
    int       last_nonzero = -1;

    memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
    memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));

    if (qm_ptr != NULL || iqm_ptr != NULL) {
        // Weighted path: at least one quantization matrix is present.
        for (int i = 0; i < n_coeffs; i++) {
            const int   pos        = scan[i];
            const int   is_ac      = pos != 0;
            const int   coeff      = coeff_ptr[pos];
            const QmVal weight     = qm_ptr ? qm_ptr[pos] : (1 << AOM_QM_BITS);
            const QmVal inv_weight = iqm_ptr ? iqm_ptr[pos] : (1 << AOM_QM_BITS);
            const int   dequant    = (dequant_ptr[is_ac] * inv_weight + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
            const int   sign       = coeff < 0 ? -1 : 0; // all-ones mask for negative inputs
            int64_t     mag        = (coeff ^ sign) - sign; // |coeff|

            // Below the threshold: outputs were already cleared above.
            if (mag * weight < (dequant_ptr[is_ac] << (AOM_QM_BITS - (1 + log_scale)))) {
                continue;
            }

            mag             = clamp64(mag + rounding[is_ac], INT16_MIN, INT16_MAX);
            const int level = (int)((mag * weight * quant_ptr[is_ac]) >> (16 - log_scale + AOM_QM_BITS));
            qcoeff_ptr[pos] = (level ^ sign) - sign;

            const TranLow abs_dq = (level * dequant) >> log_scale;
            dqcoeff_ptr[pos]     = (abs_dq ^ sign) - sign;

            if (level) {
                last_nonzero = i;
            }
        }
    } else {
        // Matrix-free path.
        for (int i = 0; i < n_coeffs; i++) {
            const int     pos    = scan[i];
            const int     is_ac  = pos != 0;
            const int32_t thresh = (int32_t)(dequant_ptr[is_ac]);
            const int     coeff  = coeff_ptr[pos];
            const int     sign   = coeff < 0 ? -1 : 0; // all-ones mask for negative inputs
            int64_t       mag    = (coeff ^ sign) - sign; // |coeff|

            // Below the threshold: outputs were already cleared above.
            if ((mag << (1 + log_scale)) < thresh) {
                continue;
            }

            mag             = clamp64(mag + rounding[is_ac], INT16_MIN, INT16_MAX);
            const int level = (int)((mag * quant_ptr[is_ac]) >> (16 - log_scale));
            if (!level) {
                continue;
            }

            qcoeff_ptr[pos]      = (level ^ sign) - sign;
            const TranLow abs_dq = (level * dequant_ptr[is_ac]) >> log_scale;
            dqcoeff_ptr[pos]     = (abs_dq ^ sign) - sign;
            last_nonzero         = i;
        }
    }

    *eob_ptr = last_nonzero + 1;
}
285
286
// C "fp" quantizer for log_scale 0: forwards every argument to quantize_fp_helper_c with no
// quantization matrices (both NULL) and log_scale fixed to 0.
void svt_av1_quantize_fp_c(const TranLow* coeff_ptr, intptr_t n_coeffs, const int16_t* zbin_ptr,
                           const int16_t* round_ptr, const int16_t* quant_ptr, const int16_t* quant_shift_ptr,
                           TranLow* qcoeff_ptr, TranLow* dqcoeff_ptr, const int16_t* dequant_ptr, uint16_t* eob_ptr,
                           const int16_t* scan, const int16_t* iscan) {
    quantize_fp_helper_c(coeff_ptr, n_coeffs,
                         zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr,
                         qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr,
                         scan, iscan,
                         /*qm_ptr=*/NULL, /*iqm_ptr=*/NULL,
                         /*log_scale=*/0);
}
306
307
// C "fp" quantizer with quantization matrices: forwards every argument, including qm_ptr,
// iqm_ptr and the caller-supplied log_scale, unchanged to quantize_fp_helper_c.
void svt_av1_quantize_fp_qm_c(const TranLow* coeff_ptr, intptr_t n_coeffs, const int16_t* zbin_ptr,
                              const int16_t* round_ptr, const int16_t* quant_ptr, const int16_t* quant_shift_ptr,
                              TranLow* qcoeff_ptr, TranLow* dqcoeff_ptr, const int16_t* dequant_ptr, uint16_t* eob_ptr,
                              const int16_t* scan, const int16_t* iscan, const QmVal* qm_ptr, const QmVal* iqm_ptr,
                              int16_t log_scale) {
    quantize_fp_helper_c(coeff_ptr, n_coeffs,
                         zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr,
                         qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr,
                         scan, iscan,
                         qm_ptr, iqm_ptr,
                         log_scale);
}
328
329
// Shared C implementation behind the high-bit-depth "fp" quantizers.
//
// Visits `count` coefficients in scan order. A coefficient is quantized only when its (optionally
// weighted) magnitude reaches a threshold derived from dequant_ptr and log_scale; otherwise both
// of its outputs are written as zero. Unlike a memset-first design, the output buffers are not
// cleared up front: each visited position is written exactly once inside the loop.
// *eob_ptr receives the scan index of the last non-zero level plus one (0 when every level is zero).
//
// round_ptr, quant_ptr and dequant_ptr are two-entry tables: entry 0 is used for position 0 and
// entry 1 for every other position. zbin_ptr, quant_shift_ptr and iscan are unused. A NULL
// qm_ptr / iqm_ptr is treated as a flat weight of (1 << AOM_QM_BITS); when both are NULL the
// matrix-free loop is used.
static void highbd_quantize_fp_helper_c(const TranLow* coeff_ptr, intptr_t count, const int16_t* zbin_ptr,
                                        const int16_t* round_ptr, const int16_t* quant_ptr,
                                        const int16_t* quant_shift_ptr, TranLow* qcoeff_ptr, TranLow* dqcoeff_ptr,
                                        const int16_t* dequant_ptr, uint16_t* eob_ptr, const int16_t* scan,
                                        const int16_t* iscan, const QmVal* qm_ptr, const QmVal* iqm_ptr,
                                        int16_t log_scale) {
    (void)zbin_ptr;
    (void)quant_shift_ptr;
    (void)iscan;

    const int shift        = 16 - log_scale;
    int       last_nonzero = -1;

    if (qm_ptr == NULL && iqm_ptr == NULL) {
        // Matrix-free path; the scaled rounding offsets are loop-invariant.
        const int rounding[2] = {
            ROUND_POWER_OF_TWO(round_ptr[0], log_scale),
            ROUND_POWER_OF_TWO(round_ptr[1], log_scale),
        };
        for (int i = 0; i < count; i++) {
            const int pos   = scan[i];
            const int is_ac = (pos != 0);
            const int coeff = coeff_ptr[pos];
            const int sign  = coeff < 0 ? -1 : 0; // all-ones mask for negative inputs
            const int mag   = (coeff ^ sign) - sign; // |coeff|

            if ((mag << (1 + log_scale)) < dequant_ptr[is_ac]) {
                qcoeff_ptr[pos]  = 0;
                dqcoeff_ptr[pos] = 0;
                continue;
            }

            const int     quant   = quant_ptr[is_ac];
            const int     dequant = dequant_ptr[is_ac];
            const int64_t rounded = (int64_t)mag + rounding[is_ac];
            const int     level   = (int)((rounded * quant) >> shift);
            const TranLow abs_dq  = (level * dequant) >> log_scale;

            qcoeff_ptr[pos]  = (TranLow)((level ^ sign) - sign);
            dqcoeff_ptr[pos] = (TranLow)((abs_dq ^ sign) - sign);
            if (level) {
                last_nonzero = i;
            }
        }
    } else {
        // Weighted path: at least one quantization matrix is present.
        for (int i = 0; i < count; i++) {
            const int     pos        = scan[i];
            const int     is_ac      = pos != 0;
            const int     coeff      = coeff_ptr[pos];
            const QmVal   weight     = qm_ptr != NULL ? qm_ptr[pos] : (1 << AOM_QM_BITS);
            const QmVal   inv_weight = iqm_ptr != NULL ? iqm_ptr[pos] : (1 << AOM_QM_BITS);
            const int     dequant    = (dequant_ptr[is_ac] * inv_weight + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
            const int     sign       = coeff < 0 ? -1 : 0; // all-ones mask for negative inputs
            const int64_t mag        = (coeff ^ sign) - sign; // |coeff|

            if (mag * weight < (dequant_ptr[is_ac] << (AOM_QM_BITS - (1 + log_scale)))) {
                qcoeff_ptr[pos]  = 0;
                dqcoeff_ptr[pos] = 0;
                continue;
            }

            const int64_t rounded = mag + ROUND_POWER_OF_TWO(round_ptr[is_ac], log_scale);
            const int     level   = (int)((rounded * quant_ptr[is_ac] * weight) >> (shift + AOM_QM_BITS));
            const TranLow abs_dq  = (level * dequant) >> log_scale;

            qcoeff_ptr[pos]  = (TranLow)((level ^ sign) - sign);
            dqcoeff_ptr[pos] = (TranLow)((abs_dq ^ sign) - sign);
            if (level) {
                last_nonzero = i;
            }
        }
    }

    *eob_ptr = last_nonzero + 1;
}
398
399
// C high-bit-depth "fp" quantizer without quantization matrices: forwards every argument to
// highbd_quantize_fp_helper_c with both matrix pointers set to NULL and the caller's log_scale.
void svt_av1_highbd_quantize_fp_c(const TranLow* coeff_ptr, intptr_t count, const int16_t* zbin_ptr,
                                  const int16_t* round_ptr, const int16_t* quant_ptr, const int16_t* quant_shift_ptr,
                                  TranLow* qcoeff_ptr, TranLow* dqcoeff_ptr, const int16_t* dequant_ptr,
                                  uint16_t* eob_ptr, const int16_t* scan, const int16_t* iscan, int16_t log_scale) {
    highbd_quantize_fp_helper_c(coeff_ptr, count,
                                zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr,
                                qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr,
                                scan, iscan,
                                /*qm_ptr=*/NULL, /*iqm_ptr=*/NULL,
                                log_scale);
}
419
420
// C "fp" quantizer for log_scale 1: forwards every argument to quantize_fp_helper_c with no
// quantization matrices (both NULL) and log_scale fixed to 1.
void svt_av1_quantize_fp_32x32_c(const TranLow* coeff_ptr, intptr_t n_coeffs, const int16_t* zbin_ptr,
                                 const int16_t* round_ptr, const int16_t* quant_ptr, const int16_t* quant_shift_ptr,
                                 TranLow* qcoeff_ptr, TranLow* dqcoeff_ptr, const int16_t* dequant_ptr,
                                 uint16_t* eob_ptr, const int16_t* scan, const int16_t* iscan) {
    quantize_fp_helper_c(coeff_ptr, n_coeffs,
                         zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr,
                         qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr,
                         scan, iscan,
                         /*qm_ptr=*/NULL, /*iqm_ptr=*/NULL,
                         /*log_scale=*/1);
}
440
441
// C "fp" quantizer for log_scale 2: forwards every argument to quantize_fp_helper_c with no
// quantization matrices (both NULL) and log_scale fixed to 2.
void svt_av1_quantize_fp_64x64_c(const TranLow* coeff_ptr, intptr_t n_coeffs, const int16_t* zbin_ptr,
                                 const int16_t* round_ptr, const int16_t* quant_ptr, const int16_t* quant_shift_ptr,
                                 TranLow* qcoeff_ptr, TranLow* dqcoeff_ptr, const int16_t* dequant_ptr,
                                 uint16_t* eob_ptr, const int16_t* scan, const int16_t* iscan) {
    quantize_fp_helper_c(coeff_ptr, n_coeffs,
                         zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr,
                         qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr,
                         scan, iscan,
                         /*qm_ptr=*/NULL, /*iqm_ptr=*/NULL,
                         /*log_scale=*/2);
}
461
462
void svt_av1_quantize_fp_facade(const TranLow* coeff_ptr, intptr_t n_coeffs, const MacroblockPlane* p,
463
                                TranLow* qcoeff_ptr, TranLow* dqcoeff_ptr, uint16_t* eob_ptr, const ScanOrder* sc,
464
24.6k
                                const QuantParam* qparam) {
465
24.6k
    const QmVal* qm_ptr  = qparam->qmatrix;
466
24.6k
    const QmVal* iqm_ptr = qparam->iqmatrix;
467
468
24.6k
    if (qm_ptr || iqm_ptr) {
469
0
        svt_av1_quantize_fp_qm(coeff_ptr,
470
0
                               n_coeffs,
471
0
                               p->zbin_qtx,
472
0
                               p->round_fp_qtx,
473
0
                               p->quant_fp_qtx,
474
0
                               p->quant_shift_qtx,
475
0
                               qcoeff_ptr,
476
0
                               dqcoeff_ptr,
477
0
                               p->dequant_qtx,
478
0
                               eob_ptr,
479
0
                               sc->scan,
480
0
                               sc->iscan,
481
0
                               qm_ptr,
482
0
                               iqm_ptr,
483
0
                               qparam->log_scale);
484
24.6k
    } else {
485
24.6k
        switch (qparam->log_scale) {
486
14.8k
        case 0:
487
14.8k
            svt_av1_quantize_fp(coeff_ptr,
488
14.8k
                                n_coeffs,
489
14.8k
                                p->zbin_qtx,
490
14.8k
                                p->round_fp_qtx,
491
14.8k
                                p->quant_fp_qtx,
492
14.8k
                                p->quant_shift_qtx,
493
14.8k
                                qcoeff_ptr,
494
14.8k
                                dqcoeff_ptr,
495
14.8k
                                p->dequant_qtx,
496
14.8k
                                eob_ptr,
497
14.8k
                                sc->scan,
498
14.8k
                                sc->iscan);
499
14.8k
            break;
500
6.76k
        case 1:
501
6.76k
            svt_av1_quantize_fp_32x32(coeff_ptr,
502
6.76k
                                      n_coeffs,
503
6.76k
                                      p->zbin_qtx,
504
6.76k
                                      p->round_fp_qtx,
505
6.76k
                                      p->quant_fp_qtx,
506
6.76k
                                      p->quant_shift_qtx,
507
6.76k
                                      qcoeff_ptr,
508
6.76k
                                      dqcoeff_ptr,
509
6.76k
                                      p->dequant_qtx,
510
6.76k
                                      eob_ptr,
511
6.76k
                                      sc->scan,
512
6.76k
                                      sc->iscan);
513
6.76k
            break;
514
2.99k
        case 2:
515
2.99k
            svt_av1_quantize_fp_64x64(coeff_ptr,
516
2.99k
                                      n_coeffs,
517
2.99k
                                      p->zbin_qtx,
518
2.99k
                                      p->round_fp_qtx,
519
2.99k
                                      p->quant_fp_qtx,
520
2.99k
                                      p->quant_shift_qtx,
521
2.99k
                                      qcoeff_ptr,
522
2.99k
                                      dqcoeff_ptr,
523
2.99k
                                      p->dequant_qtx,
524
2.99k
                                      eob_ptr,
525
2.99k
                                      sc->scan,
526
2.99k
                                      sc->iscan);
527
2.99k
            break;
528
0
        default:
529
0
            assert(0);
530
24.6k
        }
531
24.6k
    }
532
24.6k
}
533
534
#if CONFIG_ENABLE_HIGH_BIT_DEPTH
535
void svt_av1_highbd_quantize_fp_facade(const TranLow* coeff_ptr, intptr_t n_coeffs, const MacroblockPlane* p,
536
                                       TranLow* qcoeff_ptr, TranLow* dqcoeff_ptr, uint16_t* eob_ptr,
537
0
                                       const ScanOrder* sc, const QuantParam* qparam) {
538
0
    const QmVal* qm_ptr  = qparam->qmatrix;
539
0
    const QmVal* iqm_ptr = qparam->iqmatrix;
540
0
    if (qm_ptr != NULL && iqm_ptr != NULL) {
541
0
        svt_av1_highbd_quantize_fp_qm(coeff_ptr,
542
0
                                      n_coeffs,
543
0
                                      p->zbin_qtx,
544
0
                                      p->round_fp_qtx,
545
0
                                      p->quant_fp_qtx,
546
0
                                      p->quant_shift_qtx,
547
0
                                      qcoeff_ptr,
548
0
                                      dqcoeff_ptr,
549
0
                                      p->dequant_qtx,
550
0
                                      eob_ptr,
551
0
                                      sc->scan,
552
0
                                      sc->iscan,
553
0
                                      qm_ptr,
554
0
                                      iqm_ptr,
555
0
                                      qparam->log_scale);
556
0
    } else {
557
0
        svt_av1_highbd_quantize_fp(coeff_ptr,
558
0
                                   n_coeffs,
559
0
                                   p->zbin_qtx,
560
0
                                   p->round_fp_qtx,
561
0
                                   p->quant_fp_qtx,
562
0
                                   p->quant_shift_qtx,
563
0
                                   qcoeff_ptr,
564
0
                                   dqcoeff_ptr,
565
0
                                   p->dequant_qtx,
566
0
                                   eob_ptr,
567
0
                                   sc->scan,
568
0
                                   sc->iscan,
569
0
                                   qparam->log_scale);
570
0
    }
571
0
}
572
#endif
573
574
/*
 * C implementation of the high bit-depth "fp" quantizer with quantization
 * matrices.  It is a direct pass-through: every argument is forwarded, in the
 * same order, to highbd_quantize_fp_helper_c().
 */
void svt_av1_highbd_quantize_fp_qm_c(const TranLow* coeff_ptr, intptr_t count, const int16_t* zbin_ptr,
                                     const int16_t* round_ptr, const int16_t* quant_ptr, const int16_t* quant_shift_ptr,
                                     TranLow* qcoeff_ptr, TranLow* dqcoeff_ptr, const int16_t* dequant_ptr,
                                     uint16_t* eob_ptr, const int16_t* scan, const int16_t* iscan, const QmVal* qm_ptr,
                                     const QmVal* iqm_ptr, int16_t log_scale) {
    highbd_quantize_fp_helper_c(coeff_ptr, count, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr,
                                qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
                                qm_ptr, iqm_ptr, log_scale);
}
595
596
/*
 * Base-level coding context for the coefficient at scan position scan_idx.
 *
 * When the coefficient is not the last one in the block the context comes from
 * get_lower_levels_ctx().  For the last coefficient the context depends only on
 * how far into the scan it sits: 0 at position 0, 1 within the first eighth of
 * the (height << bwl) coefficients, 2 within the first quarter, 3 otherwise.
 */
static INLINE int get_lower_levels_ctx_general(int is_last, int scan_idx, int bwl, int height, const uint8_t* levels,
                                               int coeff_idx, TxSize tx_size, TxClass tx_class) {
    if (!is_last) {
        return get_lower_levels_ctx(levels, coeff_idx, bwl, tx_size, tx_class);
    }
    if (scan_idx == 0) {
        return 0;
    }
    const int num_coeffs = height << bwl;
    if (scan_idx <= (num_coeffs >> 3)) {
        return 1;
    }
    return (scan_idx <= (num_coeffs >> 2)) ? 2 : 3;
}
612
613
19.6k
/*
 * Cost of the Golomb-coded remainder of a coefficient magnitude.
 *
 * Magnitudes below 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE have no remainder and
 * cost 0.  Otherwise the remainder r = abs_qc - COEFF_BASE_RANGE - NUM_BASE_LEVELS
 * is charged av1_cost_literal(2 * length - 1), where length = get_msb(r) + 1.
 */
static INLINE int32_t get_golomb_cost(int32_t abs_qc) {
    if (abs_qc < 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE) {
        return 0;
    }
    const int32_t remainder = abs_qc - COEFF_BASE_RANGE - NUM_BASE_LEVELS;
    const int32_t num_bits  = get_msb(remainder) + 1;
    return av1_cost_literal(2 * num_bits - 1);
}
621
622
19.6k
/*
 * Cost of coding the part of `level` above the base levels: the coeff_lps
 * entry for the base-range symbol (capped at COEFF_BASE_RANGE) plus the Golomb
 * cost of whatever exceeds that range.
 */
static INLINE int get_br_cost(TranLow level, const int* coeff_lps) {
    const int range_idx = AOMMIN(level - 1 - NUM_BASE_LEVELS, COEFF_BASE_RANGE);
    const int golomb    = get_golomb_cost(level);
    return coeff_lps[range_idx] + golomb;
}
626
627
static INLINE int get_coeff_cost_general(int is_last, int ci, TranLow abs_qc, int sign, int coeff_ctx, int dc_sign_ctx,
628
                                         const LvMapCoeffCost* txb_costs, int bwl, TxClass tx_class,
629
19.6k
                                         const uint8_t* levels) {
630
19.6k
    int cost = 0;
631
19.6k
    if (is_last) {
632
19.6k
        cost += txb_costs->base_eob_cost[coeff_ctx][AOMMIN(abs_qc, 3) - 1];
633
18.4E
    } else {
634
18.4E
        cost += txb_costs->base_cost[coeff_ctx][AOMMIN(abs_qc, 3)];
635
18.4E
    }
636
19.7k
    if (abs_qc != 0) {
637
19.7k
        if (ci == 0) {
638
19.6k
            cost += txb_costs->dc_sign_cost[dc_sign_ctx][sign];
639
19.6k
        } else {
640
1
            cost += av1_cost_literal(1);
641
1
        }
642
19.7k
        if (abs_qc > NUM_BASE_LEVELS) {
643
19.6k
            int br_ctx;
644
19.6k
            if (is_last) {
645
19.6k
                br_ctx = get_br_ctx_eob(ci, bwl, tx_class);
646
19.6k
            } else {
647
0
                br_ctx = get_br_ctx(levels, ci, bwl, tx_class);
648
0
            }
649
19.6k
            cost += get_br_cost(abs_qc, txb_costs->lps_cost[br_ctx]);
650
19.6k
        }
651
19.7k
    }
652
19.6k
    return cost;
653
19.6k
}
654
655
33.1k
/*
 * Squared error between a transform coefficient and its reconstruction, with
 * the difference scaled by 2^shift before squaring.  The subtraction is done
 * in 64 bits.
 */
static INLINE int64_t get_coeff_dist(TranLow tcoeff, TranLow dqcoeff, int shift) {
    const int64_t scale      = (int64_t)(1lu << shift);
    const int64_t scaled_err = ((int64_t)tcoeff - dqcoeff) * scale;
    return SQR(scaled_err);
}
658
659
9.85k
/*
 * Computes the "one level lower" alternative for a quantized coefficient.
 *
 * abs_qc is the current magnitude and sign is 1 for negative values, 0
 * otherwise.  *qc_low receives the signed level (abs_qc - 1) and *dqc_low the
 * signed dequantized value ((abs_qc - 1) * dqv) >> shift.  The sign is applied
 * with the branch-free (-sign ^ v) + sign form; the asserts cross-check it.
 */
static INLINE void get_qc_dqc_low(TranLow abs_qc, int sign, int dqv, int shift, TranLow* qc_low, TranLow* dqc_low) {
    const TranLow lowered_level = abs_qc - 1;
    const TranLow lowered_recon = (lowered_level * dqv) >> shift;

    *qc_low = (-sign ^ lowered_level) + sign;
    assert((sign ? -lowered_level : lowered_level) == *qc_low);

    *dqc_low = (-sign ^ lowered_recon) + sign;
    assert((sign ? -lowered_recon : lowered_recon) == *dqc_low);
}
667
668
// Golomb cost lookup, indexed by the remainder r used in get_br_cost_with_diff()
// (valid for r < 32).  Entry r is 512 * (2 * bitlen(r) - 1); 512 is presumably
// the cost of one literal bit (av1_cost_literal(1)) -- confirm against its definition.
static const int golomb_bits_cost[32] = {
    0, // r = 0
    512, // r = 1
    512 * 3, 512 * 3, // r = 2..3
    512 * 5, 512 * 5, 512 * 5, 512 * 5, // r = 4..7
    512 * 7, 512 * 7, 512 * 7, 512 * 7, 512 * 7, 512 * 7, 512 * 7, 512 * 7, // r = 8..15
    512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, // r = 16..23
    512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, // r = 24..31
};

// golomb_cost_diff[r] == golomb_bits_cost[r] - golomb_bits_cost[r - 1]: the extra
// cost of remainder r over r - 1.  It is non-zero only where the bit length grows
// (r = 1 and the powers of two); all other entries are zero.
static const int golomb_cost_diff[32] = {
    [1]  = 512,
    [2]  = 512 * 2,
    [4]  = 512 * 2,
    [8]  = 512 * 2,
    [16] = 512 * 2,
};
674
675
0
/*
 * Same cost as get_br_cost(level, coeff_lps), and additionally accumulates into
 * *diff how much cheaper the range part becomes if the level is lowered by one.
 *
 *  - While level is at or below the Golomb threshold
 *    (COEFF_BASE_RANGE + 1 + NUM_BASE_LEVELS), the saving is read from the
 *    second half of coeff_lps (offset COEFF_BASE_RANGE + 1).
 *  - At or above the threshold, the Golomb part comes from the lookup tables
 *    for remainders below 32; for larger remainders it is computed, and the
 *    saving is 1024 exactly when the remainder is a power of two.
 */
static INLINE int get_br_cost_with_diff(TranLow level, const int* coeff_lps, int* diff) {
    const int golomb_threshold = COEFF_BASE_RANGE + 1 + NUM_BASE_LEVELS;
    const int range_idx        = AOMMIN(level - 1 - NUM_BASE_LEVELS, COEFF_BASE_RANGE);

    if (level <= golomb_threshold) {
        *diff += coeff_lps[range_idx + COEFF_BASE_RANGE + 1];
    }
    if (level < golomb_threshold) {
        // No Golomb remainder to pay for.
        return coeff_lps[range_idx];
    }

    const int remainder = level - COEFF_BASE_RANGE - NUM_BASE_LEVELS;
    int       golomb;
    if (remainder >= 32) {
        golomb = get_golomb_cost(level);
        *diff += (remainder & (remainder - 1)) == 0 ? 1024 : 0;
    } else {
        golomb = golomb_bits_cost[remainder];
        *diff += golomb_cost_diff[remainder];
    }
    return coeff_lps[range_idx] + golomb;
}
695
696
static AOM_FORCE_INLINE int get_two_coeff_cost_simple(int ci, TranLow abs_qc, int coeff_ctx,
697
                                                      const LvMapCoeffCost* txb_costs, int bwl, TxClass tx_class,
698
0
                                                      const uint8_t* levels, int* cost_low) {
699
    // this simple version assumes the coeff's scan_idx is not DC (scan_idx != 0)
700
    // and not the last (scan_idx != eob - 1)
701
0
    assert(ci > 0);
702
    //assert(abs_qc + 4 < 4);
703
0
    int cost = txb_costs->base_cost[coeff_ctx][AOMMIN(abs_qc, 3)];
704
0
    int diff = 0;
705
0
    if (abs_qc <= 3) {
706
0
        diff = txb_costs->base_cost[coeff_ctx][abs_qc + 4];
707
0
    }
708
0
    if (abs_qc) {
709
0
        cost += av1_cost_literal(1);
710
0
        if (abs_qc > NUM_BASE_LEVELS) {
711
0
            const int br_ctx      = get_br_ctx(levels, ci, bwl, tx_class);
712
0
            int       brcost_diff = 0;
713
0
            cost += get_br_cost_with_diff(abs_qc, txb_costs->lps_cost[br_ctx], &brcost_diff);
714
0
            diff += brcost_diff;
715
0
        }
716
0
    }
717
0
    *cost_low = cost - diff;
718
719
0
    return cost;
720
0
}
721
722
static INLINE int get_coeff_cost_eob(int ci, TranLow abs_qc, int sign, int coeff_ctx, int dc_sign_ctx,
723
1.79k
                                     const LvMapCoeffCost* txb_costs, int bwl, TxClass tx_class) {
724
1.79k
    int cost = 0;
725
1.79k
    cost += txb_costs->base_eob_cost[coeff_ctx][AOMMIN(abs_qc, 3) - 1];
726
1.79k
    if (abs_qc != 0) {
727
1.79k
        if (ci == 0) {
728
1.79k
            cost += txb_costs->dc_sign_cost[dc_sign_ctx][sign];
729
1.79k
        } else {
730
0
            cost += av1_cost_literal(1);
731
0
        }
732
1.79k
        if (abs_qc > NUM_BASE_LEVELS) {
733
0
            int br_ctx;
734
0
            br_ctx = get_br_ctx_eob(ci, bwl, tx_class);
735
0
            cost += get_br_cost(abs_qc, txb_costs->lps_cost[br_ctx]);
736
0
        }
737
1.79k
    }
738
1.79k
    return cost;
739
1.79k
}
740
741
9.85k
static INLINE int get_dqv(const int16_t* dequant, int coeff_idx, const QmVal* iqm_ptr) {
742
9.85k
    int dqv = dequant[!!coeff_idx];
743
9.85k
    if (iqm_ptr != NULL) {
744
0
        dqv = ((iqm_ptr[coeff_idx] * dqv) + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
745
0
    }
746
9.85k
    return dqv;
747
9.85k
}
748
749
static AOM_FORCE_INLINE void update_coeff_eob(int* accu_rate, int64_t* accu_dist, uint16_t* eob, int* nz_num,
750
                                              int* nz_ci, int si, TxSize tx_size, TxClass tx_class, int bwl, int height,
751
                                              int dc_sign_ctx, int64_t rdmult, int shift, const int16_t* dequant,
752
                                              const int16_t* scan, const LvMapEobCost* txb_eob_costs,
753
                                              const LvMapCoeffCost* txb_costs, const TranLow* tcoeff, TranLow* qcoeff,
754
0
                                              TranLow* dqcoeff, uint8_t* levels, int sharpness, const QmVal* iqm_ptr) {
755
0
    assert(si != *eob - 1);
756
0
    const int     ci        = scan[si];
757
0
    const int     dqv       = get_dqv(dequant, ci, iqm_ptr);
758
0
    const TranLow qc        = qcoeff[ci];
759
0
    const int     coeff_ctx = get_lower_levels_ctx(levels, ci, bwl, tx_size, tx_class);
760
0
    if (qc == 0) {
761
0
        *accu_rate += txb_costs->base_cost[coeff_ctx][0];
762
0
    } else {
763
0
        int           lower_level = 0;
764
0
        const TranLow abs_qc      = abs(qc);
765
0
        const TranLow tqc         = tcoeff[ci];
766
0
        const TranLow dqc         = dqcoeff[ci];
767
0
        const int     sign        = (qc < 0) ? 1 : 0;
768
0
        const int64_t dist0       = get_coeff_dist(tqc, 0, shift);
769
0
        int64_t       dist        = get_coeff_dist(tqc, dqc, shift) - dist0;
770
0
        int           rate        = get_coeff_cost_general(
771
0
            0, ci, abs_qc, sign, coeff_ctx, dc_sign_ctx, txb_costs, bwl, tx_class, levels);
772
0
        int64_t rd = RDCOST(rdmult, *accu_rate + rate, *accu_dist + dist);
773
774
0
        TranLow qc_low, dqc_low;
775
0
        TranLow abs_qc_low;
776
0
        int64_t dist_low, rd_low;
777
0
        int     rate_low;
778
0
        if (abs_qc == 1) {
779
0
            abs_qc_low = 0;
780
0
            dqc_low = qc_low = 0;
781
0
            dist_low         = 0;
782
0
            rate_low         = txb_costs->base_cost[coeff_ctx][0];
783
0
            rd_low           = RDCOST(rdmult, *accu_rate + rate_low, *accu_dist);
784
0
        } else {
785
0
            get_qc_dqc_low(abs_qc, sign, dqv, shift, &qc_low, &dqc_low);
786
0
            abs_qc_low = abs_qc - 1;
787
0
            dist_low   = get_coeff_dist(tqc, dqc_low, shift) - dist0;
788
0
            rate_low   = get_coeff_cost_general(
789
0
                0, ci, abs_qc_low, sign, coeff_ctx, dc_sign_ctx, txb_costs, bwl, tx_class, levels);
790
0
            rd_low = RDCOST(rdmult, *accu_rate + rate_low, *accu_dist + dist_low);
791
0
        }
792
793
0
        int       lower_level_new_eob = 0;
794
0
        const int new_eob             = si + 1;
795
0
        const int coeff_ctx_new_eob   = get_lower_levels_ctx_eob(bwl, height, si);
796
0
        const int new_eob_cost        = get_eob_cost(new_eob, txb_eob_costs, txb_costs, tx_class);
797
0
        int       rate_coeff_eob      = new_eob_cost +
798
0
            get_coeff_cost_eob(ci, abs_qc, sign, coeff_ctx_new_eob, dc_sign_ctx, txb_costs, bwl, tx_class);
799
0
        int64_t dist_new_eob = dist;
800
0
        int64_t rd_new_eob   = RDCOST(rdmult, rate_coeff_eob, dist_new_eob);
801
802
0
        if (abs_qc_low > 0) {
803
0
            const int rate_coeff_eob_low = new_eob_cost +
804
0
                get_coeff_cost_eob(ci, abs_qc_low, sign, coeff_ctx_new_eob, dc_sign_ctx, txb_costs, bwl, tx_class);
805
0
            const int64_t dist_new_eob_low = dist_low;
806
0
            const int64_t rd_new_eob_low   = RDCOST(rdmult, rate_coeff_eob_low, dist_new_eob_low);
807
0
            if (rd_new_eob_low < rd_new_eob) {
808
0
                lower_level_new_eob = 1;
809
0
                rd_new_eob          = rd_new_eob_low;
810
0
                rate_coeff_eob      = rate_coeff_eob_low;
811
0
                dist_new_eob        = dist_new_eob_low;
812
0
            }
813
0
        }
814
815
0
        if (rd_low < rd) {
816
0
            lower_level = 1;
817
0
            rd          = rd_low;
818
0
            rate        = rate_low;
819
0
            dist        = dist_low;
820
0
        }
821
822
0
        if (sharpness == 0 && rd_new_eob < rd) {
823
0
            for (int ni = 0; ni < *nz_num; ++ni) {
824
0
                int last_ci                          = nz_ci[ni];
825
0
                levels[get_padded_idx(last_ci, bwl)] = 0;
826
0
                qcoeff[last_ci]                      = 0;
827
0
                dqcoeff[last_ci]                     = 0;
828
0
            }
829
0
            *eob        = new_eob;
830
0
            *nz_num     = 0;
831
0
            *accu_rate  = rate_coeff_eob;
832
0
            *accu_dist  = dist_new_eob;
833
0
            lower_level = lower_level_new_eob;
834
0
        } else {
835
0
            *accu_rate += rate;
836
0
            *accu_dist += dist;
837
0
        }
838
839
0
        if (lower_level) {
840
0
            qcoeff[ci]                      = qc_low;
841
0
            dqcoeff[ci]                     = dqc_low;
842
0
            levels[get_padded_idx(ci, bwl)] = AOMMIN(abs_qc_low, INT8_MAX);
843
0
        }
844
0
        if (qcoeff[ci]) {
845
0
            nz_ci[*nz_num] = ci;
846
0
            ++*nz_num;
847
0
        }
848
0
    }
849
0
}
850
851
static INLINE void update_coeff_general(int* accu_rate, int64_t* accu_dist, int si, int eob, TxSize tx_size,
852
                                        TxClass tx_class, int bwl, int height, int64_t rdmult, int shift,
853
                                        int dc_sign_ctx, const int16_t* dequant, const int16_t* scan,
854
                                        const LvMapCoeffCost* txb_costs, const TranLow* tcoeff, TranLow* qcoeff,
855
9.85k
                                        TranLow* dqcoeff, uint8_t* levels, const QmVal* iqm_ptr) {
856
9.85k
    const int     ci        = scan[si];
857
9.85k
    const int     dqv       = get_dqv(dequant, ci, iqm_ptr);
858
9.85k
    const TranLow qc        = qcoeff[ci];
859
9.85k
    const int     is_last   = si == (eob - 1);
860
9.85k
    const int     coeff_ctx = get_lower_levels_ctx_general(is_last, si, bwl, height, levels, ci, tx_size, tx_class);
861
9.85k
    if (qc == 0) {
862
0
        *accu_rate += txb_costs->base_cost[coeff_ctx][0];
863
9.85k
    } else {
864
18.4E
        const int     sign   = (qc < 0) ? 1 : 0;
865
9.85k
        const TranLow abs_qc = abs(qc);
866
9.85k
        const TranLow tqc    = tcoeff[ci];
867
9.85k
        const TranLow dqc    = dqcoeff[ci];
868
9.85k
        const int64_t dist   = get_coeff_dist(tqc, dqc, shift);
869
9.85k
        const int64_t dist0  = get_coeff_dist(tqc, 0, shift);
870
9.85k
        const int     rate   = get_coeff_cost_general(
871
9.85k
            is_last, ci, abs_qc, sign, coeff_ctx, dc_sign_ctx, txb_costs, bwl, tx_class, levels);
872
9.85k
        const int64_t rd = RDCOST(rdmult, rate, dist);
873
874
9.85k
        TranLow qc_low, dqc_low;
875
9.85k
        TranLow abs_qc_low;
876
9.85k
        int64_t dist_low, rd_low;
877
9.85k
        int     rate_low;
878
9.85k
        if (abs_qc == 1) {
879
0
            abs_qc_low = qc_low = dqc_low = 0;
880
0
            dist_low                      = dist0;
881
0
            rate_low                      = txb_costs->base_cost[coeff_ctx][0];
882
9.85k
        } else {
883
9.85k
            get_qc_dqc_low(abs_qc, sign, dqv, shift, &qc_low, &dqc_low);
884
9.85k
            abs_qc_low = abs_qc - 1;
885
9.85k
            dist_low   = get_coeff_dist(tqc, dqc_low, shift);
886
9.85k
            rate_low   = get_coeff_cost_general(
887
9.85k
                is_last, ci, abs_qc_low, sign, coeff_ctx, dc_sign_ctx, txb_costs, bwl, tx_class, levels);
888
9.85k
        }
889
890
9.85k
        rd_low = RDCOST(rdmult, rate_low, dist_low);
891
9.85k
        if (rd_low < rd) {
892
46
            qcoeff[ci]                      = qc_low;
893
46
            dqcoeff[ci]                     = dqc_low;
894
46
            levels[get_padded_idx(ci, bwl)] = AOMMIN(abs_qc_low, INT8_MAX);
895
46
            *accu_rate += rate_low;
896
46
            *accu_dist += dist_low - dist0;
897
9.80k
        } else {
898
9.80k
            *accu_rate += rate;
899
9.80k
            *accu_dist += dist - dist0;
900
9.80k
        }
901
9.85k
    }
902
9.85k
}
903
904
/*
 * Reduced rate-distortion decision for a coefficient that is neither DC
 * (si > 0) nor the last one in scan order (si != eob - 1).
 *
 * Only *accu_rate is maintained.  A zero coefficient adds its base cost.  For
 * a non-zero one, if the reconstruction magnitude is already below the
 * transform coefficient magnitude the level is kept without further checks.
 * Otherwise keeping and lowering by one are compared by RD cost, and the
 * lowered level is written to qcoeff / dqcoeff / levels when it wins.
 */
static AOM_FORCE_INLINE void update_coeff_simple(int* accu_rate, int si, int eob, TxSize tx_size, TxClass tx_class,
                                                 int bwl, int64_t rdmult, int shift, const int16_t* dequant,
                                                 const int16_t* scan, const LvMapCoeffCost* txb_costs,
                                                 const TranLow* tcoeff, TranLow* qcoeff, TranLow* dqcoeff,
                                                 uint8_t* levels, const QmVal* iqm_ptr) {
    const int pos = scan[si];
    const int dqv = get_dqv(dequant, pos, iqm_ptr);
    (void)eob; // only referenced by the assert below
    assert(si != eob - 1);
    assert(si > 0);

    const TranLow q   = qcoeff[pos];
    const int     ctx = get_lower_levels_ctx(levels, pos, bwl, tx_size, tx_class);
    if (q == 0) {
        *accu_rate += txb_costs->base_cost[ctx][0];
        return;
    }

    const TranLow level     = abs(q);
    const TranLow abs_t     = abs(tcoeff[pos]);
    const TranLow abs_recon = abs(dqcoeff[pos]);
    int           rate_down = 0;
    const int rate_keep = get_two_coeff_cost_simple(pos, level, ctx, txb_costs, bwl, tx_class, levels, &rate_down);

    // Reconstruction already undershoots the source: lowering cannot help.
    if (abs_recon < abs_t) {
        *accu_rate += rate_keep;
        return;
    }

    const int64_t dist_keep = get_coeff_dist(abs_t, abs_recon, shift);
    const int64_t rd_keep   = RDCOST(rdmult, rate_keep, dist_keep);

    const TranLow level_down     = level - 1;
    const TranLow abs_recon_down = (level_down * dqv) >> shift;
    const int64_t dist_down      = get_coeff_dist(abs_t, abs_recon_down, shift);
    const int64_t rd_down        = RDCOST(rdmult, rate_down, dist_down);

    if (rd_down >= rd_keep) {
        *accu_rate += rate_keep;
        return;
    }

    const int sign                   = (q < 0) ? 1 : 0;
    qcoeff[pos]                      = (-sign ^ level_down) + sign;
    dqcoeff[pos]                     = (-sign ^ abs_recon_down) + sign;
    levels[get_padded_idx(pos, bwl)] = AOMMIN(level_down, INT8_MAX);
    *accu_rate += rate_down;
}
950
951
/*
 * Final check: is coding the whole block as "skip" (all zero) cheaper than
 * coding the coefficients kept so far?
 *
 * Compares RDCOST(*accu_rate + non_skip_cost, accu_dist) with
 * RDCOST(skip_cost, 0).  When sharpness == 0 and skipping wins, the
 * coefficients listed in nz_ci are zeroed in qcoeff / dqcoeff, *accu_rate is
 * reset to 0 and *eob to 0.  The levels buffer is intentionally left alone
 * because this is the last step that would read it.
 */
static INLINE void update_skip(int* accu_rate, int64_t accu_dist, uint16_t* eob, int nz_num, int* nz_ci, int64_t rdmult,
                               int skip_cost, int non_skip_cost, TranLow* qcoeff, TranLow* dqcoeff, int sharpness) {
    const int64_t rd_coded = RDCOST(rdmult, *accu_rate + non_skip_cost, accu_dist);
    const int64_t rd_skip  = RDCOST(rdmult, skip_cost, 0);
    if (sharpness != 0 || rd_skip >= rd_coded) {
        return;
    }

    for (int k = 0; k < nz_num; ++k) {
        const int pos = nz_ci[k];
        qcoeff[pos]   = 0;
        dqcoeff[pos]  = 0;
    }
    *accu_rate = 0;
    *eob       = 0;
}
967
968
// Adaptive-quantization mode identifiers.  The type name AQ_MODE is produced by
// the UENUM1BYTE macro (presumably a one-byte unsigned type -- confirm against
// its definition).
enum {
    NO_AQ             = 0,
    VARIANCE_AQ       = 1,
    COMPLEXITY_AQ     = 2,
    CYCLIC_REFRESH_AQ = 3,
    // Number of modes above; keep this as the last member.
    AQ_MODE_COUNT
} UENUM1BYTE(AQ_MODE);
975
976
// Delta-Q mode identifiers.  The type name DELTAQ_MODE is produced by the
// UENUM1BYTE macro (presumably a one-byte unsigned type -- confirm against its
// definition).
enum {
    NO_DELTA_Q   = 0,
    DELTA_Q_ONLY = 1,
    DELTA_Q_LF   = 2,
    // Number of modes above; keep this as the last member.
    DELTAQ_MODE_COUNT
} UENUM1BYTE(DELTAQ_MODE);
982
983
// These numbers are empirically obtained.
984
#if TUNE_CHROMA_SSIM
985
static const int plane_rd_mult[2][REF_TYPES][PLANE_TYPES] = {{
986
                                                                 {17, 13},
987
                                                                 {16, 10},
988
                                                             },
989
                                                             {
990
                                                                 {17, 13},
991
                                                                 {16, 10},
992
                                                             }};
993
#else
994
static const int plane_rd_mult[2][REF_TYPES][PLANE_TYPES] = {{{17, 20}, {16, 20}},
995
                                                             {
996
                                                                 {17, 13},
997
                                                                 {16, 10},
998
                                                             }};
999
#endif
1000
1001
/*
1002
 * Reduce the number of non-zero quantized coefficients before getting to the main/complex RDOQ stage
1003
 * (it performs an early check of whether to zero out each of the non-zero quantized coefficients,
1004
 * and updates the quantized coeffs if it is determined it can be zeroed out).
1005
 */
1006
static INLINE void update_coeff_eob_fast(uint16_t* eob, int shift, const int16_t* dequant_ptr, const int16_t* scan,
1007
0
                                         const TranLow* coeff_ptr, TranLow* qcoeff_ptr, TranLow* dqcoeff_ptr) {
1008
0
    int       eob_out = *eob;
1009
0
    const int zbin[2] = {dequant_ptr[0] + ROUND_POWER_OF_TWO(dequant_ptr[0] * 70, 7),
1010
0
                         dequant_ptr[1] + ROUND_POWER_OF_TWO(dequant_ptr[1] * 70, 7)};
1011
0
    for (int i = *eob - 1; i >= 0; i--) {
1012
0
        const int rc         = scan[i];
1013
0
        const int qcoeff     = qcoeff_ptr[rc];
1014
0
        const int coeff      = coeff_ptr[rc];
1015
0
        const int coeff_sign = -(coeff < 0);
1016
0
        int64_t   abs_coeff  = (coeff ^ coeff_sign) - coeff_sign;
1017
0
        if (((abs_coeff << (1 + shift)) < zbin[rc != 0]) || (qcoeff == 0)) {
1018
0
            eob_out--;
1019
0
            qcoeff_ptr[rc]  = 0;
1020
0
            dqcoeff_ptr[rc] = 0;
1021
0
        } else {
1022
0
            break;
1023
0
        }
1024
0
    }
1025
0
    *eob = eob_out;
1026
0
}
1027
1028
/*
 * Fast coefficient optimization: looks up the scan order and transform scale
 * for (tx_size, tx_type) and runs update_coeff_eob_fast() with the plane's
 * dequantization table.  qcoeff_ptr, dqcoeff_ptr and *eob are updated in place.
 */
static void svt_fast_optimize_b(const TranLow* coeff_ptr, const MacroblockPlane* p, TranLow* qcoeff_ptr,
                                TranLow* dqcoeff_ptr, uint16_t* eob, TxSize tx_size, TxType tx_type) {
    const int16_t* const scan     = get_scan_order(tx_size, tx_type)->scan;
    const int            tx_scale = av1_get_tx_scale_tab[tx_size];

    update_coeff_eob_fast(eob, tx_scale, p->dequant_qtx, scan, coeff_ptr, qcoeff_ptr, dqcoeff_ptr);
}
1037
1038
static void svt_av1_optimize_b(PictureControlSet* pcs, ModeDecisionContext* ctx, int16_t txb_skip_context,
1039
                               int16_t dc_sign_context, const TranLow* coeff_ptr, const MacroblockPlane* p,
1040
                               TranLow* qcoeff_ptr, TranLow* dqcoeff_ptr, uint16_t* eob, const QuantParam* qparam,
1041
                               TxSize tx_size, TxType tx_type, bool is_inter, uint8_t use_sharpness,
1042
11.6k
                               uint8_t delta_q_present, uint8_t picture_qp, uint32_t lambda, int plane) {
1043
11.6k
    SequenceControlSet*    scs        = pcs->scs;
1044
11.6k
    bool                   allintra   = scs->allintra;
1045
11.6k
    bool                   rtc        = scs->static_config.rtc;
1046
11.6k
    int                    sharpness  = 0; // No Sharpness
1047
11.6k
    const ScanOrder* const scan_order = get_scan_order(tx_size, tx_type);
1048
11.6k
    const int16_t*         scan       = scan_order->scan;
1049
11.6k
    const int              shift      = av1_get_tx_scale_tab[tx_size];
1050
11.6k
    const PlaneType        plane_type = plane;
1051
11.6k
    const TxSize           txs_ctx    = get_txsize_entropy_ctx(tx_size);
1052
11.6k
    const TxClass          tx_class   = tx_type_to_class[tx_type];
1053
11.6k
    const int              bwl        = get_txb_bwl(tx_size);
1054
11.6k
    const int              width      = get_txb_wide(tx_size);
1055
11.6k
    const int              height     = get_txb_high(tx_size);
1056
11.6k
    assert(width == (1 << bwl));
1057
11.6k
    assert(txs_ctx < TX_SIZES);
1058
11.6k
    const LvMapCoeffCost* txb_costs      = &ctx->md_rate_est_ctx->coeff_fac_bits[txs_ctx][plane_type];
1059
11.6k
    const int             eob_multi_size = txsize_log2_minus4[tx_size];
1060
11.6k
    const LvMapEobCost*   txb_eob_costs  = &ctx->md_rate_est_ctx->eob_frac_bits[eob_multi_size][plane_type];
1061
11.6k
    const int             non_skip_cost  = txb_costs->txb_skip_cost[txb_skip_context][0];
1062
11.6k
    const int             skip_cost      = txb_costs->txb_skip_cost[txb_skip_context][1];
1063
11.6k
    const int             eob_cost       = get_eob_cost(*eob, txb_eob_costs, txb_costs, tx_class);
1064
11.6k
    int                   rweight        = 100;
1065
11.6k
    const int32_t         sharpness_val  = CLIP3(0, 7, pcs->scs->static_config.sharpness);
1066
11.6k
    const int             rshift         = MAX(2, (int)sharpness_val);
1067
11.6k
    if (use_sharpness && delta_q_present && plane == 0) {
1068
0
        int diff = ctx->sb_ptr->qindex - quantizer_to_qindex[picture_qp];
1069
0
        if (diff < 0) {
1070
0
            sharpness = 1;
1071
0
            rweight   = 0;
1072
0
        }
1073
0
    }
1074
11.6k
    const int64_t rdmult =
1075
11.6k
        (((((int64_t)lambda * plane_rd_mult[allintra || rtc][is_inter][plane_type]) * rweight) / 100) + 2) >> rshift;
1076
11.6k
    uint8_t* const levels = set_levels(ctx->md_levels_buf, width, height);
1077
1078
11.6k
    if (*eob > 1) {
1079
0
        svt_av1_txb_init_levels(qcoeff_ptr, width, height, levels);
1080
0
    }
1081
11.6k
    int accu_rate = eob_cost;
1082
1083
11.6k
    int64_t       accu_dist  = 0;
1084
11.6k
    int           si         = *eob - 1;
1085
11.6k
    const int     ci         = scan[si];
1086
11.6k
    const TranLow qc         = qcoeff_ptr[ci];
1087
11.6k
    const TranLow abs_qc     = abs(qc);
1088
11.6k
    const int     sign       = qc < 0;
1089
11.6k
    const int     max_nz_num = 4;
1090
11.6k
    int           nz_num     = 1;
1091
11.6k
    int           nz_ci[5]   = {ci, 0, 0, 0, 0};
1092
11.6k
    if (abs_qc >= 2) {
1093
9.85k
        update_coeff_general(&accu_rate,
1094
9.85k
                             &accu_dist,
1095
9.85k
                             si,
1096
9.85k
                             *eob,
1097
9.85k
                             tx_size,
1098
9.85k
                             tx_class,
1099
9.85k
                             bwl,
1100
9.85k
                             height,
1101
9.85k
                             rdmult,
1102
9.85k
                             shift,
1103
9.85k
                             dc_sign_context,
1104
9.85k
                             p->dequant_qtx,
1105
9.85k
                             scan,
1106
9.85k
                             txb_costs,
1107
9.85k
                             coeff_ptr,
1108
9.85k
                             qcoeff_ptr,
1109
9.85k
                             dqcoeff_ptr,
1110
9.85k
                             levels,
1111
9.85k
                             qparam->iqmatrix);
1112
9.85k
        --si;
1113
9.85k
    } else {
1114
1.79k
        assert(abs_qc == 1);
1115
1.79k
        const int coeff_ctx = get_lower_levels_ctx_eob(bwl, height, si);
1116
1.79k
        accu_rate += get_coeff_cost_eob(ci, abs_qc, sign, coeff_ctx, dc_sign_context, txb_costs, bwl, tx_class);
1117
1118
1.79k
        const TranLow tqc   = coeff_ptr[ci];
1119
1.79k
        const TranLow dqc   = dqcoeff_ptr[ci];
1120
1.79k
        const int64_t dist  = get_coeff_dist(tqc, dqc, shift);
1121
1.79k
        const int64_t dist0 = get_coeff_dist(tqc, 0, shift);
1122
1.79k
        accu_dist += dist - dist0;
1123
1.79k
        --si;
1124
1.79k
    }
1125
11.6k
#define UPDATE_COEFF_EOB_CASE(tx_class_literal)         \
1126
11.6k
    case tx_class_literal:                              \
1127
11.6k
        for (; si >= 0 && nz_num <= max_nz_num; --si) { \
1128
0
            update_coeff_eob(&accu_rate,                \
1129
0
                             &accu_dist,                \
1130
0
                             eob,                       \
1131
0
                             &nz_num,                   \
1132
0
                             nz_ci,                     \
1133
0
                             si,                        \
1134
0
                             tx_size,                   \
1135
0
                             tx_class_literal,          \
1136
0
                             bwl,                       \
1137
0
                             height,                    \
1138
0
                             dc_sign_context,           \
1139
0
                             rdmult,                    \
1140
0
                             shift,                     \
1141
0
                             p->dequant_qtx,            \
1142
0
                             scan,                      \
1143
0
                             txb_eob_costs,             \
1144
0
                             txb_costs,                 \
1145
0
                             coeff_ptr,                 \
1146
0
                             qcoeff_ptr,                \
1147
0
                             dqcoeff_ptr,               \
1148
0
                             levels,                    \
1149
0
                             sharpness,                 \
1150
0
                             qparam->iqmatrix);         \
1151
0
        }                                               \
1152
11.6k
        break;
1153
11.6k
    switch (tx_class) {
1154
11.6k
        UPDATE_COEFF_EOB_CASE(TX_CLASS_2D);
1155
0
        UPDATE_COEFF_EOB_CASE(TX_CLASS_HORIZ);
1156
0
        UPDATE_COEFF_EOB_CASE(TX_CLASS_VERT);
1157
0
#undef UPDATE_COEFF_EOB_CASE
1158
0
    default:
1159
0
        assert(false);
1160
11.6k
    }
1161
1162
11.6k
    if (si == -1 && nz_num <= max_nz_num) {
1163
11.6k
        update_skip(&accu_rate,
1164
11.6k
                    accu_dist,
1165
11.6k
                    eob,
1166
11.6k
                    nz_num,
1167
11.6k
                    nz_ci,
1168
11.6k
                    rdmult,
1169
11.6k
                    skip_cost,
1170
11.6k
                    non_skip_cost,
1171
11.6k
                    qcoeff_ptr,
1172
11.6k
                    dqcoeff_ptr,
1173
11.6k
                    sharpness);
1174
11.6k
    }
1175
1176
11.6k
    int si_end = 1; // default: full RDOQ
1177
11.6k
    if (ctx->rdoq_ctrls.cut_off_num) {
1178
11.6k
        const int cut_off_coeff = AOMMAX((width * height) >> 7,
1179
11.6k
                                         (*eob * ctx->rdoq_ctrls.cut_off_num) / ctx->rdoq_ctrls.cut_off_denum);
1180
11.6k
        si_end                  = AOMMAX(1, *eob - cut_off_coeff);
1181
11.6k
    }
1182
11.6k
#define UPDATE_COEFF_SIMPLE_CASE(tx_class_literal) \
1183
11.6k
    case tx_class_literal:                         \
1184
11.6k
        for (; si >= si_end; --si) {               \
1185
0
            update_coeff_simple(&accu_rate,        \
1186
0
                                si,                \
1187
0
                                *eob,              \
1188
0
                                tx_size,           \
1189
0
                                tx_class_literal,  \
1190
0
                                bwl,               \
1191
0
                                rdmult,            \
1192
0
                                shift,             \
1193
0
                                p->dequant_qtx,    \
1194
0
                                scan,              \
1195
0
                                txb_costs,         \
1196
0
                                coeff_ptr,         \
1197
0
                                qcoeff_ptr,        \
1198
0
                                dqcoeff_ptr,       \
1199
0
                                levels,            \
1200
0
                                qparam->iqmatrix); \
1201
0
        }                                          \
1202
11.6k
        break;
1203
11.6k
    switch (tx_class) {
1204
11.6k
        UPDATE_COEFF_SIMPLE_CASE(TX_CLASS_2D);
1205
0
        UPDATE_COEFF_SIMPLE_CASE(TX_CLASS_HORIZ);
1206
0
        UPDATE_COEFF_SIMPLE_CASE(TX_CLASS_VERT);
1207
0
#undef UPDATE_COEFF_SIMPLE_CASE
1208
0
    default:
1209
0
        assert(false);
1210
11.6k
    }
1211
1212
    // DC position
1213
11.6k
    if (si == 0) {
1214
        // no need to update accu_dist because it's not used after this point
1215
0
        int64_t dummy_dist = 0;
1216
0
        update_coeff_general(&accu_rate,
1217
0
                             &dummy_dist,
1218
0
                             si,
1219
0
                             *eob,
1220
0
                             tx_size,
1221
0
                             tx_class,
1222
0
                             bwl,
1223
0
                             height,
1224
0
                             rdmult,
1225
0
                             shift,
1226
0
                             dc_sign_context,
1227
0
                             p->dequant_qtx,
1228
0
                             scan,
1229
0
                             txb_costs,
1230
0
                             coeff_ptr,
1231
0
                             qcoeff_ptr,
1232
0
                             dqcoeff_ptr,
1233
0
                             levels,
1234
0
                             qparam->iqmatrix);
1235
0
    }
1236
11.6k
}
1237
1238
1.41M
/*
 * Map a transform size that has a 64-sample dimension onto the size with that
 * dimension reduced to 32. Every other transform size is returned unchanged.
 *
 * In this file the result is used as the transform-size index into the
 * gqmatrix / giqmatrix quantization-matrix tables.
 */
static INLINE TxSize aom_av1_get_adjusted_tx_size(TxSize tx_size) {
    if (tx_size == TX_64X64 || tx_size == TX_64X32 || tx_size == TX_32X64) {
        return TX_32X32;
    }
    if (tx_size == TX_64X16) {
        return TX_32X16;
    }
    if (tx_size == TX_16X64) {
        return TX_16X32;
    }
    return tx_size;
}
1252
1253
/*
 * Light quantize + inverse-quantize of one transform block (no RDOQ).
 *
 * coeff        : transform coefficients to quantize (read as TranLow).
 * quant_coeff  : receives the quantized levels.
 * recon_coeff  : receives the de-quantized coefficients (written as TranLow).
 * qindex       : index used directly to select the quantizer table rows.
 * txsize       : transform size; selects scan order, coefficient count and scale.
 * eob          : passed to the quantizer, which reports the end-of-block through it.
 * bit_depth    : > EB_EIGHT_BIT selects the high-bit-depth quantizers when
 *                CONFIG_ENABLE_HIGH_BIT_DEPTH is set; otherwise unused.
 * tx_type      : transform type; selects scan order and whether a quantization
 *                matrix level from the frame header applies (2D transforms only).
 *
 * The quantization matrices are always looked up for PLANE_Y. When both the
 * forward and inverse matrices are NULL the plain quantizer is used, otherwise
 * the "_qm" variant is used.
 *
 * NOTE(review): the 8-bit path takes v_* quantizer tables with the y_ dequant
 * table, while the high-bit-depth path takes y_* quantizer tables with the v_
 * dequant table. This is reproduced as-is; presumably the per-plane tables are
 * interchangeable at a given index - confirm against the table construction.
 */
void svt_aom_quantize_inv_quantize_light(PictureControlSet* pcs, int32_t* coeff, int32_t* quant_coeff,
                                         int32_t* recon_coeff, uint32_t qindex, TxSize txsize, uint16_t* eob,
                                         uint32_t bit_depth, TxType tx_type) {
    EncodeContext* const   enc_ctx = pcs->scs->enc_ctx;
    const ScanOrder* const order   = get_scan_order(txsize, tx_type);
    const int32_t          max_eob = av1_get_max_eob(txsize);

    // Default to the last QM level unless a 2D transform is used with qmatrix enabled.
    int32_t qm_level = NUM_QM_LEVELS - 1;
    if (IS_2D_TRANSFORM(tx_type) && pcs->ppcs->frm_hdr.quantization_params.using_qmatrix) {
        qm_level = pcs->ppcs->frm_hdr.quantization_params.qm[PLANE_Y];
    }

    const TxSize       qm_tx_size = aom_av1_get_adjusted_tx_size(txsize);
    const QmVal* const q_matrix   = pcs->ppcs->gqmatrix[qm_level][PLANE_Y][qm_tx_size];
    const QmVal* const iq_matrix  = pcs->ppcs->giqmatrix[qm_level][PLANE_Y][qm_tx_size];
    const bool         no_qm      = (q_matrix == NULL) && (iq_matrix == NULL);
    const int          log_scale  = av1_get_tx_scale_tab[txsize];

#if CONFIG_ENABLE_HIGH_BIT_DEPTH
    if (bit_depth > EB_EIGHT_BIT) {
        if (no_qm) {
            svt_aom_highbd_quantize_b((TranLow*)coeff,
                                      max_eob,
                                      enc_ctx->quants_bd.y_zbin[qindex],
                                      enc_ctx->quants_bd.y_round[qindex],
                                      enc_ctx->quants_bd.y_quant[qindex],
                                      enc_ctx->quants_bd.y_quant_shift[qindex],
                                      quant_coeff,
                                      (TranLow*)recon_coeff,
                                      enc_ctx->deq_bd.v_dequant_qtx[qindex],
                                      eob,
                                      order->scan,
                                      order->iscan,
                                      q_matrix,
                                      iq_matrix,
                                      log_scale);
        } else {
            svt_av1_highbd_quantize_b_qm((TranLow*)coeff,
                                         max_eob,
                                         enc_ctx->quants_bd.y_zbin[qindex],
                                         enc_ctx->quants_bd.y_round[qindex],
                                         enc_ctx->quants_bd.y_quant[qindex],
                                         enc_ctx->quants_bd.y_quant_shift[qindex],
                                         quant_coeff,
                                         (TranLow*)recon_coeff,
                                         enc_ctx->deq_bd.v_dequant_qtx[qindex],
                                         eob,
                                         order->scan,
                                         order->iscan,
                                         q_matrix,
                                         iq_matrix,
                                         log_scale);
        }
        return;
    }
#else
    UNUSED(bit_depth);
#endif

    // 8-bit quantizers
    if (no_qm) {
        svt_aom_quantize_b((TranLow*)coeff,
                           max_eob,
                           enc_ctx->quants_8bit.v_zbin[qindex],
                           enc_ctx->quants_8bit.v_round[qindex],
                           enc_ctx->quants_8bit.v_quant[qindex],
                           enc_ctx->quants_8bit.v_quant_shift[qindex],
                           quant_coeff,
                           (TranLow*)recon_coeff,
                           enc_ctx->deq_8bit.y_dequant_qtx[qindex],
                           eob,
                           order->scan,
                           order->iscan,
                           q_matrix,
                           iq_matrix,
                           log_scale);
    } else {
        svt_av1_quantize_b_qm((TranLow*)coeff,
                              max_eob,
                              enc_ctx->quants_8bit.v_zbin[qindex],
                              enc_ctx->quants_8bit.v_round[qindex],
                              enc_ctx->quants_8bit.v_quant[qindex],
                              enc_ctx->quants_8bit.v_quant_shift[qindex],
                              quant_coeff,
                              (TranLow*)recon_coeff,
                              enc_ctx->deq_8bit.y_dequant_qtx[qindex],
                              eob,
                              order->scan,
                              order->iscan,
                              q_matrix,
                              iq_matrix,
                              log_scale);
    }
}
1354
1355
// See av1_get_txb_entropy_context in libaom
1356
0
uint8_t svt_av1_compute_cul_level_c(const int16_t* const scan, const int32_t* const quant_coeff, uint16_t* eob) {
1357
0
    int32_t cul_level = 0;
1358
0
    for (int32_t c = 0; c < *eob; ++c) {
1359
0
        const int16_t pos   = scan[c];
1360
0
        const int32_t v     = quant_coeff[pos];
1361
0
        int32_t       level = ABS(v);
1362
0
        cul_level += level;
1363
        // Early exit the loop if cul_level reaches COEFF_CONTEXT_MASK
1364
0
        if (cul_level >= COEFF_CONTEXT_MASK) {
1365
0
            break;
1366
0
        }
1367
0
    }
1368
1369
0
    cul_level = AOMMIN(COEFF_CONTEXT_MASK, cul_level);
1370
    // DC value
1371
0
    set_dc_sign(&cul_level, quant_coeff[0]);
1372
0
    return (uint8_t)cul_level;
1373
0
}
1374
1375
#if OPT_COEFF_SHAVING
1376
1377
// Retract EOB by removing trailing low-magnitude coefficients separated by zero gaps,
1378
// then compute energy on the reduced block and optionally zero it entirely if energy is low.
1379
// Returns the updated EOB (0 = block became skip).
1380
static INLINE uint16_t shave_coeff(int32_t* quant_buf, int32_t* recon_buf, uint16_t eob, TxSize tx_size, TxType tx_type,
1381
0
                                   const CoeffShavingCtrls* ctrls) {
1382
0
    const int16_t* const scan = get_scan_order(tx_size, tx_type)->scan;
1383
1384
0
    const int level_th = ctrls->level_threshold;
1385
0
    const int gap_th   = ctrls->zero_gap_threshold;
1386
1387
0
    int updated_eob = (int)eob;
1388
1389
    // -------------------------
1390
    // Phase 1: EOB retraction
1391
    // -------------------------
1392
0
    while (updated_eob > 1) {
1393
0
        const int     last_pos = scan[updated_eob - 1];
1394
0
        const int32_t val      = quant_buf[last_pos];
1395
0
        const int32_t abs_val  = (val >= 0) ? val : -val;
1396
1397
0
        if (abs_val > level_th) {
1398
0
            break;
1399
0
        }
1400
1401
        // Find previous non-zero coefficient
1402
0
        int next_nz = updated_eob - 2;
1403
0
        while (next_nz >= 0 && quant_buf[scan[next_nz]] == 0) {
1404
0
            --next_nz;
1405
0
        }
1406
1407
0
        if (next_nz < 0) {
1408
0
            break;
1409
0
        }
1410
1411
        // Gap check
1412
0
        const int gap = (updated_eob - 1) - next_nz - 1;
1413
0
        if (gap < gap_th) {
1414
0
            break;
1415
0
        }
1416
1417
        // Zero trailing coefficient
1418
0
        quant_buf[last_pos] = 0;
1419
0
        recon_buf[last_pos] = 0;
1420
1421
0
        updated_eob = next_nz + 1;
1422
0
    }
1423
1424
    // -------------------------
1425
    // Phase 2: energy check (post-shaving)
1426
    // -------------------------
1427
0
    const int skip_th = ctrls->skip_energy_threshold;
1428
0
    if (skip_th > 0 && updated_eob > 0) {
1429
0
        int32_t total_energy = 0;
1430
1431
0
        for (int c = 0; c < updated_eob; ++c) {
1432
0
            const int32_t v = quant_buf[scan[c]];
1433
0
            total_energy += (v >= 0) ? v : -v;
1434
1435
0
            if (total_energy > skip_th) {
1436
0
                break;
1437
0
            }
1438
0
        }
1439
1440
0
        if (total_energy <= skip_th) {
1441
            // Zero entire block
1442
0
            for (int c = 0; c < updated_eob; ++c) {
1443
0
                const int pos  = scan[c];
1444
0
                quant_buf[pos] = 0;
1445
0
                recon_buf[pos] = 0;
1446
0
            }
1447
0
            return 0;
1448
0
        }
1449
0
    }
1450
1451
0
    return (uint16_t)updated_eob;
1452
0
}
1453
1454
#endif
1455
1456
uint8_t svt_aom_quantize_inv_quantize(PictureControlSet* pcs, ModeDecisionContext* ctx, int32_t* coeff,
1457
                                      int32_t* quant_coeff, int32_t* recon_coeff, uint32_t qindex,
1458
                                      int32_t segmentation_qp_offset, TxSize txsize, uint16_t* eob,
1459
                                      uint32_t component_type, uint32_t bit_depth, TxType tx_type,
1460
                                      int16_t txb_skip_context, int16_t dc_sign_context, PredictionMode pred_mode,
1461
1.40M
                                      uint32_t lambda, bool is_encode_pass) {
1462
1.40M
    SequenceControlSet* scs     = pcs->scs;
1463
1.40M
    EncodeContext*      enc_ctx = scs->enc_ctx;
1464
1.40M
    int32_t             plane   = component_type == COMPONENT_LUMA ? PLANE_Y
1465
1.40M
                                                                   : (component_type == COMPONENT_CHROMA_CB ? PLANE_U : PLANE_V);
1466
1467
1.40M
    int32_t qmatrix_level = (IS_2D_TRANSFORM(tx_type) && pcs->ppcs->frm_hdr.quantization_params.using_qmatrix)
1468
1.40M
        ? pcs->ppcs->frm_hdr.quantization_params.qm[plane]
1469
1.40M
        : NUM_QM_LEVELS - 1;
1470
1471
1.40M
    TxSize          adjusted_tx_size = aom_av1_get_adjusted_tx_size(txsize);
1472
1.40M
    MacroblockPlane candidate_plane;
1473
1.40M
    const QmVal*    q_matrix  = pcs->ppcs->gqmatrix[qmatrix_level][plane][adjusted_tx_size];
1474
1.40M
    const QmVal*    iq_matrix = pcs->ppcs->giqmatrix[qmatrix_level][plane][adjusted_tx_size];
1475
1.40M
    int32_t         q_index   = pcs->ppcs->frm_hdr.delta_q_params.delta_q_present
1476
1.40M
                  ? qindex
1477
1.40M
                  : pcs->ppcs->frm_hdr.quantization_params.base_q_idx;
1478
1.40M
    if (segmentation_qp_offset != 0) {
1479
0
        q_index = CLIP3(0, 255, q_index + segmentation_qp_offset);
1480
0
    }
1481
1.40M
    if (component_type != COMPONENT_LUMA) {
1482
292k
        const int8_t offset = (component_type == COMPONENT_CHROMA_CB)
1483
292k
            ? pcs->ppcs->frm_hdr.quantization_params.delta_q_dc[1] // we are assuming delta_q_ac == delta_q_dc
1484
292k
            : pcs->ppcs->frm_hdr.quantization_params.delta_q_dc[2];
1485
292k
        q_index += offset;
1486
292k
        q_index = (uint32_t)CLIP3(0, 255, (int32_t)q_index);
1487
292k
    }
1488
1.40M
    if (bit_depth == EB_EIGHT_BIT) {
1489
1.40M
        if (component_type == COMPONENT_LUMA) {
1490
1.11M
            candidate_plane.quant_qtx       = enc_ctx->quants_8bit.y_quant[q_index];
1491
1.11M
            candidate_plane.quant_fp_qtx    = enc_ctx->quants_8bit.y_quant_fp[q_index];
1492
1.11M
            candidate_plane.round_fp_qtx    = enc_ctx->quants_8bit.y_round_fp[q_index];
1493
1.11M
            candidate_plane.quant_shift_qtx = enc_ctx->quants_8bit.y_quant_shift[q_index];
1494
1.11M
            candidate_plane.zbin_qtx        = enc_ctx->quants_8bit.y_zbin[q_index];
1495
1.11M
            candidate_plane.round_qtx       = enc_ctx->quants_8bit.y_round[q_index];
1496
1.11M
            candidate_plane.dequant_qtx     = enc_ctx->deq_8bit.y_dequant_qtx[q_index];
1497
1.11M
        } else if (component_type == COMPONENT_CHROMA_CB) {
1498
146k
            candidate_plane.quant_qtx       = enc_ctx->quants_8bit.u_quant[q_index];
1499
146k
            candidate_plane.quant_fp_qtx    = enc_ctx->quants_8bit.u_quant_fp[q_index];
1500
146k
            candidate_plane.round_fp_qtx    = enc_ctx->quants_8bit.u_round_fp[q_index];
1501
146k
            candidate_plane.quant_shift_qtx = enc_ctx->quants_8bit.u_quant_shift[q_index];
1502
146k
            candidate_plane.zbin_qtx        = enc_ctx->quants_8bit.u_zbin[q_index];
1503
146k
            candidate_plane.round_qtx       = enc_ctx->quants_8bit.u_round[q_index];
1504
146k
            candidate_plane.dequant_qtx     = enc_ctx->deq_8bit.u_dequant_qtx[q_index];
1505
146k
        }
1506
1507
146k
        else {
1508
146k
            candidate_plane.quant_qtx       = enc_ctx->quants_8bit.v_quant[q_index];
1509
146k
            candidate_plane.quant_fp_qtx    = enc_ctx->quants_8bit.v_quant_fp[q_index];
1510
146k
            candidate_plane.round_fp_qtx    = enc_ctx->quants_8bit.v_round_fp[q_index];
1511
146k
            candidate_plane.quant_shift_qtx = enc_ctx->quants_8bit.v_quant_shift[q_index];
1512
146k
            candidate_plane.zbin_qtx        = enc_ctx->quants_8bit.v_zbin[q_index];
1513
146k
            candidate_plane.round_qtx       = enc_ctx->quants_8bit.v_round[q_index];
1514
146k
            candidate_plane.dequant_qtx     = enc_ctx->deq_8bit.v_dequant_qtx[q_index];
1515
146k
        }
1516
18.4E
    } else {
1517
18.4E
        if (component_type == COMPONENT_LUMA) {
1518
0
            candidate_plane.quant_qtx       = enc_ctx->quants_bd.y_quant[q_index];
1519
0
            candidate_plane.quant_fp_qtx    = enc_ctx->quants_bd.y_quant_fp[q_index];
1520
0
            candidate_plane.round_fp_qtx    = enc_ctx->quants_bd.y_round_fp[q_index];
1521
0
            candidate_plane.quant_shift_qtx = enc_ctx->quants_bd.y_quant_shift[q_index];
1522
0
            candidate_plane.zbin_qtx        = enc_ctx->quants_bd.y_zbin[q_index];
1523
0
            candidate_plane.round_qtx       = enc_ctx->quants_bd.y_round[q_index];
1524
0
            candidate_plane.dequant_qtx     = enc_ctx->deq_bd.y_dequant_qtx[q_index];
1525
0
        }
1526
1527
18.4E
        else if (component_type == COMPONENT_CHROMA_CB) {
1528
0
            candidate_plane.quant_qtx       = enc_ctx->quants_bd.u_quant[q_index];
1529
0
            candidate_plane.quant_fp_qtx    = enc_ctx->quants_bd.u_quant_fp[q_index];
1530
0
            candidate_plane.round_fp_qtx    = enc_ctx->quants_bd.u_round_fp[q_index];
1531
0
            candidate_plane.quant_shift_qtx = enc_ctx->quants_bd.u_quant_shift[q_index];
1532
0
            candidate_plane.zbin_qtx        = enc_ctx->quants_bd.u_zbin[q_index];
1533
0
            candidate_plane.round_qtx       = enc_ctx->quants_bd.u_round[q_index];
1534
0
            candidate_plane.dequant_qtx     = enc_ctx->deq_bd.u_dequant_qtx[q_index];
1535
0
        }
1536
1537
18.4E
        else {
1538
18.4E
            candidate_plane.quant_qtx       = enc_ctx->quants_bd.v_quant[q_index];
1539
18.4E
            candidate_plane.quant_fp_qtx    = enc_ctx->quants_bd.v_quant_fp[q_index];
1540
18.4E
            candidate_plane.round_fp_qtx    = enc_ctx->quants_bd.v_round_fp[q_index];
1541
18.4E
            candidate_plane.quant_shift_qtx = enc_ctx->quants_bd.v_quant_shift[q_index];
1542
18.4E
            candidate_plane.zbin_qtx        = enc_ctx->quants_bd.v_zbin[q_index];
1543
18.4E
            candidate_plane.round_qtx       = enc_ctx->quants_bd.v_round[q_index];
1544
18.4E
            candidate_plane.dequant_qtx     = enc_ctx->deq_bd.v_dequant_qtx[q_index];
1545
18.4E
        }
1546
18.4E
    }
1547
1548
1.40M
    const ScanOrder* const scan_order = get_scan_order(txsize, tx_type);
1549
1550
1.40M
    const int32_t n_coeffs = av1_get_max_eob(txsize);
1551
1552
1.40M
    QuantParam qparam;
1553
1554
1.40M
    qparam.log_scale = av1_get_tx_scale_tab[txsize];
1555
1.40M
    qparam.tx_size   = txsize;
1556
1.40M
    qparam.qmatrix   = q_matrix;
1557
1.40M
    qparam.iqmatrix  = iq_matrix;
1558
1559
1.40M
    bool is_inter = (pred_mode >= NEARESTMV);
1560
1.40M
    bool perform_rdoq;
1561
1562
    // If rdoq_level is specified in the command line instruction, set perform_rdoq accordingly.
1563
1.40M
    perform_rdoq = !svt_av1_is_lossless_segment(pcs, ctx->blk_ptr->segment_id) &&
1564
24.6k
        ((ctx->mds_do_rdoq || is_encode_pass) && ctx->rdoq_ctrls.enabled);
1565
1.40M
    if (!is_encode_pass) {
1566
1.40M
        if ((ctx->rdoq_ctrls.dct_dct_only && tx_type != DCT_DCT) ||
1567
1.40M
            (ctx->rdoq_ctrls.skip_uv && component_type != COMPONENT_LUMA)) {
1568
0
            perform_rdoq = 0;
1569
0
        }
1570
1.40M
    }
1571
1.40M
    if (perform_rdoq) {
1572
24.6k
#if CONFIG_ENABLE_HIGH_BIT_DEPTH
1573
24.6k
        if ((bit_depth > EB_EIGHT_BIT) || (is_encode_pass && scs->is_16bit_pipeline)) {
1574
0
            svt_av1_highbd_quantize_fp_facade((TranLow*)coeff,
1575
0
                                              n_coeffs,
1576
0
                                              &candidate_plane,
1577
0
                                              quant_coeff,
1578
0
                                              (TranLow*)recon_coeff,
1579
0
                                              eob,
1580
0
                                              scan_order,
1581
0
                                              &qparam);
1582
0
        } else
1583
24.6k
#endif
1584
24.6k
        {
1585
24.6k
            svt_av1_quantize_fp_facade((TranLow*)coeff,
1586
24.6k
                                       n_coeffs,
1587
24.6k
                                       &candidate_plane,
1588
24.6k
                                       quant_coeff,
1589
24.6k
                                       (TranLow*)recon_coeff,
1590
24.6k
                                       eob,
1591
24.6k
                                       scan_order,
1592
24.6k
                                       &qparam);
1593
24.6k
        }
1594
1.37M
    } else {
1595
1.37M
#if CONFIG_ENABLE_HIGH_BIT_DEPTH
1596
1.38M
        if ((bit_depth > EB_EIGHT_BIT) || (is_encode_pass && scs->is_16bit_pipeline)) {
1597
0
            svt_av1_highbd_quantize_b_facade((TranLow*)coeff,
1598
0
                                             n_coeffs,
1599
0
                                             &candidate_plane,
1600
0
                                             quant_coeff,
1601
0
                                             (TranLow*)recon_coeff,
1602
0
                                             eob,
1603
0
                                             scan_order,
1604
0
                                             &qparam);
1605
0
        } else
1606
1.37M
#endif
1607
1.37M
        {
1608
1.37M
            av1_quantize_b_facade_ii((TranLow*)coeff,
1609
1.37M
                                     n_coeffs,
1610
1.37M
                                     &candidate_plane,
1611
1.37M
                                     quant_coeff,
1612
1.37M
                                     (TranLow*)recon_coeff,
1613
1.37M
                                     eob,
1614
1.37M
                                     scan_order,
1615
1.37M
                                     &qparam);
1616
1.37M
        }
1617
1.37M
    }
1618
1.40M
    if (perform_rdoq && *eob != 0) {
1619
11.6k
        int width    = tx_size_wide[txsize];
1620
11.6k
        int height   = tx_size_high[txsize];
1621
11.6k
        int eob_perc = (*eob) * 100 / (width * height);
1622
11.6k
        if (eob_perc >= ctx->rdoq_ctrls.eob_th) {
1623
0
            perform_rdoq = 0;
1624
0
        }
1625
11.6k
        if (perform_rdoq && (eob_perc >= ctx->rdoq_ctrls.eob_fast_th)) {
1626
0
            svt_fast_optimize_b(
1627
0
                (TranLow*)coeff, &candidate_plane, quant_coeff, (TranLow*)recon_coeff, eob, txsize, tx_type);
1628
0
        }
1629
11.6k
        if (perform_rdoq == 0) {
1630
0
#if CONFIG_ENABLE_HIGH_BIT_DEPTH
1631
0
            if ((bit_depth > EB_EIGHT_BIT) || (is_encode_pass && scs->is_16bit_pipeline)) {
1632
0
                svt_av1_highbd_quantize_b_facade((TranLow*)coeff,
1633
0
                                                 n_coeffs,
1634
0
                                                 &candidate_plane,
1635
0
                                                 quant_coeff,
1636
0
                                                 (TranLow*)recon_coeff,
1637
0
                                                 eob,
1638
0
                                                 scan_order,
1639
0
                                                 &qparam);
1640
0
            } else
1641
0
#endif
1642
0
            {
1643
0
                av1_quantize_b_facade_ii((TranLow*)coeff,
1644
0
                                         n_coeffs,
1645
0
                                         &candidate_plane,
1646
0
                                         quant_coeff,
1647
0
                                         (TranLow*)recon_coeff,
1648
0
                                         eob,
1649
0
                                         scan_order,
1650
0
                                         &qparam);
1651
0
            }
1652
0
        }
1653
11.6k
    }
1654
1.40M
    if (perform_rdoq && *eob != 0) {
1655
        // Perform rdoq
1656
11.6k
        svt_av1_optimize_b(pcs,
1657
11.6k
                           ctx,
1658
11.6k
                           txb_skip_context,
1659
11.6k
                           dc_sign_context,
1660
11.6k
                           (TranLow*)coeff,
1661
11.6k
                           &candidate_plane,
1662
11.6k
                           quant_coeff,
1663
11.6k
                           (TranLow*)recon_coeff,
1664
11.6k
                           eob,
1665
11.6k
                           &qparam,
1666
11.6k
                           txsize,
1667
11.6k
                           tx_type,
1668
11.6k
                           is_inter,
1669
11.6k
                           scs->vq_ctrls.sharpness_ctrls.rdoq,
1670
11.6k
                           pcs->ppcs->frm_hdr.delta_q_params.delta_q_present,
1671
11.6k
                           pcs->ppcs->picture_qp,
1672
11.6k
                           lambda,
1673
11.6k
                           (component_type == COMPONENT_LUMA) ? 0 : 1);
1674
11.6k
    }
1675
1676
1.40M
#if OPT_COEFF_SHAVING
1677
    // Apply coefficient shaving for luma after all quantization/RDOQ is complete.
1678
    // This catches all luma quantize paths (light PD1, regular TX, encode pass)
1679
    // in a single place.
1680
1.40M
    if (component_type == COMPONENT_LUMA && ctx->coeff_shaving_ctrls.enabled && *eob > 1) {
1681
0
        *eob = shave_coeff(quant_coeff, recon_coeff, *eob, txsize, tx_type, &ctx->coeff_shaving_ctrls);
1682
0
    }
1683
1.40M
#endif
1684
1685
1.40M
    if (!ctx->rate_est_ctrls.update_skip_ctx_dc_sign_ctx) {
1686
1.38M
        return 0;
1687
1.38M
    }
1688
1689
    // Derive cul_level
1690
19.8k
    return svt_av1_compute_cul_level(scan_order->scan, quant_coeff, eob);
1691
1.40M
}
1692
1693
void svt_aom_inv_transform_recon_wrapper(PictureControlSet* pcs, ModeDecisionContext* ctx, uint8_t* pred_buffer,
1694
                                         uint32_t pred_offset, uint32_t pred_stride, uint8_t* rec_buffer,
1695
                                         uint32_t rec_offset, uint32_t rec_stride, int32_t* rec_coeff_buffer,
1696
                                         uint32_t coeff_offset, bool hbd, TxSize txsize, TxType transform_type,
1697
42.4k
                                         PlaneType component_type, uint32_t eob) {
1698
42.4k
    if (hbd) {
1699
0
        svt_aom_inv_transform_recon(rec_coeff_buffer + coeff_offset,
1700
0
                                    CONVERT_TO_BYTEPTR(((uint16_t*)pred_buffer) + pred_offset),
1701
0
                                    pred_stride,
1702
0
                                    CONVERT_TO_BYTEPTR(((uint16_t*)rec_buffer) + rec_offset),
1703
0
                                    rec_stride,
1704
0
                                    txsize,
1705
0
                                    EB_TEN_BIT,
1706
0
                                    transform_type,
1707
0
                                    component_type,
1708
0
                                    eob,
1709
0
                                    svt_av1_is_lossless_segment(pcs, ctx->blk_ptr->segment_id));
1710
42.4k
    } else {
1711
42.4k
        svt_aom_inv_transform_recon8bit(rec_coeff_buffer + coeff_offset,
1712
42.4k
                                        pred_buffer + pred_offset,
1713
42.4k
                                        pred_stride,
1714
42.4k
                                        rec_buffer + rec_offset,
1715
42.4k
                                        rec_stride,
1716
42.4k
                                        txsize,
1717
42.4k
                                        transform_type,
1718
42.4k
                                        component_type,
1719
42.4k
                                        eob,
1720
42.4k
                                        svt_av1_is_lossless_segment(pcs, ctx->blk_ptr->segment_id));
1721
42.4k
    }
1722
42.4k
}
1723
1724
/*
1725
  tx path for light PD1 chroma

  For each requested chroma plane (Cb and/or Cr, selected by component_type) this
  routine computes the residual, forward-transforms it, quantizes/inverse-quantizes
  the coefficients, measures the coefficient-domain distortion, and finally calls
  svt_aom_txb_estimate_coeff_bits() for the chroma planes.

  A single chroma transform block is processed per plane: every coefficient buffer
  is addressed at offset 0 and only index [0] of eob / quant_dc is written.

  Parameters (as used in this function):
    pcs, ctx                 - forwarded to the transform / quantization / rate helpers;
                               ctx also supplies lambda, pf_shape and shortcut controls.
    cand_bf                  - candidate buffer; its pred/residual/quant/rec_coeff buffers,
                               eob, quant_dc, u_has_coeff and v_has_coeff are accessed here.
    input_pic                - source picture; u_buffer/v_buffer and their strides are read.
    input_cb_origin_in_index - offset into the input chroma planes (used for both U and V).
    blk_chroma_origin_index  - offset into the candidate pred and residual chroma buffers.
    component_type           - COMPONENT_CHROMA runs both planes; COMPONENT_CHROMA_CB or
                               COMPONENT_CHROMA_CR runs only that plane.
    chroma_qindex            - forwarded to svt_aom_quantize_inv_quantize().
    cb_full_distortion,
    cr_full_distortion       - filled by svt_aom_picture_full_distortion32_bits_single(); the
                               DIST_CALC_RESIDUAL and DIST_CALC_PREDICTION entries are then
                               right-shifted by chroma_shift.
    cb_coeff_bits,
    cr_coeff_bits            - forwarded to svt_aom_txb_estimate_coeff_bits()
                               (presumably outputs -- confirm against that helper).
1726
*/
1727
void svt_aom_full_loop_chroma_light_pd1(PictureControlSet* pcs, ModeDecisionContext* ctx,
1728
                                        ModeDecisionCandidateBuffer* cand_bf, EbPictureBufferDesc* input_pic,
1729
                                        uint32_t input_cb_origin_in_index, uint32_t blk_chroma_origin_index,
1730
                                        COMPONENT_TYPE component_type, uint32_t chroma_qindex,
1731
                                        uint64_t cb_full_distortion[DIST_CALC_TOTAL],
1732
                                        uint64_t cr_full_distortion[DIST_CALC_TOTAL], uint64_t* cb_coeff_bits,
1733
0
                                        uint64_t* cr_coeff_bits) {
1734
0
    // Lambda matching the MD bit depth; in this function it is only forwarded to the quantizer.
    uint32_t     full_lambda  = ctx->hbd_md ? ctx->full_lambda_md[EB_10_BIT_MD] : ctx->full_lambda_md[EB_8_BIT_MD];
1735
0
    const TxSize tx_size_uv   = av1_get_max_uv_txsize(ctx->blk_geom->bsize, 1, 1);
1736
0
    const int    tx_width_uv  = tx_size_wide[tx_size_uv];
1737
0
    const int    tx_height_uv = tx_size_high[tx_size_uv];
1738
1739
0
    // Start from the configured shape; the checks below may override it with N4_SHAPE for Cb.
    TxCoeffShape pf_shape = ctx->pf_ctrls.pf_shape;
1740
    // If Cb component not detected as complex, can use TX shortcuts
1741
0
    if (ctx->use_tx_shortcuts_mds3 &&
1742
0
        (ctx->chroma_complexity == COMPONENT_LUMA || ctx->chroma_complexity == COMPONENT_CHROMA_CR)) {
1743
0
        pf_shape = N4_SHAPE;
1744
0
    } else {
1745
0
        uint8_t use_pfn4_cond = 0;
1746
0
        if (ctx->lpd1_tx_ctrls.use_uv_shortcuts_on_y_coeffs &&
1747
0
            (ctx->chroma_complexity == COMPONENT_LUMA || ctx->chroma_complexity == COMPONENT_CHROMA_CR)) {
1748
0
            // Threshold scales with the chroma TX area in 16x16 units; it is 0 when either side is < 16.
            const uint16_t th = ((tx_width_uv >> 4) * (tx_height_uv >> 4));
1749
0
            // Use N4 when the candidate has fewer non-zero coeffs than th, or no coeffs at all.
            use_pfn4_cond     = (cand_bf->cnt_nz_coeff < th) || !cand_bf->block_has_coeff ? 1 : 0;
1750
0
        }
1751
0
        if (use_pfn4_cond) {
1752
0
            pf_shape = N4_SHAPE;
1753
0
        }
1754
0
    }
1755
0
    assert(tx_size_uv < TX_SIZES_ALL);
1756
0
    // Right-shift applied to the coefficient-domain distortions; derived from the TX scale of tx_size_uv.
    const int32_t chroma_shift = (MAX_TX_SCALE - av1_get_tx_scale_tab[tx_size_uv]) * 2;
1757
0
    uint32_t      bwidth       = tx_width_uv;
1758
0
    uint32_t      bheight      = tx_height_uv;
1759
0
    // With a non-zero pf_shape the distortion area shrinks: width is shifted and kept >= 4,
    // height is shifted with no lower bound.
    if (pf_shape) {
1760
0
        bwidth  = MAX((bwidth >> pf_shape), 4);
1761
0
        bheight = (bheight >> pf_shape);
1762
0
    }
1763
0
    if (component_type == COMPONENT_CHROMA || component_type == COMPONENT_CHROMA_CB) {
1764
0
        // Cb residual: input U plane vs. candidate prediction, over the full chroma block size.
        svt_aom_residual_kernel(input_pic->u_buffer,
1765
0
                                input_cb_origin_in_index,
1766
0
                                input_pic->u_stride,
1767
0
                                cand_bf->pred->u_buffer,
1768
0
                                blk_chroma_origin_index,
1769
0
                                cand_bf->pred->u_stride,
1770
0
                                (int16_t*)cand_bf->residual->u_buffer,
1771
0
                                blk_chroma_origin_index,
1772
0
                                cand_bf->residual->u_stride,
1773
0
                                ctx->hbd_md,
1774
0
                                ctx->blk_geom->bwidth_uv,
1775
0
                                ctx->blk_geom->bheight_uv);
1776
1777
        // Cb Transform
1778
0
        svt_aom_estimate_transform(pcs,
1779
0
                                   ctx,
1780
0
                                   &(((int16_t*)cand_bf->residual->u_buffer)[blk_chroma_origin_index]),
1781
0
                                   cand_bf->residual->u_stride,
1782
0
                                   &(((int32_t*)ctx->tx_coeffs->u_buffer)[0]),
1783
0
                                   NOT_USED_VALUE,
1784
0
                                   tx_size_uv,
1785
0
                                   &ctx->three_quad_energy,
1786
0
                                   ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT,
1787
0
                                   cand_bf->cand->transform_type_uv,
1788
0
                                   PLANE_TYPE_UV,
1789
0
                                   pf_shape);
1790
0
        // Cb quantization + inverse quantization; the return value is kept as quant_dc.u[0] and
        // &cand_bf->eob.u[0] is handed in (presumably receives the end-of-block position).
        cand_bf->quant_dc.u[0] = svt_aom_quantize_inv_quantize(pcs,
1791
0
                                                               ctx,
1792
0
                                                               &(((int32_t*)ctx->tx_coeffs->u_buffer)[0]),
1793
0
                                                               &(((int32_t*)cand_bf->quant->u_buffer)[0]),
1794
0
                                                               &(((int32_t*)cand_bf->rec_coeff->u_buffer)[0]),
1795
0
                                                               chroma_qindex,
1796
0
                                                               0,
1797
0
                                                               tx_size_uv,
1798
0
                                                               &cand_bf->eob.u[0],
1799
0
                                                               COMPONENT_CHROMA_CB,
1800
0
                                                               ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT,
1801
0
                                                               cand_bf->cand->transform_type_uv,
1802
0
                                                               0,
1803
0
                                                               0,
1804
0
                                                               cand_bf->cand->block_mi.mode,
1805
0
                                                               full_lambda,
1806
0
                                                               false);
1807
1808
0
        // Coefficient-domain distortion (transform coeffs vs. reconstructed coeffs) over bwidth x bheight.
        svt_aom_picture_full_distortion32_bits_single(&(((int32_t*)ctx->tx_coeffs->u_buffer)[0]),
1809
0
                                                      &(((int32_t*)cand_bf->rec_coeff->u_buffer)[0]),
1810
0
                                                      tx_width_uv,
1811
0
                                                      bwidth,
1812
0
                                                      bheight,
1813
0
                                                      cb_full_distortion,
1814
0
                                                      cand_bf->eob.u[0]);
1815
0
        cb_full_distortion[DIST_CALC_RESIDUAL]   = RIGHT_SIGNED_SHIFT(cb_full_distortion[DIST_CALC_RESIDUAL],
1816
0
                                                                    chroma_shift);
1817
0
        cb_full_distortion[DIST_CALC_PREDICTION] = RIGHT_SIGNED_SHIFT(cb_full_distortion[DIST_CALC_PREDICTION],
1818
0
                                                                      chroma_shift);
1819
0
        cand_bf->u_has_coeff                     = (cand_bf->eob.u[0] > 0);
1820
0
    }
1821
1822
0
    // Reset the shape and redo the shortcut decision for Cr; the complexity test now
    // checks COMPONENT_CHROMA_CB instead of COMPONENT_CHROMA_CR.
    pf_shape = ctx->pf_ctrls.pf_shape;
1823
    // If Cr component not detected as complex, can use TX shortcuts
1824
0
    if (ctx->use_tx_shortcuts_mds3 &&
1825
0
        (ctx->chroma_complexity == COMPONENT_LUMA || ctx->chroma_complexity == COMPONENT_CHROMA_CB)) {
1826
0
        pf_shape = N4_SHAPE;
1827
0
    } else {
1828
0
        uint8_t use_pfn4_cond = 0;
1829
0
        if (ctx->lpd1_tx_ctrls.use_uv_shortcuts_on_y_coeffs &&
1830
0
            (ctx->chroma_complexity == COMPONENT_LUMA || ctx->chroma_complexity == COMPONENT_CHROMA_CB)) {
1831
0
            const uint16_t th = ((tx_width_uv >> 4) * (tx_height_uv >> 4));
1832
0
            use_pfn4_cond     = (cand_bf->cnt_nz_coeff < th) || !cand_bf->block_has_coeff ? 1 : 0;
1833
0
        }
1834
0
        if (use_pfn4_cond) {
1835
0
            pf_shape = N4_SHAPE;
1836
0
        }
1837
0
    }
1838
0
    // Recompute the distortion area for the Cr shape (same rule as for Cb).
    bwidth  = tx_width_uv;
1839
0
    bheight = tx_height_uv;
1840
0
    if (pf_shape) {
1841
0
        bwidth  = MAX((bwidth >> pf_shape), 4);
1842
0
        bheight = (bheight >> pf_shape);
1843
0
    }
1844
1845
0
    if (component_type == COMPONENT_CHROMA || component_type == COMPONENT_CHROMA_CR) {
1846
        //Cr Residual
        // The V-plane input offset reuses input_cb_origin_in_index (same offset as the U plane).
1847
0
        svt_aom_residual_kernel(input_pic->v_buffer,
1848
0
                                input_cb_origin_in_index,
1849
0
                                input_pic->v_stride,
1850
0
                                cand_bf->pred->v_buffer,
1851
0
                                blk_chroma_origin_index,
1852
0
                                cand_bf->pred->v_stride,
1853
0
                                (int16_t*)cand_bf->residual->v_buffer,
1854
0
                                blk_chroma_origin_index,
1855
0
                                cand_bf->residual->v_stride,
1856
0
                                ctx->hbd_md,
1857
0
                                ctx->blk_geom->bwidth_uv,
1858
0
                                ctx->blk_geom->bheight_uv);
1859
        // Cr Transform
1860
0
        svt_aom_estimate_transform(pcs,
1861
0
                                   ctx,
1862
0
                                   &(((int16_t*)cand_bf->residual->v_buffer)[blk_chroma_origin_index]),
1863
0
                                   cand_bf->residual->v_stride,
1864
0
                                   &(((int32_t*)ctx->tx_coeffs->v_buffer)[0]),
1865
0
                                   NOT_USED_VALUE,
1866
0
                                   tx_size_uv,
1867
0
                                   &ctx->three_quad_energy,
1868
0
                                   ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT,
1869
0
                                   cand_bf->cand->transform_type_uv,
1870
0
                                   PLANE_TYPE_UV,
1871
0
                                   pf_shape);
1872
0
        // Cr quantization + inverse quantization; mirrors the Cb call with the v buffers and eob.v[0].
        cand_bf->quant_dc.v[0] = svt_aom_quantize_inv_quantize(pcs,
1873
0
                                                               ctx,
1874
0
                                                               &(((int32_t*)ctx->tx_coeffs->v_buffer)[0]),
1875
0
                                                               &(((int32_t*)cand_bf->quant->v_buffer)[0]),
1876
0
                                                               &(((int32_t*)cand_bf->rec_coeff->v_buffer)[0]),
1877
0
                                                               chroma_qindex,
1878
0
                                                               0,
1879
0
                                                               tx_size_uv,
1880
0
                                                               &cand_bf->eob.v[0],
1881
0
                                                               COMPONENT_CHROMA_CR,
1882
0
                                                               ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT,
1883
0
                                                               cand_bf->cand->transform_type_uv,
1884
0
                                                               0,
1885
0
                                                               0,
1886
0
                                                               cand_bf->cand->block_mi.mode,
1887
0
                                                               full_lambda,
1888
0
                                                               false);
1889
1890
0
        // Coefficient-domain distortion for Cr, written into cr_full_distortion.
        svt_aom_picture_full_distortion32_bits_single(&(((int32_t*)ctx->tx_coeffs->v_buffer)[0]),
1891
0
                                                      &(((int32_t*)cand_bf->rec_coeff->v_buffer)[0]),
1892
0
                                                      tx_width_uv,
1893
0
                                                      bwidth,
1894
0
                                                      bheight,
1895
0
                                                      cr_full_distortion,
1896
0
                                                      cand_bf->eob.v[0]);
1897
1898
0
        cr_full_distortion[DIST_CALC_RESIDUAL]   = RIGHT_SIGNED_SHIFT(cr_full_distortion[DIST_CALC_RESIDUAL],
1899
0
                                                                    chroma_shift);
1900
0
        cr_full_distortion[DIST_CALC_PREDICTION] = RIGHT_SIGNED_SHIFT(cr_full_distortion[DIST_CALC_PREDICTION],
1901
0
                                                                      chroma_shift);
1902
0
        cand_bf->v_has_coeff                     = (cand_bf->eob.v[0] > 0);
1903
0
    }
1904
1905
    //CHROMA-ONLY
    // Rate estimation runs unconditionally and receives both eob.u[0] and eob.v[0], even when
    // only one plane was processed above; component_type is forwarded to the helper
    // (NOTE(review): presumably it uses component_type to ignore the unprocessed plane -- confirm).
1906
0
    svt_aom_txb_estimate_coeff_bits(ctx,
1907
0
                                    0,
1908
0
                                    NULL,
1909
0
                                    pcs,
1910
0
                                    cand_bf,
1911
0
                                    NOT_USED_VALUE,
1912
0
                                    0,
1913
0
                                    cand_bf->quant,
1914
0
                                    NOT_USED_VALUE,
1915
0
                                    cand_bf->eob.u[0],
1916
0
                                    cand_bf->eob.v[0],
1917
0
                                    NOT_USED_VALUE,
1918
0
                                    cb_coeff_bits,
1919
0
                                    cr_coeff_bits,
1920
0
                                    NOT_USED_VALUE,
1921
0
                                    tx_size_uv,
1922
0
                                    NOT_USED_VALUE,
1923
0
                                    cand_bf->cand->transform_type_uv,
1924
0
                                    component_type);
1925
0
}
1926
1927
/****************************************
1928
 ************  Full loop ****************
1929
****************************************/
1930
void svt_aom_full_loop_uv(PictureControlSet* pcs, ModeDecisionContext* ctx, ModeDecisionCandidateBuffer* cand_bf,
1931
                          EbPictureBufferDesc* input_pic, COMPONENT_TYPE component_type, uint32_t chroma_qindex,
1932
                          uint64_t cb_full_distortion[DIST_TOTAL][DIST_CALC_TOTAL],
1933
                          uint64_t cr_full_distortion[DIST_TOTAL][DIST_CALC_TOTAL], uint64_t* cb_coeff_bits,
1934
145k
                          uint64_t* cr_coeff_bits, bool is_full_loop) {
1935
145k
    EbSpatialFullDistType spatial_full_dist_type_fun = ctx->hbd_md ? svt_full_distortion_kernel16_bits
1936
145k
                                                                   : svt_spatial_full_distortion_kernel;
1937
145k
    EB_ALIGN(16) uint64_t txb_full_distortion[DIST_TOTAL][3][DIST_CALC_TOTAL];
1938
145k
    const SsimLevel       ssim_level = ctx->tune_ssim_level;
1939
145k
    if (ssim_level > SSIM_LVL_0) {
1940
0
        assert(ctx->pd_pass == PD_PASS_1);
1941
0
        assert(ctx->md_stage == MD_STAGE_3);
1942
0
    }
1943
145k
    cand_bf->u_has_coeff = 0;
1944
145k
    cand_bf->v_has_coeff = 0;
1945
145k
    int16_t* chroma_residual_ptr;
1946
145k
    uint32_t full_lambda = ctx->hbd_md ? ctx->full_lambda_md[EB_10_BIT_MD] : ctx->full_lambda_md[EB_8_BIT_MD];
1947
1948
145k
    ctx->three_quad_energy = 0;
1949
1950
145k
    const double effective_ac_bias = get_effective_ac_bias(
1951
145k
        pcs->scs->static_config.ac_bias, pcs->slice_type == I_SLICE, pcs->temporal_layer_index);
1952
145k
    const uint8_t tx_depth     = cand_bf->cand->block_mi.tx_depth;
1953
145k
    const TxSize  tx_size      = av1_get_tx_size(ctx->blk_geom->bsize, tx_depth, PLANE_TYPE_Y);
1954
145k
    const TxSize  tx_size_uv   = av1_get_max_uv_txsize(ctx->blk_geom->bsize, 1, 1);
1955
145k
    const int     tx_width_uv  = tx_size_wide[tx_size_uv];
1956
145k
    const int     tx_height_uv = tx_size_high[tx_size_uv];
1957
146k
    const bool    is_inter = (is_inter_mode(cand_bf->cand->block_mi.mode) || cand_bf->cand->block_mi.use_intrabc) ? true
1958
145k
                                                                                                                  : false;
1959
145k
    const int     tu_count = tx_depth ? 1 : tx_blocks_per_depth[ctx->blk_geom->bsize][tx_depth]; //NM: 128x128 exeption
1960
145k
    uint32_t      txb_1d_offset = 0;
1961
1962
145k
    int txb_itr = 0;
1963
145k
    do {
1964
145k
        const uint32_t txb_origin_x        = tx_org[ctx->blk_geom->bsize][is_inter][tx_depth][txb_itr].x;
1965
145k
        const uint32_t txb_origin_y        = tx_org[ctx->blk_geom->bsize][is_inter][tx_depth][txb_itr].y;
1966
145k
        int32_t        cropped_tx_width_uv = MIN(
1967
145k
            (uint32_t)tx_width_uv, (pcs->ppcs->aligned_width >> 1) - ((ROUND_UV(ctx->blk_org_x + txb_origin_x)) >> 1));
1968
145k
        int32_t cropped_tx_height_uv = MIN(
1969
145k
            (uint32_t)tx_height_uv,
1970
145k
            (pcs->ppcs->aligned_height >> 1) - ((ROUND_UV(ctx->blk_org_y + txb_origin_y)) >> 1));
1971
145k
        uint32_t tu_cb_origin_index = (ROUND_UV(txb_origin_x) +
1972
145k
                                       (ROUND_UV(txb_origin_y) * cand_bf->residual->u_stride)) >>
1973
145k
            1;
1974
145k
        uint32_t tu_cr_origin_index = (ROUND_UV(txb_origin_x) +
1975
145k
                                       (ROUND_UV(txb_origin_y) * cand_bf->residual->v_stride)) >>
1976
145k
            1;
1977
145k
        TxCoeffShape pf_shape = ctx->pf_ctrls.pf_shape;
1978
146k
        if (ctx->md_stage == MD_STAGE_3 && ctx->use_tx_shortcuts_mds3 && ctx->chroma_complexity == COMPONENT_LUMA) {
1979
0
            pf_shape = N4_SHAPE;
1980
0
        }
1981
        // for chroma path, use luma coeff info to make shortcut decisions (available even if MDS1 is skipped)
1982
145k
        else if (ctx->tx_shortcut_ctrls.apply_pf_on_coeffs && ctx->md_stage == MD_STAGE_3 &&
1983
0
                 ctx->chroma_complexity == COMPONENT_LUMA) {
1984
0
            uint8_t use_pfn4_cond = 0;
1985
1986
0
            const uint16_t th = (tx_width_uv >> 4) * (tx_height_uv >> 4);
1987
0
            use_pfn4_cond     = (cand_bf->cnt_nz_coeff < th) || !cand_bf->block_has_coeff ? 1 : 0;
1988
1989
0
            if (use_pfn4_cond) {
1990
0
                pf_shape = N4_SHAPE;
1991
0
            }
1992
0
        }
1993
        //    This function replaces the previous Intra Chroma mode if the LM fast
1994
        //    cost is better.
1995
        //    *Note - this might require that we have inv transform in the loop
1996
146k
        if (component_type == COMPONENT_CHROMA_CB || component_type == COMPONENT_CHROMA ||
1997
146k
            component_type == COMPONENT_ALL) {
1998
146k
            ctx->cb_txb_skip_context = 0;
1999
146k
            ctx->cb_dc_sign_context  = 0;
2000
146k
            if (ctx->rate_est_ctrls.update_skip_ctx_dc_sign_ctx) {
2001
0
                svt_aom_get_txb_ctx(pcs,
2002
0
                                    COMPONENT_CHROMA,
2003
0
                                    ctx->cb_dc_sign_level_coeff_na,
2004
0
                                    ROUND_UV(ctx->blk_org_x + txb_origin_x) >> 1,
2005
0
                                    ROUND_UV(ctx->blk_org_y + txb_origin_y) >> 1,
2006
0
                                    ctx->blk_geom->bsize_uv,
2007
0
                                    tx_size_uv,
2008
0
                                    &ctx->cb_txb_skip_context,
2009
0
                                    &ctx->cb_dc_sign_context);
2010
0
            }
2011
            // Configure the Chroma Residual Ptr
2012
2013
146k
            chroma_residual_ptr = &(((int16_t*)cand_bf->residual->u_buffer)[tu_cb_origin_index]);
2014
2015
            // Cb Transform
2016
146k
            svt_aom_estimate_transform(pcs,
2017
146k
                                       ctx,
2018
146k
                                       chroma_residual_ptr,
2019
146k
                                       cand_bf->residual->u_stride,
2020
146k
                                       &(((int32_t*)ctx->tx_coeffs->u_buffer)[txb_1d_offset]),
2021
146k
                                       NOT_USED_VALUE,
2022
146k
                                       tx_size_uv,
2023
146k
                                       &ctx->three_quad_energy,
2024
146k
                                       ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT,
2025
146k
                                       cand_bf->cand->transform_type_uv,
2026
146k
                                       PLANE_TYPE_UV,
2027
146k
                                       pf_shape);
2028
2029
146k
            int32_t seg_qp               = pcs->ppcs->frm_hdr.segmentation_params.segmentation_enabled
2030
146k
                              ? pcs->ppcs->frm_hdr.segmentation_params.feature_data[ctx->blk_ptr->segment_id][SEG_LVL_ALT_Q]
2031
146k
                              : 0;
2032
146k
            cand_bf->quant_dc.u[txb_itr] = svt_aom_quantize_inv_quantize(
2033
146k
                pcs,
2034
146k
                ctx,
2035
146k
                &(((int32_t*)ctx->tx_coeffs->u_buffer)[txb_1d_offset]),
2036
146k
                &(((int32_t*)cand_bf->quant->u_buffer)[txb_1d_offset]),
2037
146k
                &(((int32_t*)cand_bf->rec_coeff->u_buffer)[txb_1d_offset]),
2038
146k
                chroma_qindex,
2039
146k
                seg_qp,
2040
146k
                tx_size_uv,
2041
146k
                &cand_bf->eob.u[txb_itr],
2042
146k
                COMPONENT_CHROMA_CB,
2043
146k
                ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT,
2044
146k
                cand_bf->cand->transform_type_uv,
2045
146k
                ctx->cb_txb_skip_context,
2046
146k
                ctx->cb_dc_sign_context,
2047
146k
                cand_bf->cand->block_mi.mode,
2048
146k
                full_lambda,
2049
146k
                false);
2050
2051
146k
            if (is_full_loop && ctx->mds_do_spatial_sse) {
2052
146k
                uint32_t cb_has_coeff = cand_bf->eob.u[txb_itr] > 0;
2053
2054
146k
                if (cb_has_coeff) {
2055
6.22k
                    svt_aom_inv_transform_recon_wrapper(pcs,
2056
6.22k
                                                        ctx,
2057
6.22k
                                                        cand_bf->pred->u_buffer,
2058
6.22k
                                                        tu_cb_origin_index,
2059
6.22k
                                                        cand_bf->pred->u_stride,
2060
6.22k
                                                        cand_bf->recon->u_buffer,
2061
6.22k
                                                        tu_cb_origin_index,
2062
6.22k
                                                        cand_bf->recon->u_stride,
2063
6.22k
                                                        (int32_t*)cand_bf->rec_coeff->u_buffer,
2064
6.22k
                                                        txb_1d_offset,
2065
6.22k
                                                        ctx->hbd_md,
2066
6.22k
                                                        tx_size_uv,
2067
6.22k
                                                        cand_bf->cand->transform_type_uv,
2068
6.22k
                                                        PLANE_TYPE_UV,
2069
6.22k
                                                        (uint32_t)cand_bf->eob.u[txb_itr]);
2070
139k
                } else {
2071
139k
                    svt_av1_picture_copy_cb(cand_bf->pred,
2072
139k
                                            tu_cb_origin_index,
2073
139k
                                            cand_bf->recon,
2074
139k
                                            tu_cb_origin_index,
2075
139k
                                            tx_width_uv,
2076
139k
                                            tx_height_uv,
2077
139k
                                            ctx->hbd_md);
2078
139k
                }
2079
2080
146k
                const uint32_t input_chroma_txb_origin_index = ((ROUND_UV(ctx->blk_org_x + txb_origin_x)) >> 1) +
2081
146k
                    ((ROUND_UV(ctx->blk_org_y + txb_origin_y)) >> 1) * input_pic->u_stride;
2082
146k
                const int32_t txb_uv_origin_index = (ROUND_UV(txb_origin_x) +
2083
146k
                                                     (ROUND_UV(txb_origin_y) * cand_bf->quant->u_stride)) >>
2084
146k
                    1;
2085
2086
146k
                if (ssim_level == SSIM_LVL_1 || ssim_level == SSIM_LVL_3) {
2087
0
                    txb_full_distortion[DIST_SSIM][1][DIST_CALC_PREDICTION] = svt_spatial_full_distortion_ssim_kernel(
2088
0
                        input_pic->u_buffer,
2089
0
                        input_chroma_txb_origin_index,
2090
0
                        input_pic->u_stride,
2091
0
                        cand_bf->pred->u_buffer,
2092
0
                        txb_uv_origin_index,
2093
0
                        cand_bf->pred->u_stride,
2094
0
                        cropped_tx_width_uv,
2095
0
                        cropped_tx_height_uv,
2096
0
                        ctx->hbd_md,
2097
0
                        effective_ac_bias);
2098
2099
0
                    txb_full_distortion[DIST_SSIM][1][DIST_CALC_RESIDUAL] = svt_spatial_full_distortion_ssim_kernel(
2100
0
                        input_pic->u_buffer,
2101
0
                        input_chroma_txb_origin_index,
2102
0
                        input_pic->u_stride,
2103
0
                        cand_bf->recon->u_buffer,
2104
0
                        txb_uv_origin_index,
2105
0
                        cand_bf->recon->u_stride,
2106
0
                        cropped_tx_width_uv,
2107
0
                        cropped_tx_height_uv,
2108
0
                        ctx->hbd_md,
2109
0
                        effective_ac_bias);
2110
2111
0
                    txb_full_distortion[DIST_SSIM][1][DIST_CALC_PREDICTION] <<= 4;
2112
0
                    txb_full_distortion[DIST_SSIM][1][DIST_CALC_RESIDUAL] <<= 4;
2113
0
                }
2114
146k
                txb_full_distortion[DIST_SSD][1][DIST_CALC_PREDICTION] = spatial_full_dist_type_fun(
2115
146k
                    input_pic->u_buffer,
2116
146k
                    input_chroma_txb_origin_index,
2117
146k
                    input_pic->u_stride,
2118
146k
                    cand_bf->pred->u_buffer,
2119
146k
                    txb_uv_origin_index,
2120
146k
                    cand_bf->pred->u_stride,
2121
146k
                    cropped_tx_width_uv,
2122
146k
                    cropped_tx_height_uv);
2123
146k
                if (effective_ac_bias) {
2124
0
                    txb_full_distortion[DIST_SSD][1][DIST_CALC_PREDICTION] += get_svt_psy_full_dist(
2125
0
                        input_pic->u_buffer,
2126
0
                        input_chroma_txb_origin_index,
2127
0
                        input_pic->u_stride,
2128
0
                        cand_bf->pred->u_buffer,
2129
0
                        txb_uv_origin_index,
2130
0
                        cand_bf->pred->u_stride,
2131
0
                        cropped_tx_width_uv,
2132
0
                        cropped_tx_height_uv,
2133
0
                        ctx->hbd_md,
2134
0
                        effective_ac_bias);
2135
0
                }
2136
2137
146k
                txb_full_distortion[DIST_SSD][1][DIST_CALC_RESIDUAL] = spatial_full_dist_type_fun(
2138
146k
                    input_pic->u_buffer,
2139
146k
                    input_chroma_txb_origin_index,
2140
146k
                    input_pic->u_stride,
2141
146k
                    cand_bf->recon->u_buffer,
2142
146k
                    txb_uv_origin_index,
2143
146k
                    cand_bf->recon->u_stride,
2144
146k
                    cropped_tx_width_uv,
2145
146k
                    cropped_tx_height_uv);
2146
146k
                if (effective_ac_bias) {
2147
0
                    txb_full_distortion[DIST_SSD][1][DIST_CALC_RESIDUAL] += get_svt_psy_full_dist(
2148
0
                        input_pic->u_buffer,
2149
0
                        input_chroma_txb_origin_index,
2150
0
                        input_pic->u_stride,
2151
0
                        cand_bf->recon->u_buffer,
2152
0
                        txb_uv_origin_index,
2153
0
                        cand_bf->recon->u_stride,
2154
0
                        cropped_tx_width_uv,
2155
0
                        cropped_tx_height_uv,
2156
0
                        ctx->hbd_md,
2157
0
                        effective_ac_bias);
2158
0
                }
2159
2160
146k
                txb_full_distortion[DIST_SSD][1][DIST_CALC_PREDICTION] <<= 4;
2161
146k
                txb_full_distortion[DIST_SSD][1][DIST_CALC_RESIDUAL] <<= 4;
2162
18.4E
            } else {
2163
                // *Full Distortion (SSE)
2164
                // *Note - there are known issues with how this distortion metric is currently
2165
                //    calculated.  The amount of scaling between the two arrays is not
2166
                //    equivalent.
2167
18.4E
                uint32_t bwidth  = tx_width_uv;
2168
18.4E
                uint32_t bheight = tx_height_uv;
2169
18.4E
                if (pf_shape) {
2170
0
                    bwidth  = MAX((bwidth >> pf_shape), 4);
2171
0
                    bheight = (bheight >> pf_shape);
2172
0
                }
2173
18.4E
                svt_aom_picture_full_distortion32_bits_single(
2174
18.4E
                    &(((int32_t*)ctx->tx_coeffs->u_buffer)[txb_1d_offset]),
2175
18.4E
                    &(((int32_t*)cand_bf->rec_coeff->u_buffer)[txb_1d_offset]),
2176
18.4E
                    tx_width_uv,
2177
18.4E
                    bwidth,
2178
18.4E
                    bheight,
2179
18.4E
                    txb_full_distortion[DIST_SSD][1],
2180
18.4E
                    cand_bf->eob.u[txb_itr]);
2181
2182
18.4E
                const int32_t chroma_shift = (MAX_TX_SCALE - av1_get_tx_scale_tab[tx_size_uv]) * 2;
2183
18.4E
                txb_full_distortion[DIST_SSD][1][DIST_CALC_RESIDUAL] = RIGHT_SIGNED_SHIFT(
2184
18.4E
                    txb_full_distortion[DIST_SSD][1][DIST_CALC_RESIDUAL], chroma_shift);
2185
18.4E
                txb_full_distortion[DIST_SSD][1][DIST_CALC_PREDICTION] = RIGHT_SIGNED_SHIFT(
2186
18.4E
                    txb_full_distortion[DIST_SSD][1][DIST_CALC_PREDICTION], chroma_shift);
2187
18.4E
            }
2188
146k
            cand_bf->u_has_coeff |= ((cand_bf->eob.u[txb_itr] != 0) << txb_itr);
2189
146k
            cb_full_distortion[DIST_SSIM][DIST_CALC_RESIDUAL] += txb_full_distortion[DIST_SSIM][1][DIST_CALC_RESIDUAL];
2190
146k
            cb_full_distortion[DIST_SSIM][DIST_CALC_PREDICTION] +=
2191
146k
                txb_full_distortion[DIST_SSIM][1][DIST_CALC_PREDICTION];
2192
2193
146k
            cb_full_distortion[DIST_SSD][DIST_CALC_RESIDUAL] += txb_full_distortion[DIST_SSD][1][DIST_CALC_RESIDUAL];
2194
146k
            cb_full_distortion[DIST_SSD][DIST_CALC_PREDICTION] +=
2195
146k
                txb_full_distortion[DIST_SSD][1][DIST_CALC_PREDICTION];
2196
146k
        }
2197
2198
146k
        if (component_type == COMPONENT_CHROMA_CR || component_type == COMPONENT_CHROMA ||
2199
146k
            component_type == COMPONENT_ALL) {
2200
146k
            ctx->cr_txb_skip_context = 0;
2201
146k
            ctx->cr_dc_sign_context  = 0;
2202
146k
            if (ctx->rate_est_ctrls.update_skip_ctx_dc_sign_ctx) {
2203
0
                svt_aom_get_txb_ctx(pcs,
2204
0
                                    COMPONENT_CHROMA,
2205
0
                                    ctx->cr_dc_sign_level_coeff_na,
2206
0
                                    ROUND_UV(ctx->blk_org_x + txb_origin_x) >> 1,
2207
0
                                    ROUND_UV(ctx->blk_org_y + txb_origin_y) >> 1,
2208
0
                                    ctx->blk_geom->bsize_uv,
2209
0
                                    tx_size_uv,
2210
0
                                    &ctx->cr_txb_skip_context,
2211
0
                                    &ctx->cr_dc_sign_context);
2212
0
            }
2213
            // Configure the Chroma Residual Ptr
2214
2215
146k
            chroma_residual_ptr = &(((int16_t*)cand_bf->residual->v_buffer)[tu_cr_origin_index]);
2216
2217
            // Cr Transform
2218
146k
            svt_aom_estimate_transform(pcs,
2219
146k
                                       ctx,
2220
146k
                                       chroma_residual_ptr,
2221
146k
                                       cand_bf->residual->v_stride,
2222
146k
                                       &(((int32_t*)ctx->tx_coeffs->v_buffer)[txb_1d_offset]),
2223
146k
                                       NOT_USED_VALUE,
2224
146k
                                       tx_size_uv,
2225
146k
                                       &ctx->three_quad_energy,
2226
146k
                                       ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT,
2227
146k
                                       cand_bf->cand->transform_type_uv,
2228
146k
                                       PLANE_TYPE_UV,
2229
146k
                                       pf_shape);
2230
146k
            int32_t seg_qp               = pcs->ppcs->frm_hdr.segmentation_params.segmentation_enabled
2231
146k
                              ? pcs->ppcs->frm_hdr.segmentation_params.feature_data[ctx->blk_ptr->segment_id][SEG_LVL_ALT_Q]
2232
146k
                              : 0;
2233
146k
            cand_bf->quant_dc.v[txb_itr] = svt_aom_quantize_inv_quantize(
2234
146k
                pcs,
2235
146k
                ctx,
2236
146k
                &(((int32_t*)ctx->tx_coeffs->v_buffer)[txb_1d_offset]),
2237
146k
                &(((int32_t*)cand_bf->quant->v_buffer)[txb_1d_offset]),
2238
146k
                &(((int32_t*)cand_bf->rec_coeff->v_buffer)[txb_1d_offset]),
2239
146k
                chroma_qindex,
2240
146k
                seg_qp,
2241
146k
                tx_size_uv,
2242
146k
                &cand_bf->eob.v[txb_itr],
2243
146k
                COMPONENT_CHROMA_CR,
2244
146k
                ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT,
2245
146k
                cand_bf->cand->transform_type_uv,
2246
146k
                ctx->cr_txb_skip_context,
2247
146k
                ctx->cr_dc_sign_context,
2248
146k
                cand_bf->cand->block_mi.mode,
2249
146k
                full_lambda,
2250
146k
                false);
2251
146k
            if (is_full_loop && ctx->mds_do_spatial_sse) {
2252
146k
                uint32_t cr_has_coeff = cand_bf->eob.v[txb_itr] > 0;
2253
2254
146k
                if (cr_has_coeff) {
2255
6.22k
                    svt_aom_inv_transform_recon_wrapper(pcs,
2256
6.22k
                                                        ctx,
2257
6.22k
                                                        cand_bf->pred->v_buffer,
2258
6.22k
                                                        tu_cr_origin_index,
2259
6.22k
                                                        cand_bf->pred->v_stride,
2260
6.22k
                                                        cand_bf->recon->v_buffer,
2261
6.22k
                                                        tu_cr_origin_index,
2262
6.22k
                                                        cand_bf->recon->v_stride,
2263
6.22k
                                                        (int32_t*)cand_bf->rec_coeff->v_buffer,
2264
6.22k
                                                        txb_1d_offset,
2265
6.22k
                                                        ctx->hbd_md,
2266
6.22k
                                                        tx_size_uv,
2267
6.22k
                                                        cand_bf->cand->transform_type_uv,
2268
6.22k
                                                        PLANE_TYPE_UV,
2269
6.22k
                                                        (uint32_t)cand_bf->eob.v[txb_itr]);
2270
139k
                } else {
2271
139k
                    svt_av1_picture_copy_cr(cand_bf->pred,
2272
139k
                                            tu_cb_origin_index,
2273
139k
                                            cand_bf->recon,
2274
139k
                                            tu_cb_origin_index,
2275
139k
                                            tx_width_uv,
2276
139k
                                            tx_height_uv,
2277
139k
                                            ctx->hbd_md);
2278
139k
                }
2279
146k
                const uint32_t input_chroma_txb_origin_index = ((ROUND_UV(ctx->blk_org_x + txb_origin_x)) >> 1) +
2280
146k
                    ((ROUND_UV(ctx->blk_org_y + txb_origin_y)) >> 1) * input_pic->v_stride;
2281
146k
                const int32_t txb_uv_origin_index = (ROUND_UV(txb_origin_x) +
2282
146k
                                                     (ROUND_UV(txb_origin_y) * cand_bf->quant->v_stride)) >>
2283
146k
                    1;
2284
2285
146k
                if (ssim_level == SSIM_LVL_1 || ssim_level == SSIM_LVL_3) {
2286
0
                    txb_full_distortion[DIST_SSIM][2][DIST_CALC_PREDICTION] = svt_spatial_full_distortion_ssim_kernel(
2287
0
                        input_pic->v_buffer,
2288
0
                        input_chroma_txb_origin_index,
2289
0
                        input_pic->v_stride,
2290
0
                        cand_bf->pred->v_buffer,
2291
0
                        txb_uv_origin_index,
2292
0
                        cand_bf->pred->v_stride,
2293
0
                        cropped_tx_width_uv,
2294
0
                        cropped_tx_height_uv,
2295
0
                        ctx->hbd_md,
2296
0
                        effective_ac_bias);
2297
2298
0
                    txb_full_distortion[DIST_SSIM][2][DIST_CALC_RESIDUAL] = svt_spatial_full_distortion_ssim_kernel(
2299
0
                        input_pic->v_buffer,
2300
0
                        input_chroma_txb_origin_index,
2301
0
                        input_pic->v_stride,
2302
0
                        cand_bf->recon->v_buffer,
2303
0
                        txb_uv_origin_index,
2304
0
                        cand_bf->recon->v_stride,
2305
0
                        cropped_tx_width_uv,
2306
0
                        cropped_tx_height_uv,
2307
0
                        ctx->hbd_md,
2308
0
                        effective_ac_bias);
2309
2310
0
                    txb_full_distortion[DIST_SSIM][2][DIST_CALC_PREDICTION] <<= 4;
2311
0
                    txb_full_distortion[DIST_SSIM][2][DIST_CALC_RESIDUAL] <<= 4;
2312
0
                }
2313
146k
                txb_full_distortion[DIST_SSD][2][DIST_CALC_PREDICTION] = spatial_full_dist_type_fun(
2314
146k
                    input_pic->v_buffer,
2315
146k
                    input_chroma_txb_origin_index,
2316
146k
                    input_pic->v_stride,
2317
146k
                    cand_bf->pred->v_buffer,
2318
146k
                    txb_uv_origin_index,
2319
146k
                    cand_bf->pred->v_stride,
2320
146k
                    cropped_tx_width_uv,
2321
146k
                    cropped_tx_height_uv);
2322
146k
                if (effective_ac_bias) {
2323
0
                    txb_full_distortion[DIST_SSD][2][DIST_CALC_PREDICTION] += get_svt_psy_full_dist(
2324
0
                        input_pic->v_buffer,
2325
0
                        input_chroma_txb_origin_index,
2326
0
                        input_pic->v_stride,
2327
0
                        cand_bf->pred->v_buffer,
2328
0
                        txb_uv_origin_index,
2329
0
                        cand_bf->pred->v_stride,
2330
0
                        cropped_tx_width_uv,
2331
0
                        cropped_tx_height_uv,
2332
0
                        ctx->hbd_md,
2333
0
                        effective_ac_bias);
2334
0
                }
2335
2336
146k
                txb_full_distortion[DIST_SSD][2][DIST_CALC_RESIDUAL] = spatial_full_dist_type_fun(
2337
146k
                    input_pic->v_buffer,
2338
146k
                    input_chroma_txb_origin_index,
2339
146k
                    input_pic->v_stride,
2340
146k
                    cand_bf->recon->v_buffer,
2341
146k
                    txb_uv_origin_index,
2342
146k
                    cand_bf->recon->v_stride,
2343
146k
                    cropped_tx_width_uv,
2344
146k
                    cropped_tx_height_uv);
2345
146k
                if (effective_ac_bias) {
2346
0
                    txb_full_distortion[DIST_SSD][2][DIST_CALC_RESIDUAL] += get_svt_psy_full_dist(
2347
0
                        input_pic->v_buffer,
2348
0
                        input_chroma_txb_origin_index,
2349
0
                        input_pic->v_stride,
2350
0
                        cand_bf->recon->v_buffer,
2351
0
                        txb_uv_origin_index,
2352
0
                        cand_bf->recon->v_stride,
2353
0
                        cropped_tx_width_uv,
2354
0
                        cropped_tx_height_uv,
2355
0
                        ctx->hbd_md,
2356
0
                        effective_ac_bias);
2357
0
                }
2358
2359
146k
                txb_full_distortion[DIST_SSD][2][DIST_CALC_PREDICTION] <<= 4;
2360
146k
                txb_full_distortion[DIST_SSD][2][DIST_CALC_RESIDUAL] <<= 4;
2361
146k
            } else {
2362
                // *Full Distortion (SSE)
2363
                // *Note - there are known issues with how this distortion metric is currently
2364
                //    calculated.  The amount of scaling between the two arrays is not
2365
                //    equivalent.
2366
308
                uint32_t bwidth  = tx_width_uv;
2367
308
                uint32_t bheight = tx_height_uv;
2368
308
                if (pf_shape) {
2369
0
                    bwidth  = MAX((bwidth >> pf_shape), 4);
2370
0
                    bheight = (bheight >> pf_shape);
2371
0
                }
2372
308
                svt_aom_picture_full_distortion32_bits_single(
2373
308
                    &(((int32_t*)ctx->tx_coeffs->v_buffer)[txb_1d_offset]),
2374
308
                    &(((int32_t*)cand_bf->rec_coeff->v_buffer)[txb_1d_offset]),
2375
308
                    tx_width_uv,
2376
308
                    bwidth,
2377
308
                    bheight,
2378
308
                    txb_full_distortion[DIST_SSD][2],
2379
308
                    cand_bf->eob.v[txb_itr]);
2380
2381
308
                const int32_t chroma_shift = (MAX_TX_SCALE - av1_get_tx_scale_tab[tx_size_uv]) * 2;
2382
308
                txb_full_distortion[DIST_SSD][2][DIST_CALC_RESIDUAL] = RIGHT_SIGNED_SHIFT(
2383
308
                    txb_full_distortion[DIST_SSD][2][DIST_CALC_RESIDUAL], chroma_shift);
2384
308
                txb_full_distortion[DIST_SSD][2][DIST_CALC_PREDICTION] = RIGHT_SIGNED_SHIFT(
2385
308
                    txb_full_distortion[DIST_SSD][2][DIST_CALC_PREDICTION], chroma_shift);
2386
308
            }
2387
146k
            cand_bf->v_has_coeff |= ((cand_bf->eob.v[txb_itr] != 0) << txb_itr);
2388
146k
            cr_full_distortion[DIST_SSIM][DIST_CALC_RESIDUAL] += txb_full_distortion[DIST_SSIM][2][DIST_CALC_RESIDUAL];
2389
146k
            cr_full_distortion[DIST_SSIM][DIST_CALC_PREDICTION] +=
2390
146k
                txb_full_distortion[DIST_SSIM][2][DIST_CALC_PREDICTION];
2391
2392
146k
            cr_full_distortion[DIST_SSD][DIST_CALC_RESIDUAL] += txb_full_distortion[DIST_SSD][2][DIST_CALC_RESIDUAL];
2393
146k
            cr_full_distortion[DIST_SSD][DIST_CALC_PREDICTION] +=
2394
146k
                txb_full_distortion[DIST_SSD][2][DIST_CALC_PREDICTION];
2395
146k
        }
2396
2397
145k
        const uint32_t txb_origin_index = txb_origin_x + txb_origin_y * cand_bf->quant->y_stride;
2398
2399
        // Reset the Bit Costs
2400
145k
        uint64_t y_txb_coeff_bits  = 0;
2401
145k
        uint64_t cb_txb_coeff_bits = 0;
2402
145k
        uint64_t cr_txb_coeff_bits = 0;
2403
2404
        //CHROMA-ONLY
2405
145k
        svt_aom_txb_estimate_coeff_bits(ctx,
2406
145k
                                        0,
2407
145k
                                        NULL,
2408
145k
                                        pcs,
2409
145k
                                        cand_bf,
2410
145k
                                        txb_origin_index,
2411
145k
                                        txb_1d_offset,
2412
145k
                                        cand_bf->quant,
2413
145k
                                        cand_bf->eob.y[txb_itr],
2414
145k
                                        cand_bf->eob.u[txb_itr],
2415
145k
                                        cand_bf->eob.v[txb_itr],
2416
145k
                                        &y_txb_coeff_bits,
2417
145k
                                        &cb_txb_coeff_bits,
2418
145k
                                        &cr_txb_coeff_bits,
2419
145k
                                        tx_size,
2420
145k
                                        tx_size_uv,
2421
145k
                                        cand_bf->cand->transform_type[txb_itr],
2422
145k
                                        cand_bf->cand->transform_type_uv,
2423
145k
                                        component_type);
2424
2425
145k
        *cb_coeff_bits += cb_txb_coeff_bits;
2426
145k
        *cr_coeff_bits += cr_txb_coeff_bits;
2427
145k
        txb_1d_offset += tx_width_uv * tx_height_uv;
2428
2429
145k
        ++txb_itr;
2430
145k
    } while (txb_itr < tu_count);
2431
145k
}
2432
2433
/*
2434
  check if we need to do inverse transform and recon
2435
*/
2436
284k
uint8_t svt_aom_do_md_recon(PictureParentControlSet* pcs, ModeDecisionContext* ctx) {
2437
284k
    const uint8_t encdec_bypass = ctx->bypass_encdec &&
2438
284k
        (ctx->pd_pass == PD_PASS_1); // if enc dec is bypassed MD has to produce the final recon
2439
284k
    const uint8_t need_md_rec_for_intra_pred = !ctx->skip_intra ||
2440
0
        ctx->inter_intra_comp_ctrls.enabled; // for intra prediction of current frame
2441
284k
    const uint8_t need_md_rec_for_ref = (pcs->is_ref || pcs->scs->static_config.recon_enabled) &&
2442
0
        encdec_bypass; // for inter prediction of future frame or if recon is being output
2443
284k
    const uint8_t need_md_rec_for_dlf_search  = pcs->dlf_ctrls.enabled; // for DLF levels
2444
284k
    const uint8_t need_md_rec_for_cdef_search = pcs->cdef_search_ctrls.enabled &&
2445
284k
        !pcs->cdef_search_ctrls.use_qp_strength &&
2446
0
        !pcs->cdef_search_ctrls.use_reference_cdef_fs; // CDEF search levels needing the recon samples
2447
284k
    const uint8_t need_md_rec_for_restoration_search = pcs->enable_restoration; // any resoration search level
2448
284k
    const uint8_t need_md_rec_for_quality            = (pcs->compute_psnr || pcs->compute_ssim) &&
2449
0
        (ctx->pd_pass == PD_PASS_1); // stat report needs recon samples for metrics
2450
284k
    uint8_t do_recon;
2451
284k
    if (need_md_rec_for_intra_pred || need_md_rec_for_ref || need_md_rec_for_dlf_search ||
2452
284k
        need_md_rec_for_cdef_search || need_md_rec_for_restoration_search || need_md_rec_for_quality) {
2453
284k
        do_recon = 1;
2454
284k
    } else {
2455
83
        do_recon = 0;
2456
83
    }
2457
2458
284k
    return do_recon;
2459
284k
}