Coverage Report

Created: 2026-05-16 06:41

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/svt-av1/Source/Lib/Codec/coding_loop.c
Line
Count
Source
1
/*
2
* Copyright(c) 2019 Intel Corporation
3
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
4
*
5
* This source code is subject to the terms of the BSD 3-Clause Clear License and
6
* the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear License
7
* was not distributed with this source code in the LICENSE file, you can
8
* obtain it at https://www.aomedia.org/license. If the Alliance for Open
9
* Media Patent License 1.0 was not distributed with this source code in the
10
* PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license.
11
*/
12
#include <string.h>
13
14
#include "coding_loop.h"
15
#include "utility.h"
16
#include "rd_cost.h"
17
#include "deblocking_filter.h"
18
#include "pic_operators.h"
19
#include "segmentation.h"
20
#include "enc_dec_process.h"
21
#include "EbSvtAv1ErrorCodes.h"
22
#include "transforms.h"
23
#include "inv_transforms.h"
24
#include "md_config_process.h"
25
#include "enc_intra_prediction.h"
26
#include "aom_dsp_rtcd.h"
27
#include "md_rate_estimation.h"
28
#include "full_loop.h"
29
#include "pack_unpack_c.h"
30
#include "enc_inter_prediction.h"
31
32
void aom_av1_set_ssim_rdmult(ModeDecisionContext* ctx, PictureControlSet* pcs, const int mi_row, const int mi_col);
33
34
0
/*
 * Allocates the palette info of the given block (one element) and then the
 * color index map that hangs off it (MAX_PALETTE_SQUARE elements).
 *
 * Returns EB_ErrorNone once both EB_MALLOC_ARRAY invocations have completed.
 * NOTE(review): EB_MALLOC_ARRAY is defined outside this view; presumably it
 * leaves the function with an error code when an allocation fails - confirm.
 */
static EbErrorType ec_rtime_alloc_palette_info(EcBlkStruct* md_blk_arr_nsq) {
    // The container has to exist before its color_idx_map member can be allocated.
    EB_MALLOC_ARRAY(md_blk_arr_nsq->palette_info, 1);

    EB_MALLOC_ARRAY(md_blk_arr_nsq->palette_info->color_idx_map, MAX_PALETTE_SQUARE);

    return EB_ErrorNone;
}
40
41
/*******************************************
42
* set Penalize Skip Flag
43
*
44
* Summary: Set the penalize_skipflag to true
45
* When there is luminance/chrominance change
46
* or in noisy clip with low motion at medium
47
* variance area
48
*
49
*******************************************/
50
51
typedef void (*EbAv1EncodeLoopFuncPtr)(PictureControlSet* pcs, EncDecContext* ed_ctx, SuperBlock* sb_ptr,
52
                                       uint32_t org_x, uint32_t org_y,
53
                                       EbPictureBufferDesc* pred_samples, // no basis/offset
54
                                       EbPictureBufferDesc* coeff_samples_sb, // sb based
55
                                       EbPictureBufferDesc* residual16bit, // no basis/offset
56
                                       EbPictureBufferDesc* transform16bit, // no basis/offset
57
                                       EbPictureBufferDesc* inverse_quant_buffer, uint32_t component_mask,
58
                                       uint16_t* eob);
59
60
typedef void (*EbAv1GenerateReconFuncPtr)(EncDecContext* ed_ctx, uint32_t org_x, uint32_t org_y,
61
                                          EbPictureBufferDesc* pred_samples, // no basis/offset
62
                                          EbPictureBufferDesc* residual16bit, // no basis/offset
63
                                          uint32_t component_mask, uint16_t* eob);
64
65
/*******************************************
66
* Residual Kernel 8-16bit
67
    Computes the residual data
68
*******************************************/
69
/*
 * Dispatches the residual computation to the 8-bit or the 16-bit kernel.
 *
 * input / pred       base pointers of the source and prediction planes; when
 *                    hbd is set they are reinterpreted as uint16_t buffers
 * *_offset           offset applied to the matching base pointer; for hbd the
 *                    input/pred offsets are applied after the uint16_t cast,
 *                    i.e. they count 16-bit elements
 * *_stride           stride forwarded unchanged to the selected kernel
 * residual           int16_t destination, written starting at residual_offset
 * hbd                true selects svt_residual_kernel16bit, false selects
 *                    svt_residual_kernel8bit
 * area_width/height  dimensions forwarded unchanged to the selected kernel
 */
void svt_aom_residual_kernel(uint8_t* input, uint32_t input_offset, uint32_t input_stride, uint8_t* pred,
                             uint32_t pred_offset, uint32_t pred_stride, int16_t* residual, uint32_t residual_offset,
                             uint32_t residual_stride, bool hbd, uint32_t area_width, uint32_t area_height) {
    int16_t* const dst = residual + residual_offset;

    if (!hbd) {
        uint8_t* const src8  = input + input_offset;
        uint8_t* const ref8  = pred + pred_offset;
        svt_residual_kernel8bit(
            src8, input_stride, ref8, pred_stride, dst, residual_stride, area_width, area_height);
        return;
    }

    // High bit depth: the byte pointers actually address 16-bit samples.
    uint16_t* const src16 = ((uint16_t*)input) + input_offset;
    uint16_t* const ref16 = ((uint16_t*)pred) + pred_offset;
    svt_residual_kernel16bit(
        src16, input_stride, ref16, pred_stride, dst, residual_stride, area_width, area_height);
}
92
93
/***************************************************
94
* Update Recon Samples Neighbor Arrays
95
***************************************************/
96
/*
 * Copies reconstructed samples from recon_buffer into the luma / Cb / Cr
 * neighbor arrays.
 *
 * org_x, org_y            luma position of the block; used both as the read
 *                         position in recon_buffer and as the write position
 *                         in the neighbor array
 * width, height           luma block dimensions
 * bwidth_uv, bheight_uv   chroma block dimensions
 * component_mask          PICTURE_BUFFER_DESC_LUMA_MASK and/or
 *                         PICTURE_BUFFER_DESC_CHROMA_MASK bits select the
 *                         planes that are written
 * is_16bit                selects the 16-bit writer (recon planes are cast to
 *                         uint16_t*) instead of the 8-bit writer
 */
static void encode_pass_update_recon_sample_neighbour_arrays(
    NeighborArrayUnit* lumaReconSampleNeighborArray, NeighborArrayUnit* cbReconSampleNeighborArray,
    NeighborArrayUnit* crReconSampleNeighborArray, EbPictureBufferDesc* recon_buffer, uint32_t org_x, uint32_t org_y,
    uint32_t width, uint32_t height, uint32_t bwidth_uv, uint32_t bheight_uv, uint32_t component_mask, bool is_16bit) {
    // Chroma position: luma origin rounded with ROUND_UV (for chroma blocks with size of 4), then halved.
    const uint32_t rounded_x = ROUND_UV(org_x);
    const uint32_t rounded_y = ROUND_UV(org_y);
    const uint32_t chroma_x  = rounded_x >> 1;
    const uint32_t chroma_y  = rounded_y >> 1;

    // Recon Samples - Luma
    if (component_mask & PICTURE_BUFFER_DESC_LUMA_MASK) {
        if (is_16bit) {
            svt_aom_neighbor_array_unit16bit_sample_write(lumaReconSampleNeighborArray,
                                                          (uint16_t*)(recon_buffer->y_buffer),
                                                          recon_buffer->y_stride,
                                                          org_x,
                                                          org_y,
                                                          org_x,
                                                          org_y,
                                                          width,
                                                          height,
                                                          NEIGHBOR_ARRAY_UNIT_FULL_MASK);
        } else {
            svt_aom_neighbor_array_unit_sample_write(lumaReconSampleNeighborArray,
                                                     recon_buffer->y_buffer,
                                                     recon_buffer->y_stride,
                                                     org_x,
                                                     org_y,
                                                     org_x,
                                                     org_y,
                                                     width,
                                                     height,
                                                     NEIGHBOR_ARRAY_UNIT_FULL_MASK);
        }
    }

    if (!(component_mask & PICTURE_BUFFER_DESC_CHROMA_MASK)) {
        return;
    }

    if (is_16bit) {
        // Recon Samples - Cb
        svt_aom_neighbor_array_unit16bit_sample_write(cbReconSampleNeighborArray,
                                                      (uint16_t*)(recon_buffer->u_buffer),
                                                      recon_buffer->u_stride,
                                                      chroma_x,
                                                      chroma_y,
                                                      chroma_x,
                                                      chroma_y,
                                                      bwidth_uv,
                                                      bheight_uv,
                                                      NEIGHBOR_ARRAY_UNIT_FULL_MASK);

        // Recon Samples - Cr
        svt_aom_neighbor_array_unit16bit_sample_write(crReconSampleNeighborArray,
                                                      (uint16_t*)(recon_buffer->v_buffer),
                                                      recon_buffer->v_stride,
                                                      chroma_x,
                                                      chroma_y,
                                                      chroma_x,
                                                      chroma_y,
                                                      bwidth_uv,
                                                      bheight_uv,
                                                      NEIGHBOR_ARRAY_UNIT_FULL_MASK);
    } else {
        // Recon Samples - Cb
        svt_aom_neighbor_array_unit_sample_write(cbReconSampleNeighborArray,
                                                 recon_buffer->u_buffer,
                                                 recon_buffer->u_stride,
                                                 chroma_x,
                                                 chroma_y,
                                                 chroma_x,
                                                 chroma_y,
                                                 bwidth_uv,
                                                 bheight_uv,
                                                 NEIGHBOR_ARRAY_UNIT_FULL_MASK);

        // Recon Samples - Cr
        svt_aom_neighbor_array_unit_sample_write(crReconSampleNeighborArray,
                                                 recon_buffer->v_buffer,
                                                 recon_buffer->v_stride,
                                                 chroma_x,
                                                 chroma_y,
                                                 chroma_x,
                                                 chroma_y,
                                                 bwidth_uv,
                                                 bheight_uv,
                                                 NEIGHBOR_ARRAY_UNIT_FULL_MASK);
    }
}
185
186
/**********************************************************
187
* Encode Generate CfL Prediction
188
*
189
* Summary: Performs an AV1 conformant CfL prediction based on
190
* recon luma samples in pred_samples
191
*
192
* Inputs:
193
*   pred_samples - recon luma samples on which CfL prediction is based
194
*
195
* Outputs:
196
*   pred_samples - predicted chroma samples for cb and cr
197
*
198
**********************************************************/
199
/*
 * Builds the CfL chroma prediction in place inside pred_samples.
 *
 * Steps, in order:
 *   1. sub-sample the luma samples found in pred_samples at
 *      (round_origin_x, round_origin_y) into ed_ctx->md_ctx->pred_buf_q3
 *   2. subtract the average from that buffer over the chroma transform area
 *   3. derive the Cb and Cr alpha values from the block's CfL index / signs
 *   4. run the CfL predictor on the Cb and Cr planes, using the same plane
 *      location for both the pointer arguments of the predictor
 *
 * pred_samples      holds the luma samples read in step 1 and receives the
 *                   chroma prediction in step 4
 * pred_cb_offset    offset of the block inside the Cb plane
 * pred_cr_offset    offset of the block inside the Cr plane
 * round_origin_x/y  luma position used to locate the samples of step 1
 */
static void av1_encode_generate_cfl_prediction(EbPictureBufferDesc* pred_samples, EncDecContext* ed_ctx,
                                               uint32_t pred_cb_offset, uint32_t pred_cr_offset,
                                               uint32_t round_origin_x, uint32_t round_origin_y) {
    const bool       hbd  = ed_ctx->is_16bit;
    const BlockGeom* geom = ed_ctx->blk_geom;
    BlkStruct*       blk  = ed_ctx->blk_ptr;

    const uint32_t luma_offset = (round_origin_y * pred_samples->y_stride) + round_origin_x;

    // When the chroma dimension equals the luma dimension, twice the chroma
    // dimension is sub-sampled; otherwise the luma dimension itself is used.
    const int32_t luma_w = geom->bwidth_uv == geom->bwidth ? (geom->bwidth_uv << 1) : geom->bwidth;
    const int32_t luma_h = geom->bheight_uv == geom->bheight ? (geom->bheight_uv << 1) : geom->bheight;

    // Down sample Luma
    if (hbd) {
        svt_cfl_luma_subsampling_420_hbd(((uint16_t*)pred_samples->y_buffer) + luma_offset,
                                         pred_samples->y_stride,
                                         ed_ctx->md_ctx->pred_buf_q3,
                                         luma_w,
                                         luma_h);
    } else {
        svt_cfl_luma_subsampling_420_lbd(pred_samples->y_buffer + luma_offset,
                                         pred_samples->y_stride,
                                         ed_ctx->md_ctx->pred_buf_q3,
                                         luma_w,
                                         luma_h);
    }

    const TxSize uv_tx = av1_get_max_uv_txsize(geom->bsize, 1, 1);
    const int    uv_w  = tx_size_wide[uv_tx];
    const int    uv_h  = tx_size_high[uv_tx];

    // Half the number of chroma samples, passed as the rounding term of the averaging.
    int32_t half_area = (uv_w * uv_h) / 2;

    svt_subtract_average(
        ed_ctx->md_ctx->pred_buf_q3, uv_w, uv_h, half_area, svt_log2f(uv_w) + svt_log2f(uv_h));

    // One alpha per chroma plane.
    int32_t alpha_cb = cfl_idx_to_alpha(blk->block_mi.cfl_alpha_idx, blk->block_mi.cfl_alpha_signs, CFL_PRED_U);
    int32_t alpha_cr = cfl_idx_to_alpha(blk->block_mi.cfl_alpha_idx, blk->block_mi.cfl_alpha_signs, CFL_PRED_V);

    if (hbd) {
        uint16_t* const cb16 = ((uint16_t*)pred_samples->u_buffer) + pred_cb_offset;
        uint16_t* const cr16 = ((uint16_t*)pred_samples->v_buffer) + pred_cr_offset;

        svt_cfl_predict_hbd(ed_ctx->md_ctx->pred_buf_q3,
                            cb16,
                            pred_samples->u_stride,
                            cb16,
                            pred_samples->u_stride,
                            alpha_cb,
                            ed_ctx->bit_depth,
                            uv_w,
                            uv_h);

        svt_cfl_predict_hbd(ed_ctx->md_ctx->pred_buf_q3,
                            cr16,
                            pred_samples->v_stride,
                            cr16,
                            pred_samples->v_stride,
                            alpha_cr,
                            ed_ctx->bit_depth,
                            uv_w,
                            uv_h);
        return;
    }

    // Low bit depth path: the bit depth argument is fixed to 8.
    svt_cfl_predict_lbd(ed_ctx->md_ctx->pred_buf_q3,
                        pred_samples->u_buffer + pred_cb_offset,
                        pred_samples->u_stride,
                        pred_samples->u_buffer + pred_cb_offset,
                        pred_samples->u_stride,
                        alpha_cb,
                        8,
                        uv_w,
                        uv_h);

    svt_cfl_predict_lbd(ed_ctx->md_ctx->pred_buf_q3,
                        pred_samples->v_buffer + pred_cr_offset,
                        pred_samples->v_stride,
                        pred_samples->v_buffer + pred_cr_offset,
                        pred_samples->v_stride,
                        alpha_cr,
                        8,
                        uv_w,
                        uv_h);
}
288
289
/**********************************************************
290
* Encode Loop
291
*
292
* Summary: Performs an AV1 conformant
293
*   Transform, Quantization  and Inverse Quantization of a TU.
294
*
295
* Inputs:
296
*   org_x
297
*   org_y
298
*   txb_size
299
*   sb_sz
300
*   input - input samples (position sensitive)
301
*   pred - prediction samples (position independent)
302
*
303
* Outputs:
304
*   Inverse quantized coeff - quantization indices (position sensitive)
305
*
306
**********************************************************/
307
static void av1_encode_loop(PictureControlSet* pcs, EncDecContext* ed_ctx, uint32_t org_x, uint32_t org_y,
308
                            EbPictureBufferDesc* pred_samples, // no basis/offset
309
                            EbPictureBufferDesc* coeff_samples_sb, // sb based
310
                            EbPictureBufferDesc* residual16bit, // no basis/offset
311
                            EbPictureBufferDesc* transform16bit, // no basis/offset
312
                            EbPictureBufferDesc* inverse_quant_buffer, uint32_t component_mask, uint16_t* eob)
313
314
0
{
315
0
    ModeDecisionContext* md_ctx        = ed_ctx->md_ctx;
316
0
    const BlockGeom*     blk_geom      = ed_ctx->blk_geom;
317
0
    BlkStruct*           blk_ptr       = ed_ctx->blk_ptr;
318
0
    const uint32_t       qindex        = blk_ptr->qindex;
319
0
    const bool           is_16bit      = ed_ctx->is_16bit;
320
0
    const uint32_t       bit_depth     = ed_ctx->bit_depth;
321
0
    EbPictureBufferDesc* input_samples = is_16bit ? ed_ctx->input_sample16bit_buffer : ed_ctx->input_samples;
322
323
0
    const bool     is_inter       = is_inter_block(&blk_ptr->block_mi);
324
0
    const uint32_t round_origin_x = ROUND_UV(org_x); // for Chroma blocks with size of 4
325
0
    const uint32_t round_origin_y = ROUND_UV(org_y); // for Chroma blocks with size of 4
326
0
    const uint8_t  tx_depth       = blk_ptr->block_mi.tx_depth;
327
    // Get the tx origin coordinates within the SB (not frame)
328
0
    const uint16_t tx_org_x = org_x - md_ctx->sb_origin_x;
329
0
    const uint16_t tx_org_y = org_y - md_ctx->sb_origin_y;
330
0
    const int32_t  seg_qp   = pcs->ppcs->frm_hdr.segmentation_params.segmentation_enabled
331
0
           ? pcs->ppcs->frm_hdr.segmentation_params.feature_data[ed_ctx->blk_ptr->segment_id][SEG_LVL_ALT_Q]
332
0
           : 0;
333
334
0
    uint32_t input_luma_offset, input_cb_offset, input_cr_offset;
335
0
    uint32_t pred_luma_offset, pred_cb_offset, pred_cr_offset;
336
0
    uint32_t scratch_luma_offset, scratch_cb_offset, scratch_cr_offset;
337
0
    if (is_16bit) {
338
0
        input_luma_offset = tx_org_x + tx_org_y * input_samples->y_stride;
339
0
        input_cb_offset   = ROUND_UV(tx_org_x) / 2 + ROUND_UV(tx_org_y) / 2 * input_samples->u_stride;
340
0
        input_cr_offset   = ROUND_UV(tx_org_x) / 2 + ROUND_UV(tx_org_y) / 2 * input_samples->v_stride;
341
0
        pred_luma_offset  = (org_y * pred_samples->y_stride) + org_x;
342
0
        pred_cb_offset    = (round_origin_x >> 1) + ((round_origin_y >> 1) * pred_samples->u_stride);
343
0
        pred_cr_offset    = (round_origin_x >> 1) + ((round_origin_y >> 1) * pred_samples->v_stride);
344
0
    } else {
345
0
        input_luma_offset = (org_y * input_samples->y_stride) + org_x;
346
0
        input_cb_offset   = ((round_origin_y >> 1) * input_samples->u_stride) + (round_origin_x >> 1);
347
0
        input_cr_offset   = ((round_origin_y >> 1) * input_samples->v_stride) + (round_origin_x >> 1);
348
349
0
        pred_luma_offset = org_x + (org_y * pred_samples->y_stride);
350
0
        pred_cb_offset   = (round_origin_x >> 1) + ((round_origin_y >> 1) * pred_samples->u_stride);
351
0
        pred_cr_offset   = (round_origin_x >> 1) + ((round_origin_y >> 1) * pred_samples->v_stride);
352
0
    }
353
354
0
    if (bit_depth != EB_EIGHT_BIT) {
355
        // Get the block origin coordinates within the SB (not frame)
356
0
        const uint16_t blk_org_x_in_sb = md_ctx->blk_org_x - md_ctx->sb_origin_x;
357
0
        const uint16_t blk_org_y_in_sb = md_ctx->blk_org_y - md_ctx->sb_origin_y;
358
0
        scratch_luma_offset            = blk_org_x_in_sb + blk_org_y_in_sb * residual16bit->y_stride;
359
0
        scratch_cb_offset = ROUND_UV(blk_org_x_in_sb) / 2 + ROUND_UV(blk_org_y_in_sb) / 2 * residual16bit->u_stride;
360
0
        scratch_cr_offset = ROUND_UV(blk_org_x_in_sb) / 2 + ROUND_UV(blk_org_y_in_sb) / 2 * residual16bit->v_stride;
361
0
    } else {
362
0
        scratch_luma_offset = tx_org_x + tx_org_y * residual16bit->y_stride;
363
0
        scratch_cb_offset   = ROUND_UV(tx_org_x) / 2 + ROUND_UV(tx_org_y) / 2 * residual16bit->u_stride;
364
0
        scratch_cr_offset   = ROUND_UV(tx_org_x) / 2 + ROUND_UV(tx_org_y) / 2 * residual16bit->v_stride;
365
0
    }
366
0
    ed_ctx->three_quad_energy = 0;
367
368
0
    if (pcs->ppcs->blk_lambda_tuning) {
369
0
        md_ctx->blk_geom  = ed_ctx->blk_geom;
370
0
        md_ctx->blk_org_x = ed_ctx->blk_org_x;
371
0
        md_ctx->blk_org_y = ed_ctx->blk_org_y;
372
        //Get the new lambda for current block
373
0
        svt_aom_set_tuned_blk_lambda(md_ctx, pcs);
374
0
    } else if (pcs->ppcs->scs->static_config.tune == TUNE_SSIM || pcs->ppcs->scs->static_config.tune == TUNE_IQ ||
375
0
               pcs->ppcs->scs->static_config.tune == TUNE_MS_SSIM) {
376
0
        md_ctx->blk_geom  = ed_ctx->blk_geom;
377
0
        md_ctx->blk_org_x = ed_ctx->blk_org_x;
378
0
        md_ctx->blk_org_y = ed_ctx->blk_org_y;
379
0
        int mi_row        = ed_ctx->blk_org_y / 4;
380
0
        int mi_col        = ed_ctx->blk_org_x / 4;
381
0
        aom_av1_set_ssim_rdmult(md_ctx, pcs, mi_row, mi_col);
382
0
    }
383
384
    //**********************************
385
    // Luma
386
    //**********************************
387
0
    if (component_mask == PICTURE_BUFFER_DESC_FULL_MASK || component_mask == PICTURE_BUFFER_DESC_LUMA_MASK) {
388
0
        if (ed_ctx->md_skip_blk) {
389
0
            eob[0]                               = 0;
390
0
            blk_ptr->quant_dc.y[ed_ctx->txb_itr] = 0;
391
0
        } else {
392
0
            const TxSize tx_size   = tx_depth_to_tx_size[tx_depth][blk_geom->bsize];
393
0
            const int    tx_width  = tx_size_wide[tx_size];
394
0
            const int    tx_height = tx_size_high[tx_size];
395
0
            svt_aom_residual_kernel(input_samples->y_buffer,
396
0
                                    input_luma_offset,
397
0
                                    input_samples->y_stride,
398
0
                                    pred_samples->y_buffer,
399
0
                                    pred_luma_offset,
400
0
                                    pred_samples->y_stride,
401
0
                                    ((int16_t*)residual16bit->y_buffer),
402
0
                                    scratch_luma_offset,
403
0
                                    residual16bit->y_stride,
404
0
                                    is_16bit, // hbd
405
0
                                    tx_width,
406
0
                                    tx_height);
407
0
            svt_aom_estimate_transform(pcs,
408
0
                                       ed_ctx->md_ctx,
409
0
                                       ((int16_t*)residual16bit->y_buffer) + scratch_luma_offset,
410
0
                                       residual16bit->y_stride,
411
0
                                       ((TranLow*)transform16bit->y_buffer) + ed_ctx->coded_area_sb,
412
0
                                       NOT_USED_VALUE,
413
0
                                       tx_size,
414
0
                                       &ed_ctx->three_quad_energy,
415
0
                                       bit_depth,
416
0
                                       blk_ptr->tx_type[ed_ctx->txb_itr],
417
0
                                       PLANE_TYPE_Y,
418
0
                                       DEFAULT_SHAPE);
419
420
0
            blk_ptr->quant_dc.y[ed_ctx->txb_itr] = svt_aom_quantize_inv_quantize(
421
0
                pcs,
422
0
                md_ctx,
423
0
                ((int32_t*)transform16bit->y_buffer) + ed_ctx->coded_area_sb,
424
0
                ((int32_t*)coeff_samples_sb->y_buffer) + ed_ctx->coded_area_sb,
425
0
                ((int32_t*)inverse_quant_buffer->y_buffer) + ed_ctx->coded_area_sb,
426
0
                qindex,
427
0
                seg_qp,
428
0
                tx_size,
429
0
                &eob[0],
430
0
                COMPONENT_LUMA,
431
0
                bit_depth,
432
0
                blk_ptr->tx_type[ed_ctx->txb_itr],
433
0
                md_ctx->luma_txb_skip_context,
434
0
                md_ctx->luma_dc_sign_context,
435
0
                blk_ptr->block_mi.mode,
436
0
                md_ctx->full_lambda_md[(bit_depth == EB_TEN_BIT) ? EB_10_BIT_MD : EB_8_BIT_MD],
437
0
                true);
438
0
        }
439
440
0
        blk_ptr->y_has_coeff |= (eob[0] > 0) << ed_ctx->txb_itr;
441
0
        blk_ptr->eob.y[ed_ctx->txb_itr] = (uint16_t)eob[0];
442
443
0
        if (eob[0] == 0) {
444
0
            blk_ptr->tx_type[ed_ctx->txb_itr] = DCT_DCT;
445
            // INTER. Chroma follows Luma in transform type
446
0
            if (ed_ctx->txb_itr == 0 && is_inter) {
447
0
                blk_ptr->tx_type_uv = DCT_DCT;
448
0
            }
449
0
        }
450
0
    }
451
452
0
    if (component_mask == PICTURE_BUFFER_DESC_FULL_MASK || component_mask == PICTURE_BUFFER_DESC_CHROMA_MASK) {
453
        // If chroma uses CfL prediction, generate predicted samples based on previously computed recon luma
454
        // samples. The recon luma samples must be from a previous call to av1_encode_loop/av1_encode_generate_recon
455
        // because this function does not generate reconstructed samples.
456
0
        if (is_intra_mode(blk_ptr->block_mi.mode) && blk_ptr->block_mi.uv_mode == UV_CFL_PRED) {
457
0
            av1_encode_generate_cfl_prediction(
458
0
                pred_samples, ed_ctx, pred_cb_offset, pred_cr_offset, round_origin_x, round_origin_y);
459
0
        }
460
461
        //**********************************
462
        // Chroma
463
        //**********************************
464
0
        if (ed_ctx->md_skip_blk) {
465
0
            eob[1]                               = 0;
466
0
            blk_ptr->quant_dc.u[ed_ctx->txb_itr] = 0;
467
0
            eob[2]                               = 0;
468
0
            blk_ptr->quant_dc.v[ed_ctx->txb_itr] = 0;
469
0
        } else {
470
0
            const TxSize tx_size_uv   = av1_get_max_uv_txsize(blk_geom->bsize, 1, 1);
471
0
            const int    tx_width_uv  = tx_size_wide[tx_size_uv];
472
0
            const int    tx_height_uv = tx_size_high[tx_size_uv];
473
            //**********************************
474
            // Cb
475
            //**********************************
476
0
            svt_aom_residual_kernel(input_samples->u_buffer,
477
0
                                    input_cb_offset,
478
0
                                    input_samples->u_stride,
479
0
                                    pred_samples->u_buffer,
480
0
                                    pred_cb_offset,
481
0
                                    pred_samples->u_stride,
482
0
                                    ((int16_t*)residual16bit->u_buffer),
483
0
                                    scratch_cb_offset,
484
0
                                    residual16bit->u_stride,
485
0
                                    is_16bit, // hbd
486
0
                                    tx_width_uv,
487
0
                                    tx_height_uv);
488
0
            svt_aom_estimate_transform(pcs,
489
0
                                       ed_ctx->md_ctx,
490
0
                                       ((int16_t*)residual16bit->u_buffer) + scratch_cb_offset,
491
0
                                       residual16bit->u_stride,
492
0
                                       ((TranLow*)transform16bit->u_buffer) + ed_ctx->coded_area_sb_uv,
493
0
                                       NOT_USED_VALUE,
494
0
                                       tx_size_uv,
495
0
                                       &ed_ctx->three_quad_energy,
496
0
                                       bit_depth,
497
0
                                       blk_ptr->tx_type_uv,
498
0
                                       PLANE_TYPE_UV,
499
0
                                       DEFAULT_SHAPE);
500
501
0
            blk_ptr->quant_dc.u[ed_ctx->txb_itr] = svt_aom_quantize_inv_quantize(
502
0
                pcs,
503
0
                md_ctx,
504
0
                ((int32_t*)transform16bit->u_buffer) + ed_ctx->coded_area_sb_uv,
505
0
                ((int32_t*)coeff_samples_sb->u_buffer) + ed_ctx->coded_area_sb_uv,
506
0
                ((int32_t*)inverse_quant_buffer->u_buffer) + ed_ctx->coded_area_sb_uv,
507
0
                qindex,
508
0
                seg_qp,
509
0
                tx_size_uv,
510
0
                &eob[1],
511
0
                COMPONENT_CHROMA_CB,
512
0
                bit_depth,
513
0
                blk_ptr->tx_type_uv,
514
0
                md_ctx->cb_txb_skip_context,
515
0
                md_ctx->cb_dc_sign_context,
516
0
                blk_ptr->block_mi.mode,
517
0
                md_ctx->full_lambda_md[(bit_depth == EB_TEN_BIT) ? EB_10_BIT_MD : EB_8_BIT_MD],
518
0
                true);
519
520
            //**********************************
521
            // Cr
522
            //**********************************
523
0
            svt_aom_residual_kernel(input_samples->v_buffer,
524
0
                                    input_cr_offset,
525
0
                                    input_samples->v_stride,
526
0
                                    pred_samples->v_buffer,
527
0
                                    pred_cr_offset,
528
0
                                    pred_samples->v_stride,
529
0
                                    ((int16_t*)residual16bit->v_buffer),
530
0
                                    scratch_cr_offset,
531
0
                                    residual16bit->v_stride,
532
0
                                    is_16bit, // hbd
533
0
                                    tx_width_uv,
534
0
                                    tx_height_uv);
535
0
            svt_aom_estimate_transform(pcs,
536
0
                                       ed_ctx->md_ctx,
537
0
                                       ((int16_t*)residual16bit->v_buffer) + scratch_cb_offset,
538
0
                                       residual16bit->v_stride,
539
0
                                       ((TranLow*)transform16bit->v_buffer) + ed_ctx->coded_area_sb_uv,
540
0
                                       NOT_USED_VALUE,
541
0
                                       tx_size_uv,
542
0
                                       &ed_ctx->three_quad_energy,
543
0
                                       bit_depth,
544
0
                                       blk_ptr->tx_type_uv,
545
0
                                       PLANE_TYPE_UV,
546
0
                                       DEFAULT_SHAPE);
547
548
0
            blk_ptr->quant_dc.v[ed_ctx->txb_itr] = svt_aom_quantize_inv_quantize(
549
0
                pcs,
550
0
                md_ctx,
551
0
                ((int32_t*)transform16bit->v_buffer) + ed_ctx->coded_area_sb_uv,
552
0
                ((int32_t*)coeff_samples_sb->v_buffer) + ed_ctx->coded_area_sb_uv,
553
0
                ((int32_t*)inverse_quant_buffer->v_buffer) + ed_ctx->coded_area_sb_uv,
554
0
                qindex,
555
0
                seg_qp,
556
0
                tx_size_uv,
557
0
                &eob[2],
558
0
                COMPONENT_CHROMA_CR,
559
0
                bit_depth,
560
0
                blk_ptr->tx_type_uv,
561
0
                md_ctx->cr_txb_skip_context,
562
0
                md_ctx->cr_dc_sign_context,
563
0
                blk_ptr->block_mi.mode,
564
0
                md_ctx->full_lambda_md[(bit_depth == EB_TEN_BIT) ? EB_10_BIT_MD : EB_8_BIT_MD],
565
0
                true);
566
0
        }
567
568
0
        blk_ptr->u_has_coeff |= (eob[1] > 0) << ed_ctx->txb_itr;
569
0
        blk_ptr->v_has_coeff |= (eob[2] > 0) << ed_ctx->txb_itr;
570
0
        blk_ptr->eob.u[ed_ctx->txb_itr] = (uint16_t)eob[1];
571
0
        blk_ptr->eob.v[ed_ctx->txb_itr] = (uint16_t)eob[2];
572
0
    }
573
574
0
    return;
575
0
}
576
577
/**********************************************************
578
* Encode Generate Recon
579
*
580
* Summary: Performs an AV1 conformant
581
*   Inverse Transform and generate
582
*   the reconstructed samples of a TU.
583
*
584
* Inputs:
585
*   org_x
586
*   org_y
587
*   txb_size
588
*   sb_sz
589
*   input - Inverse Quantized Coeff (position sensitive)
590
*   pred - prediction samples (position independent)
591
*
592
* Outputs:
593
*   Recon  (position independent)
594
*
595
**********************************************************/
596
static void av1_encode_generate_recon(PictureControlSet* pcs, EncDecContext* ed_ctx, uint32_t org_x, uint32_t org_y,
597
                                      EbPictureBufferDesc* pred_samples, // no basis/offset
598
                                      EbPictureBufferDesc* residual16bit, // no basis/offset
599
0
                                      uint32_t component_mask, uint16_t* eob) {
600
0
    BlkStruct* blk_ptr = ed_ctx->blk_ptr;
601
602
    //**********************************
603
    // Luma
604
    //**********************************
605
0
    if (component_mask & PICTURE_BUFFER_DESC_LUMA_MASK) {
606
0
        if ((blk_ptr->y_has_coeff & (1 << ed_ctx->txb_itr)) && blk_ptr->block_mi.skip_mode == false) {
607
0
            const TxSize   tx_size          = tx_depth_to_tx_size[blk_ptr->block_mi.tx_depth][ed_ctx->blk_geom->bsize];
608
0
            const uint32_t pred_luma_offset = (org_y * pred_samples->y_stride) + org_x;
609
0
            svt_aom_inv_transform_recon_wrapper(pcs,
610
0
                                                ed_ctx->md_ctx,
611
0
                                                pred_samples->y_buffer,
612
0
                                                pred_luma_offset,
613
0
                                                pred_samples->y_stride,
614
0
                                                pred_samples->y_buffer,
615
0
                                                pred_luma_offset,
616
0
                                                pred_samples->y_stride,
617
0
                                                ((int32_t*)residual16bit->y_buffer),
618
0
                                                ed_ctx->coded_area_sb,
619
0
                                                ed_ctx->bit_depth == EB_TEN_BIT ? 1 : 0, // hbd
620
0
                                                tx_size,
621
0
                                                blk_ptr->tx_type[ed_ctx->txb_itr],
622
0
                                                PLANE_TYPE_Y,
623
0
                                                eob[0]);
624
0
        }
625
0
    }
626
627
    //**********************************
628
    // Chroma
629
    //**********************************
630
0
    if (component_mask & PICTURE_BUFFER_DESC_CHROMA_MASK) {
631
0
        const TxSize   tx_size_uv     = av1_get_max_uv_txsize(ed_ctx->blk_geom->bsize, 1, 1);
632
0
        const uint32_t round_origin_x = ROUND_UV(org_x); // for Chroma blocks with size of 4
633
0
        const uint32_t round_origin_y = ROUND_UV(org_y); // for Chroma blocks with size of 4
634
635
        //**********************************
636
        // Cb
637
        //**********************************
638
0
        if ((blk_ptr->u_has_coeff & (1 << ed_ctx->txb_itr)) && blk_ptr->block_mi.skip_mode == false) {
639
0
            const uint32_t pred_offset_cb = ((round_origin_y >> 1) * pred_samples->u_stride) + (round_origin_x >> 1);
640
0
            svt_aom_inv_transform_recon_wrapper(pcs,
641
0
                                                ed_ctx->md_ctx,
642
0
                                                pred_samples->u_buffer,
643
0
                                                pred_offset_cb,
644
0
                                                pred_samples->u_stride,
645
0
                                                pred_samples->u_buffer,
646
0
                                                pred_offset_cb,
647
0
                                                pred_samples->u_stride,
648
0
                                                ((int32_t*)residual16bit->u_buffer),
649
0
                                                ed_ctx->coded_area_sb_uv,
650
0
                                                ed_ctx->bit_depth == EB_TEN_BIT ? 1 : 0, // hbd
651
0
                                                tx_size_uv,
652
0
                                                blk_ptr->tx_type_uv,
653
0
                                                PLANE_TYPE_UV,
654
0
                                                eob[1]);
655
0
        }
656
657
        //**********************************
658
        // Cr
659
        //**********************************
660
0
        if ((blk_ptr->v_has_coeff & (1 << ed_ctx->txb_itr)) && blk_ptr->block_mi.skip_mode == false) {
661
0
            const uint32_t pred_offset_cr = ((round_origin_y >> 1) * pred_samples->v_stride) + (round_origin_x >> 1);
662
0
            svt_aom_inv_transform_recon_wrapper(pcs,
663
0
                                                ed_ctx->md_ctx,
664
0
                                                pred_samples->v_buffer,
665
0
                                                pred_offset_cr,
666
0
                                                pred_samples->v_stride,
667
0
                                                pred_samples->v_buffer,
668
0
                                                pred_offset_cr,
669
0
                                                pred_samples->v_stride,
670
0
                                                ((int32_t*)residual16bit->v_buffer),
671
0
                                                ed_ctx->coded_area_sb_uv,
672
0
                                                ed_ctx->bit_depth == EB_TEN_BIT ? 1 : 0, // hbd
673
0
                                                tx_size_uv,
674
0
                                                blk_ptr->tx_type_uv,
675
0
                                                PLANE_TYPE_UV,
676
0
                                                eob[2]);
677
0
        }
678
0
    }
679
0
}
680
681
/*
 * Copy `rows` rows of `cols` 16bit samples from src to dst, each with its own stride
 * (strides are expressed in samples).
 */
static void store16bit_copy_plane_rows(uint16_t* dst, uint32_t dst_stride, uint16_t* src, uint32_t src_stride,
                                       uint32_t cols, uint32_t rows) {
    for (uint32_t row = 0; row < rows; row++) {
        svt_memcpy(dst + row * dst_stride, src + row * src_stride, cols * 2);
    }
}

/*
 * Store a 16bit input block into pcs->input_frame16bit.
 *
 * The source block is read from the start of each plane of input_sample16bit_buffer and written
 * at luma position (sb_x, sb_y) of the frame; sb_w x sb_h is the luma size of the block.
 * The Cb/Cr planes use the same position and size divided by two.
 */
void svt_aom_store16bit_input_src(EbPictureBufferDesc* input_sample16bit_buffer, PictureControlSet* pcs, uint32_t sb_x,
                                  uint32_t sb_y, uint32_t sb_w, uint32_t sb_h) {
    // Luma
    store16bit_copy_plane_rows(
        (uint16_t*)pcs->input_frame16bit->y_buffer + sb_x + (sb_y * pcs->input_frame16bit->y_stride),
        pcs->input_frame16bit->y_stride,
        (uint16_t*)input_sample16bit_buffer->y_buffer,
        input_sample16bit_buffer->y_stride,
        sb_w,
        sb_h);

    // Chroma position and size
    const uint32_t uv_x = sb_x / 2;
    const uint32_t uv_y = sb_y / 2;
    const uint32_t uv_w = sb_w / 2;
    const uint32_t uv_h = sb_h / 2;

    // Cb
    store16bit_copy_plane_rows(
        (uint16_t*)pcs->input_frame16bit->u_buffer + uv_x + (uv_y * pcs->input_frame16bit->u_stride),
        pcs->input_frame16bit->u_stride,
        (uint16_t*)input_sample16bit_buffer->u_buffer,
        input_sample16bit_buffer->u_stride,
        uv_w,
        uv_h);

    // Cr
    store16bit_copy_plane_rows(
        (uint16_t*)pcs->input_frame16bit->v_buffer + uv_x + (uv_y * pcs->input_frame16bit->v_stride),
        pcs->input_frame16bit->v_stride,
        (uint16_t*)input_sample16bit_buffer->v_buffer,
        input_sample16bit_buffer->v_stride,
        uv_w,
        uv_h);
}
719
720
// Forward declaration only; the definition is not part of this section of the file.
void svt_aom_update_mi_map_enc_dec(BlkStruct* blk_ptr, ModeDecisionContext* ctx, PictureControlSet* pcs);
721
722
0
/*
 * Intra coding loop for EncDec process.
 *
 * Luma: for every transform block of the current block, gather the above/left/top-left
 * reference samples from the luma recon neighbour array, run intra prediction, then
 * av1_encode_loop() and av1_encode_generate_recon(), and finally update the recon and
 * DC sign/level neighbour arrays before moving to the next transform block.
 *
 * Chroma (only when md_ctx->has_uv is set): the same sequence is run once, with
 * txb_itr forced to 0, predicting Cb and Cr before coding both planes together.
 *
 * The y/u/v_has_coeff flags of the block are cleared on entry and block_has_coeff is
 * derived from them on exit.
 */
static void perform_intra_coding_loop(PictureControlSet* pcs, EncDecContext* ed_ctx) {
    ModeDecisionContext* const md_ctx   = ed_ctx->md_ctx;
    const BlockGeom* const     blk_geom = ed_ctx->blk_geom;
    BlkStruct* const           blk_ptr  = ed_ctx->blk_ptr;
    const bool                 is_16bit = ed_ctx->is_16bit;
    const uint8_t              is_inter = 0; // set to 0 b/c this is the intra path
    const uint16_t             tile_idx = ed_ctx->tile_index;

    EbPictureBufferDesc* const sb_coeff_pic = pcs->ppcs->enc_dec_ptr->quantized_coeff[ed_ctx->sb_index];

    // Recon neighbour arrays matching the working sample size
    NeighborArrayUnit* luma_recon_na;
    NeighborArrayUnit* cb_recon_na;
    NeighborArrayUnit* cr_recon_na;
    if (is_16bit) {
        luma_recon_na = pcs->ep_luma_recon_na_16bit[tile_idx];
        cb_recon_na   = pcs->ep_cb_recon_na_16bit[tile_idx];
        cr_recon_na   = pcs->ep_cr_recon_na_16bit[tile_idx];
    } else {
        luma_recon_na = pcs->ep_luma_recon_na[tile_idx];
        cb_recon_na   = pcs->ep_cb_recon_na[tile_idx];
        cr_recon_na   = pcs->ep_cr_recon_na[tile_idx];
    }

    // temp buffers for performing the transform/generating the recon
    EbPictureBufferDesc* const residual_pic  = md_ctx->temp_residual;
    EbPictureBufferDesc* const transform_pic = md_ctx->tx_coeffs;
    EbPictureBufferDesc* const dequant_pic   = md_ctx->cand_bf_ptr_array[0]->rec_coeff;

    blk_ptr->y_has_coeff = 0;
    blk_ptr->u_has_coeff = 0;
    blk_ptr->v_has_coeff = 0;
    uint16_t eobs[MAX_TXB_COUNT][3];

    EbPictureBufferDesc* recon_pic;
    svt_aom_get_recon_pic(pcs, &recon_pic, is_16bit);

    const uint8_t  tx_depth = blk_ptr->block_mi.tx_depth;
    const uint32_t tot_tu   = tx_blocks_per_depth[blk_geom->bsize][tx_depth];

    const TxSize tx_size   = tx_depth_to_tx_size[tx_depth][blk_geom->bsize];
    const int    tx_width  = tx_size_wide[tx_size];
    const int    tx_height = tx_size_high[tx_size];

    const TxSize tx_size_uv   = av1_get_max_uv_txsize(blk_geom->bsize, 1, 1);
    const int    tx_width_uv  = tx_size_wide[tx_size_uv];
    const int    tx_height_uv = tx_size_high[tx_size_uv];

    const uint32_t sb_size_luma   = pcs->ppcs->scs->sb_size;
    const uint32_t sb_size_chroma = pcs->ppcs->scs->sb_size >> 1;

    // Byte offset of the first above/left sample in the local reference arrays;
    // the slot in front of it receives the top-left sample
    const uint64_t ref_start = (uint64_t)1 << is_16bit;

    // Luma path
    for (ed_ctx->txb_itr = 0; ed_ctx->txb_itr < tot_tu; ed_ctx->txb_itr++) {
        const uint16_t tu_x = ed_ctx->blk_org_x + tx_org[blk_geom->bsize][is_inter][tx_depth][ed_ctx->txb_itr].x;
        const uint16_t tu_y = ed_ctx->blk_org_y + tx_org[blk_geom->bsize][is_inter][tx_depth][ed_ctx->txb_itr].y;

        md_ctx->luma_txb_skip_context = 0;
        md_ctx->luma_dc_sign_context  = 0;
        svt_aom_get_txb_ctx(pcs,
                            COMPONENT_LUMA,
                            pcs->ep_luma_dc_sign_level_coeff_na[tile_idx],
                            tu_x,
                            tu_y,
                            blk_geom->bsize,
                            tx_size,
                            &md_ctx->luma_txb_skip_context,
                            &md_ctx->luma_dc_sign_context);

        // Copy neighbour arrays for intra prediction
        const PredictionMode mode     = blk_ptr->block_mi.mode;
        const int            ang      = blk_ptr->block_mi.angle_delta[PLANE_TYPE_Y];
        IntraSize            ref_span = {2, 2};
        if (ang == 0) {
            ref_span = svt_aom_intra_unit[mode];
        }

        uint8_t above_ref[(64 * 2 + 1) << 1];
        uint8_t left_ref[(64 * 2 + 1) << 1];

        if (tu_y != 0) {
            svt_memcpy(above_ref + ref_start,
                       luma_recon_na->top_array + (tu_x << is_16bit),
                       (tx_width * ref_span.top) << is_16bit);
        }

        if (tu_x != 0) {
            // Fall back to a single transform height when the extended left span would cross the SB bottom
            uint16_t left_mult = ref_span.left;
            if ((tu_y % sb_size_luma + tx_height * ref_span.left) > sb_size_luma) {
                left_mult = 1;
            }
            svt_memcpy(left_ref + ref_start,
                       luma_recon_na->left_array + (tu_y << is_16bit),
                       (tx_height * left_mult) << is_16bit);
        }

        if (tu_y != 0 && tu_x != 0) {
            if (is_16bit) {
                const uint16_t corner =
                    *((uint16_t*)(luma_recon_na->top_left_array) + luma_recon_na->max_pic_h + tu_x - tu_y);
                ((uint16_t*)left_ref)[0]  = corner;
                ((uint16_t*)above_ref)[0] = corner;
            } else {
                const uint8_t corner = luma_recon_na->top_left_array[luma_recon_na->max_pic_h + tu_x - tu_y];
                left_ref[0]          = corner;
                above_ref[0]         = corner;
            }
        }

        svt_av1_predict_intra_block(blk_ptr->av1xd,
                                    blk_geom->bsize,
                                    tx_size,
                                    mode,
                                    blk_ptr->block_mi.angle_delta[PLANE_TYPE_Y],
                                    blk_ptr->palette_size[0] > 0,
                                    blk_ptr->palette_info,
                                    blk_ptr->block_mi.filter_intra_mode,
                                    above_ref + ref_start,
                                    left_ref + ref_start,
                                    recon_pic,
                                    (tx_org[blk_geom->bsize][is_inter][tx_depth][ed_ctx->txb_itr].x) >> 2,
                                    (tx_org[blk_geom->bsize][is_inter][tx_depth][ed_ctx->txb_itr].y) >> 2,
                                    PLANE_Y,
                                    md_ctx->shape,
                                    tu_x,
                                    tu_y,
                                    &pcs->scs->seq_header,
                                    ed_ctx->bit_depth);

        // Encode Transform Unit -INTRA-
        av1_encode_loop(pcs,
                        ed_ctx,
                        tu_x,
                        tu_y,
                        recon_pic,
                        sb_coeff_pic,
                        residual_pic,
                        transform_pic,
                        dequant_pic,
                        PICTURE_BUFFER_DESC_LUMA_MASK,
                        eobs[ed_ctx->txb_itr]);
        av1_encode_generate_recon(pcs,
                                  ed_ctx,
                                  tu_x,
                                  tu_y,
                                  recon_pic,
                                  dequant_pic,
                                  PICTURE_BUFFER_DESC_LUMA_MASK,
                                  eobs[ed_ctx->txb_itr]);

        // Update Recon Samples-INTRA-
        encode_pass_update_recon_sample_neighbour_arrays(luma_recon_na,
                                                         cb_recon_na,
                                                         cr_recon_na,
                                                         recon_pic,
                                                         tu_x,
                                                         tu_y,
                                                         tx_width,
                                                         tx_height,
                                                         tx_width_uv,
                                                         tx_height_uv,
                                                         PICTURE_BUFFER_DESC_LUMA_MASK,
                                                         is_16bit);

        ed_ctx->coded_area_sb += tx_width * tx_height;

        // Update the luma Dc Sign Level Coeff Neighbor Array
        {
            uint8_t dc_sign_level_coeff = (uint8_t)blk_ptr->quant_dc.y[ed_ctx->txb_itr];
            svt_aom_neighbor_array_unit_mode_write(pcs->ep_luma_dc_sign_level_coeff_na[tile_idx],
                                                   (uint8_t*)&dc_sign_level_coeff,
                                                   tu_x,
                                                   tu_y,
                                                   tx_width,
                                                   tx_height,
                                                   NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
        }
    } // Transform Loop

    // Chroma path
    if (md_ctx->has_uv) {
        ed_ctx->txb_itr = 0;

        const uint16_t tu_x = ed_ctx->blk_org_x + tx_org[blk_geom->bsize][is_inter][tx_depth][ed_ctx->txb_itr].x;
        const uint16_t tu_y = ed_ctx->blk_org_y + tx_org[blk_geom->bsize][is_inter][tx_depth][ed_ctx->txb_itr].y;

        const uint32_t uv_org_x = (ed_ctx->blk_org_x >> 3 << 3) >> 1;
        const uint32_t uv_org_y = (ed_ctx->blk_org_y >> 3 << 3) >> 1;

        md_ctx->cb_txb_skip_context = 0;
        md_ctx->cb_dc_sign_context  = 0;
        svt_aom_get_txb_ctx(pcs,
                            COMPONENT_CHROMA,
                            pcs->ep_cb_dc_sign_level_coeff_na[tile_idx],
                            uv_org_x,
                            uv_org_y,
                            blk_geom->bsize_uv,
                            tx_size_uv,
                            &md_ctx->cb_txb_skip_context,
                            &md_ctx->cb_dc_sign_context);

        md_ctx->cr_txb_skip_context = 0;
        md_ctx->cr_dc_sign_context  = 0;
        svt_aom_get_txb_ctx(pcs,
                            COMPONENT_CHROMA,
                            pcs->ep_cr_dc_sign_level_coeff_na[tile_idx],
                            uv_org_x,
                            uv_org_y,
                            blk_geom->bsize_uv,
                            tx_size_uv,
                            &md_ctx->cr_txb_skip_context,
                            &md_ctx->cr_dc_sign_context);

        // Generate prediction for both chroma planes
        for (Plane plane = PLANE_U; plane <= PLANE_V; ++plane) {
            uint8_t above_ref[(64 * 2 + 1) << 1];
            uint8_t left_ref[(64 * 2 + 1) << 1];

            // Copy neighbour arrays for intra prediction
            // CfL is predicted here with the DC mode
            PredictionMode mode = (PredictionMode)blk_ptr->block_mi.uv_mode;
            if (blk_ptr->block_mi.uv_mode == UV_CFL_PRED) {
                mode = (PredictionMode)UV_DC_PRED;
            }

            const int ang      = blk_ptr->block_mi.angle_delta[PLANE_TYPE_UV];
            IntraSize ref_span = {2, 2};
            if (ang == 0) {
                ref_span = svt_aom_intra_unit[mode];
            }

            NeighborArrayUnit* uv_recon_na;
            if (plane == 1) {
                uv_recon_na = cb_recon_na;
            } else {
                uv_recon_na = cr_recon_na;
            }

            if (uv_org_y != 0) {
                svt_memcpy(above_ref + ref_start,
                           uv_recon_na->top_array + (uv_org_x << is_16bit),
                           (blk_geom->bwidth_uv * ref_span.top) << is_16bit);
            }

            if (uv_org_x != 0) {
                // Fall back to a single block height when the extended left span would cross the SB bottom
                uint16_t left_mult = ref_span.left;
                if ((uv_org_y % sb_size_chroma + blk_geom->bheight_uv * ref_span.left) > sb_size_chroma) {
                    left_mult = 1;
                }
                svt_memcpy(left_ref + ref_start,
                           uv_recon_na->left_array + (uv_org_y << is_16bit),
                           (blk_geom->bheight_uv * left_mult) << is_16bit);
            }

            if (uv_org_y != 0 && uv_org_x != 0) {
                if (is_16bit) {
                    const uint16_t corner =
                        *((uint16_t*)(uv_recon_na->top_left_array) + uv_recon_na->max_pic_h + uv_org_x - uv_org_y);
                    ((uint16_t*)left_ref)[0]  = corner;
                    ((uint16_t*)above_ref)[0] = corner;
                } else {
                    const uint8_t corner = uv_recon_na->top_left_array[uv_recon_na->max_pic_h + uv_org_x - uv_org_y];
                    left_ref[0]          = corner;
                    above_ref[0]         = corner;
                }
            }

            svt_av1_predict_intra_block(blk_ptr->av1xd,
                                        blk_geom->bsize,
                                        tx_size_uv,
                                        mode,
                                        blk_ptr->block_mi.angle_delta[PLANE_TYPE_UV],
                                        0, //chroma
                                        blk_ptr->palette_info,
                                        FILTER_INTRA_MODES,
                                        above_ref + ref_start,
                                        left_ref + ref_start,
                                        recon_pic,
                                        0,
                                        0,
                                        plane,
                                        md_ctx->shape,
                                        plane ? ROUND_UV(ed_ctx->blk_org_x) >> 1 : tu_x,
                                        plane ? ROUND_UV(ed_ctx->blk_org_y) >> 1 : tu_y,
                                        &pcs->scs->seq_header,
                                        ed_ctx->bit_depth);
        }

        // Encode Transform Unit -INTRA-
        av1_encode_loop(pcs,
                        ed_ctx,
                        tu_x,
                        tu_y,
                        recon_pic,
                        sb_coeff_pic,
                        residual_pic,
                        transform_pic,
                        dequant_pic,
                        PICTURE_BUFFER_DESC_CHROMA_MASK,
                        eobs[ed_ctx->txb_itr]);
        av1_encode_generate_recon(pcs,
                                  ed_ctx,
                                  tu_x,
                                  tu_y,
                                  recon_pic,
                                  dequant_pic,
                                  PICTURE_BUFFER_DESC_CHROMA_MASK,
                                  eobs[ed_ctx->txb_itr]);

        // Update Recon Samples-INTRA-
        encode_pass_update_recon_sample_neighbour_arrays(luma_recon_na,
                                                         cb_recon_na,
                                                         cr_recon_na,
                                                         recon_pic,
                                                         tu_x,
                                                         tu_y,
                                                         tx_width,
                                                         tx_height,
                                                         tx_width_uv,
                                                         tx_height_uv,
                                                         PICTURE_BUFFER_DESC_CHROMA_MASK,
                                                         is_16bit);

        ed_ctx->coded_area_sb_uv += tx_width_uv * tx_height_uv;

        // Update the cb Dc Sign Level Coeff Neighbor Array
        {
            uint8_t dc_sign_level_coeff = (uint8_t)blk_ptr->quant_dc.u[ed_ctx->txb_itr];
            svt_aom_neighbor_array_unit_mode_write(pcs->ep_cb_dc_sign_level_coeff_na[tile_idx],
                                                   (uint8_t*)&dc_sign_level_coeff,
                                                   ROUND_UV(tu_x) >> 1,
                                                   ROUND_UV(tu_y) >> 1,
                                                   tx_width_uv,
                                                   tx_height_uv,
                                                   NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
        }

        // Update the cr DC Sign Level Coeff Neighbor Array
        {
            uint8_t dc_sign_level_coeff = (uint8_t)blk_ptr->quant_dc.v[ed_ctx->txb_itr];
            svt_aom_neighbor_array_unit_mode_write(pcs->ep_cr_dc_sign_level_coeff_na[tile_idx],
                                                   (uint8_t*)&dc_sign_level_coeff,
                                                   ROUND_UV(tu_x) >> 1,
                                                   ROUND_UV(tu_y) >> 1,
                                                   tx_width_uv,
                                                   tx_height_uv,
                                                   NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
        }
    } // Chroma path

    assert(IMPLIES(!md_ctx->has_uv, blk_ptr->u_has_coeff == 0 && blk_ptr->v_has_coeff == 0));
    blk_ptr->block_has_coeff = (blk_ptr->y_has_coeff || blk_ptr->u_has_coeff || blk_ptr->v_has_coeff);
}
1038
1039
0
#define REFMVS_LIMIT ((1 << 12) - 1)
1040
1041
static void av1_copy_frame_mvs(PictureControlSet* pcs, const Av1Common* const cm, MbModeInfo mi, int mi_row, int mi_col,
1042
0
                               int x_mis, int y_mis, EbReferenceObject* object_ptr) {
1043
0
    const int frame_mvs_stride = ROUND_POWER_OF_TWO(cm->mi_cols, 1);
1044
0
    MV_REF*   frame_mvs        = object_ptr->mvs + (mi_row >> 1) * frame_mvs_stride + (mi_col >> 1);
1045
0
    x_mis                      = ROUND_POWER_OF_TWO(x_mis, 1);
1046
0
    y_mis                      = ROUND_POWER_OF_TWO(y_mis, 1);
1047
0
    int w, h;
1048
1049
0
    for (h = 0; h < y_mis; h++) {
1050
0
        MV_REF* mv = frame_mvs;
1051
0
        for (w = 0; w < x_mis; w++) {
1052
0
            mv->ref_frame = NONE_FRAME;
1053
0
            mv->mv.as_int = 0;
1054
1055
0
            for (int idx = 0; idx < 2; ++idx) {
1056
0
                MvReferenceFrame ref_frame = mi.block_mi.ref_frame[idx];
1057
0
                if (ref_frame > INTRA_FRAME) {
1058
0
                    int8_t ref_idx = pcs->ref_frame_side[ref_frame];
1059
0
                    if (ref_idx) {
1060
0
                        continue;
1061
0
                    }
1062
0
                    if ((abs(mi.block_mi.mv[idx].y) > REFMVS_LIMIT) || (abs(mi.block_mi.mv[idx].x) > REFMVS_LIMIT)) {
1063
0
                        continue;
1064
0
                    }
1065
0
                    mv->ref_frame = ref_frame;
1066
0
                    mv->mv.as_int = mi.block_mi.mv[idx].as_int;
1067
0
                }
1068
0
            }
1069
0
            mv++;
1070
0
        }
1071
0
        frame_mvs += frame_mvs_stride;
1072
0
    }
1073
0
}
1074
1075
/*
1076
 * Convert the recon picture from 16bit to 8bit.  Recon pic is passed through the pcs.
1077
 */
1078
0
void svt_aom_convert_recon_16bit_to_8bit(PictureControlSet* pcs, EncDecContext* ctx) {
1079
0
    EbPictureBufferDesc* recon_buffer_16bit;
1080
0
    EbPictureBufferDesc* recon_buffer_8bit;
1081
0
    svt_aom_get_recon_pic(pcs, &recon_buffer_16bit, 1);
1082
0
    if (pcs->ppcs->is_ref == true) {
1083
        // get the 16bit form of the input SB
1084
0
        recon_buffer_8bit = ((EbReferenceObject*)pcs->ppcs->ref_pic_wrapper->object_ptr)->reference_picture;
1085
0
    } else { // non ref pictures
1086
0
        recon_buffer_8bit = pcs->ppcs->enc_dec_ptr->recon_pic;
1087
0
    }
1088
1089
0
    uint32_t pred_buf_x_offest = ctx->blk_org_x;
1090
0
    uint32_t pred_buf_y_offest = ctx->blk_org_y;
1091
1092
0
    uint16_t* dst_16bit = (uint16_t*)(recon_buffer_16bit->y_buffer) + pred_buf_x_offest +
1093
0
        (pred_buf_y_offest * recon_buffer_16bit->y_stride);
1094
0
    int32_t dst_stride_16bit = recon_buffer_16bit->y_stride;
1095
1096
0
    uint8_t* dst = recon_buffer_8bit->y_buffer + pred_buf_x_offest + (pred_buf_y_offest * recon_buffer_8bit->y_stride);
1097
0
    int32_t  dst_stride = recon_buffer_8bit->y_stride;
1098
1099
0
    svt_convert_16bit_to_8bit(
1100
0
        dst_16bit, dst_stride_16bit, dst, dst_stride, ctx->blk_geom->bwidth, ctx->blk_geom->bheight);
1101
1102
    //copy recon from 16bit to 8bit
1103
0
    pred_buf_x_offest = ROUND_UV(ctx->blk_org_x) >> 1;
1104
0
    pred_buf_y_offest = ROUND_UV(ctx->blk_org_y) >> 1;
1105
1106
0
    dst_16bit = (uint16_t*)(recon_buffer_16bit->u_buffer) + pred_buf_x_offest +
1107
0
        (pred_buf_y_offest * recon_buffer_16bit->u_stride);
1108
0
    dst_stride_16bit = recon_buffer_16bit->u_stride;
1109
1110
0
    dst        = recon_buffer_8bit->u_buffer + pred_buf_x_offest + (pred_buf_y_offest * recon_buffer_8bit->u_stride);
1111
0
    dst_stride = recon_buffer_8bit->u_stride;
1112
1113
0
    svt_convert_16bit_to_8bit(
1114
0
        dst_16bit, dst_stride_16bit, dst, dst_stride, ctx->blk_geom->bwidth_uv, ctx->blk_geom->bheight_uv);
1115
1116
0
    dst_16bit = (uint16_t*)(recon_buffer_16bit->v_buffer) +
1117
0
        (pred_buf_x_offest + (pred_buf_y_offest * recon_buffer_16bit->v_stride));
1118
0
    dst_stride_16bit = recon_buffer_16bit->v_stride;
1119
0
    dst        = recon_buffer_8bit->v_buffer + pred_buf_x_offest + (pred_buf_y_offest * recon_buffer_8bit->v_stride);
1120
0
    dst_stride = recon_buffer_8bit->v_stride;
1121
1122
0
    svt_convert_16bit_to_8bit(
1123
0
        dst_16bit, dst_stride_16bit, dst, dst_stride, ctx->blk_geom->bwidth_uv, ctx->blk_geom->bheight_uv);
1124
0
}
1125
1126
/*
1127
 * Inter coding loop for EncDec process.
1128
 *
1129
 * For the given mode info, perform inter prediction, transform and recon.
1130
 * Update relevant neighbour arrays.
1131
 */
1132
0
static void perform_inter_coding_loop(PictureControlSet* pcs, EncDecContext* ctx) {
1133
0
    SequenceControlSet* scs      = pcs->scs;
1134
0
    const BlockGeom*    blk_geom = ctx->blk_geom;
1135
0
    BlkStruct*          blk_ptr  = ctx->blk_ptr;
1136
1137
    // temp buffers for performing the transform/generating the recon
1138
0
    EbPictureBufferDesc* residual_buffer      = ctx->md_ctx->temp_residual;
1139
0
    EbPictureBufferDesc* transform_buffer     = ctx->md_ctx->tx_coeffs;
1140
0
    EbPictureBufferDesc* inverse_quant_buffer = ctx->md_ctx->cand_bf_ptr_array[0]->rec_coeff;
1141
1142
0
    bool                 is_16bit = ctx->is_16bit;
1143
0
    EbPictureBufferDesc* recon_buffer;
1144
0
    EbPictureBufferDesc* coeff_buffer_sb = pcs->ppcs->enc_dec_ptr->quantized_coeff[ctx->sb_index];
1145
0
    ModeDecisionContext* md_ctx          = ctx->md_ctx;
1146
0
    const int            is_inter        = is_inter_block(&blk_ptr->block_mi);
1147
0
    assert(is_inter);
1148
1149
    // Dereferencing early
1150
0
    uint16_t tile_idx = ctx->tile_index;
1151
1152
0
    NeighborArrayUnit* ep_luma_recon_na = is_16bit ? pcs->ep_luma_recon_na_16bit[tile_idx]
1153
0
                                                   : pcs->ep_luma_recon_na[tile_idx];
1154
0
    NeighborArrayUnit* ep_cb_recon_na = is_16bit ? pcs->ep_cb_recon_na_16bit[tile_idx] : pcs->ep_cb_recon_na[tile_idx];
1155
0
    NeighborArrayUnit* ep_cr_recon_na = is_16bit ? pcs->ep_cr_recon_na_16bit[tile_idx] : pcs->ep_cr_recon_na[tile_idx];
1156
1157
0
    svt_aom_get_recon_pic(pcs, &recon_buffer, is_16bit);
1158
1159
    // Inter Prediction
1160
0
    EbPictureBufferDesc* ref_pic_list0;
1161
0
    EbPictureBufferDesc* ref_pic_list1;
1162
0
    if (blk_ptr->block_mi.use_intrabc) {
1163
0
        svt_aom_get_recon_pic(pcs, &ref_pic_list0, is_16bit);
1164
0
        ref_pic_list1 = (EbPictureBufferDesc*)NULL;
1165
0
    } else {
1166
0
        ref_pic_list0 = svt_aom_get_ref_pic_buffer(pcs, blk_ptr->block_mi.ref_frame[0]);
1167
0
        ref_pic_list1 = svt_aom_get_ref_pic_buffer(pcs, blk_ptr->block_mi.ref_frame[1]);
1168
0
    }
1169
1170
0
    svt_aom_inter_prediction(scs,
1171
0
                             pcs,
1172
0
                             &blk_ptr->block_mi,
1173
0
                             &md_ctx->blk_ptr->wm_params_l0,
1174
0
                             &md_ctx->blk_ptr->wm_params_l1,
1175
0
                             blk_ptr,
1176
0
                             blk_geom->bsize,
1177
0
                             ctx->md_ctx->shape,
1178
0
                             false, //use_precomputed_obmc,
1179
0
                             false, //use_precomputed_ii
1180
0
                             NULL, // md_ctx - only needed for precompute obmc/ii
1181
0
                             ep_luma_recon_na,
1182
0
                             ep_cb_recon_na,
1183
0
                             ep_cr_recon_na,
1184
0
                             ref_pic_list0,
1185
0
                             ref_pic_list1,
1186
0
                             ctx->blk_org_x,
1187
0
                             ctx->blk_org_y,
1188
0
                             recon_buffer,
1189
0
                             ctx->blk_org_x,
1190
0
                             ctx->blk_org_y,
1191
0
                             md_ctx->has_uv ? PICTURE_BUFFER_DESC_FULL_MASK : PICTURE_BUFFER_DESC_LUMA_MASK,
1192
0
                             (uint8_t)scs->static_config.encoder_bit_depth,
1193
0
                             is_16bit);
1194
1195
    // Transform Loop
1196
0
    blk_ptr->y_has_coeff = 0;
1197
0
    blk_ptr->u_has_coeff = 0;
1198
0
    blk_ptr->v_has_coeff = 0;
1199
1200
    // Initialize the Transform Loop
1201
0
    uint16_t       eobs[MAX_TXB_COUNT][3];
1202
0
    const uint8_t  tx_depth     = blk_ptr->block_mi.tx_depth;
1203
0
    const uint16_t tot_tu       = tx_blocks_per_depth[blk_geom->bsize][tx_depth];
1204
0
    const TxSize   tx_size      = tx_depth_to_tx_size[tx_depth][blk_geom->bsize];
1205
0
    const int      tx_width     = tx_size_wide[tx_size];
1206
0
    const int      tx_height    = tx_size_high[tx_size];
1207
0
    const TxSize   tx_size_uv   = av1_get_max_uv_txsize(blk_geom->bsize, 1, 1);
1208
0
    const int      tx_width_uv  = tx_size_wide[tx_size_uv];
1209
0
    const int      tx_height_uv = tx_size_high[tx_size_uv];
1210
1211
0
    for (ctx->txb_itr = 0; ctx->txb_itr < tot_tu; ctx->txb_itr++) {
1212
0
        const uint8_t  uv_pass        = tx_depth && ctx->txb_itr ? 0 : 1; //NM: 128x128 exeption
1213
0
        const uint16_t txb_origin_x   = ctx->blk_org_x + tx_org[blk_geom->bsize][is_inter][tx_depth][ctx->txb_itr].x;
1214
0
        const uint16_t txb_origin_y   = ctx->blk_org_y + tx_org[blk_geom->bsize][is_inter][tx_depth][ctx->txb_itr].y;
1215
0
        md_ctx->luma_txb_skip_context = 0;
1216
0
        md_ctx->luma_dc_sign_context  = 0;
1217
0
        svt_aom_get_txb_ctx(pcs,
1218
0
                            COMPONENT_LUMA,
1219
0
                            pcs->ep_luma_dc_sign_level_coeff_na[tile_idx],
1220
0
                            txb_origin_x,
1221
0
                            txb_origin_y,
1222
0
                            blk_geom->bsize,
1223
0
                            tx_size,
1224
0
                            &md_ctx->luma_txb_skip_context,
1225
0
                            &md_ctx->luma_dc_sign_context);
1226
1227
0
        if (md_ctx->has_uv && uv_pass) {
1228
0
            md_ctx->cb_txb_skip_context = 0;
1229
0
            md_ctx->cb_dc_sign_context  = 0;
1230
0
            svt_aom_get_txb_ctx(pcs,
1231
0
                                COMPONENT_CHROMA,
1232
0
                                pcs->ep_cb_dc_sign_level_coeff_na[tile_idx],
1233
0
                                ROUND_UV(txb_origin_x) >> 1,
1234
0
                                ROUND_UV(txb_origin_y) >> 1,
1235
0
                                blk_geom->bsize_uv,
1236
0
                                tx_size_uv,
1237
0
                                &md_ctx->cb_txb_skip_context,
1238
0
                                &md_ctx->cb_dc_sign_context);
1239
1240
0
            md_ctx->cr_txb_skip_context = 0;
1241
0
            md_ctx->cr_dc_sign_context  = 0;
1242
0
            svt_aom_get_txb_ctx(pcs,
1243
0
                                COMPONENT_CHROMA,
1244
0
                                pcs->ep_cr_dc_sign_level_coeff_na[tile_idx],
1245
0
                                ROUND_UV(txb_origin_x) >> 1,
1246
0
                                ROUND_UV(txb_origin_y) >> 1,
1247
0
                                blk_geom->bsize_uv,
1248
0
                                tx_size_uv,
1249
0
                                &md_ctx->cr_txb_skip_context,
1250
0
                                &md_ctx->cr_dc_sign_context);
1251
0
        }
1252
0
        if (blk_ptr->block_mi.skip_mode == true) {
1253
0
            blk_ptr->y_has_coeff = 0;
1254
0
            blk_ptr->u_has_coeff = 0;
1255
0
            blk_ptr->v_has_coeff = 0;
1256
1257
0
            blk_ptr->quant_dc.y[ctx->txb_itr] = 0;
1258
0
            blk_ptr->quant_dc.u[ctx->txb_itr] = 0;
1259
0
            blk_ptr->quant_dc.v[ctx->txb_itr] = 0;
1260
0
        } else {
1261
            //inter mode  2
1262
0
            av1_encode_loop(pcs,
1263
0
                            ctx,
1264
0
                            txb_origin_x, //pic offset
1265
0
                            txb_origin_y,
1266
0
                            recon_buffer,
1267
0
                            coeff_buffer_sb,
1268
0
                            residual_buffer,
1269
0
                            transform_buffer,
1270
0
                            inverse_quant_buffer,
1271
0
                            md_ctx->has_uv && uv_pass ? PICTURE_BUFFER_DESC_FULL_MASK : PICTURE_BUFFER_DESC_LUMA_MASK,
1272
0
                            eobs[ctx->txb_itr]);
1273
0
        }
1274
1275
        //inter mode
1276
0
        av1_encode_generate_recon(
1277
0
            pcs,
1278
0
            ctx,
1279
0
            txb_origin_x, //pic offset
1280
0
            txb_origin_y,
1281
0
            recon_buffer,
1282
0
            inverse_quant_buffer,
1283
0
            md_ctx->has_uv && uv_pass ? PICTURE_BUFFER_DESC_FULL_MASK : PICTURE_BUFFER_DESC_LUMA_MASK,
1284
0
            eobs[ctx->txb_itr]);
1285
1286
0
        ctx->coded_area_sb += tx_width * tx_height;
1287
1288
0
        if (md_ctx->has_uv && uv_pass) {
1289
0
            ctx->coded_area_sb_uv += tx_width_uv * tx_height_uv;
1290
0
        }
1291
1292
        // Update the luma Dc Sign Level Coeff Neighbor Array
1293
0
        uint8_t dc_sign_level_coeff = (uint8_t)blk_ptr->quant_dc.y[ctx->txb_itr];
1294
1295
0
        svt_aom_neighbor_array_unit_mode_write(pcs->ep_luma_dc_sign_level_coeff_na[tile_idx],
1296
0
                                               (uint8_t*)&dc_sign_level_coeff,
1297
0
                                               txb_origin_x,
1298
0
                                               txb_origin_y,
1299
0
                                               tx_width,
1300
0
                                               tx_height,
1301
0
                                               NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
1302
1303
        // Update the cb Dc Sign Level Coeff Neighbor Array
1304
0
        if (md_ctx->has_uv && uv_pass) {
1305
0
            dc_sign_level_coeff = (uint8_t)blk_ptr->quant_dc.u[ctx->txb_itr];
1306
1307
0
            svt_aom_neighbor_array_unit_mode_write(pcs->ep_cb_dc_sign_level_coeff_na[tile_idx],
1308
0
                                                   (uint8_t*)&dc_sign_level_coeff,
1309
0
                                                   ROUND_UV(txb_origin_x) >> 1,
1310
0
                                                   ROUND_UV(txb_origin_y) >> 1,
1311
0
                                                   tx_width_uv,
1312
0
                                                   tx_height_uv,
1313
0
                                                   NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
1314
            // Update the cr DC Sign Level Coeff Neighbor Array
1315
0
            dc_sign_level_coeff = (uint8_t)blk_ptr->quant_dc.v[ctx->txb_itr];
1316
1317
0
            svt_aom_neighbor_array_unit_mode_write(pcs->ep_cr_dc_sign_level_coeff_na[tile_idx],
1318
0
                                                   (uint8_t*)&dc_sign_level_coeff,
1319
0
                                                   ROUND_UV(txb_origin_x) >> 1,
1320
0
                                                   ROUND_UV(txb_origin_y) >> 1,
1321
0
                                                   tx_width_uv,
1322
0
                                                   tx_height_uv,
1323
0
                                                   NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
1324
0
        }
1325
1326
0
    } // Transform Loop
1327
1328
0
    assert(IMPLIES(!md_ctx->has_uv, blk_ptr->u_has_coeff == 0 && blk_ptr->v_has_coeff == 0));
1329
0
    blk_ptr->block_has_coeff = (blk_ptr->y_has_coeff || blk_ptr->u_has_coeff || blk_ptr->v_has_coeff);
1330
1331
    // Update Recon Samples Neighbor Arrays -INTER-
1332
0
    encode_pass_update_recon_sample_neighbour_arrays(
1333
0
        ep_luma_recon_na,
1334
0
        ep_cb_recon_na,
1335
0
        ep_cr_recon_na,
1336
0
        recon_buffer,
1337
0
        ctx->blk_org_x,
1338
0
        ctx->blk_org_y,
1339
0
        ctx->blk_geom->bwidth,
1340
0
        ctx->blk_geom->bheight,
1341
0
        ctx->blk_geom->bwidth_uv,
1342
0
        ctx->blk_geom->bheight_uv,
1343
0
        md_ctx->has_uv ? PICTURE_BUFFER_DESC_FULL_MASK : PICTURE_BUFFER_DESC_LUMA_MASK,
1344
0
        is_16bit);
1345
0
}
1346
1347
// Copy recon to EncDec buffers if EncDec was bypassed. If pred depth only was used and NSQ is OFF data
1348
// was copied directly to EncDec buffers in MD.
1349
120k
static void copy_recon(PictureControlSet* pcs, ModeDecisionContext* ctx, BlkStruct* blk_ptr) {
1350
120k
    const bool           is_16bit = ctx->ed_ctx->is_16bit;
1351
120k
    EbPictureBufferDesc* recon_buffer;
1352
120k
    svt_aom_get_recon_pic(pcs, &recon_buffer, is_16bit);
1353
120k
    if (ctx->encoder_bit_depth > EB_EIGHT_BIT) {
1354
0
        uint32_t  recon_luma_offset = (ctx->blk_org_y * recon_buffer->y_stride) + ctx->blk_org_x;
1355
0
        uint16_t* ep_recon          = ((uint16_t*)(recon_buffer->y_buffer)) + recon_luma_offset;
1356
0
        uint16_t* md_recon          = (uint16_t*)(blk_ptr->recon_tmp->y_buffer);
1357
1358
0
        for (uint32_t i = 0; i < ctx->blk_geom->bheight; i++) {
1359
0
            svt_memcpy(ep_recon + i * recon_buffer->y_stride,
1360
0
                       md_recon + i * blk_ptr->recon_tmp->y_stride,
1361
0
                       ctx->blk_geom->bwidth * sizeof(uint16_t));
1362
0
        }
1363
1364
0
        if (ctx->has_uv) {
1365
0
            uint32_t round_origin_x = ROUND_UV(ctx->blk_org_x); // for Chroma blocks with size of 4
1366
0
            uint32_t round_origin_y = ROUND_UV(ctx->blk_org_y); // for Chroma blocks with size of 4
1367
1368
            // Cr
1369
0
            uint32_t  recon_cr_offset = ((round_origin_y >> 1) * recon_buffer->v_stride) + (round_origin_x >> 1);
1370
0
            uint16_t* ep_recon_cr     = ((uint16_t*)(recon_buffer->v_buffer)) + recon_cr_offset;
1371
0
            uint16_t* md_recon_cr     = (uint16_t*)(blk_ptr->recon_tmp->v_buffer);
1372
1373
0
            for (uint32_t i = 0; i < ctx->blk_geom->bheight_uv; i++) {
1374
0
                svt_memcpy(ep_recon_cr + i * recon_buffer->v_stride,
1375
0
                           md_recon_cr + i * blk_ptr->recon_tmp->v_stride,
1376
0
                           ctx->blk_geom->bwidth_uv * sizeof(uint16_t));
1377
0
            }
1378
1379
            // Cb
1380
0
            uint32_t  recon_cb_offset = ((round_origin_y >> 1) * recon_buffer->u_stride) + (round_origin_x >> 1);
1381
0
            uint16_t* ep_recon_cb     = ((uint16_t*)(recon_buffer->u_buffer)) + recon_cb_offset;
1382
0
            uint16_t* md_recon_cb     = (uint16_t*)(blk_ptr->recon_tmp->u_buffer);
1383
1384
0
            for (uint32_t i = 0; i < ctx->blk_geom->bheight_uv; i++) {
1385
0
                svt_memcpy(ep_recon_cb + i * recon_buffer->u_stride,
1386
0
                           md_recon_cb + i * blk_ptr->recon_tmp->u_stride,
1387
0
                           ctx->blk_geom->bwidth_uv * sizeof(uint16_t));
1388
0
            }
1389
0
        }
1390
120k
    } else {
1391
120k
        uint32_t recon_luma_offset = (ctx->blk_org_y * recon_buffer->y_stride) + ctx->blk_org_x;
1392
120k
        uint8_t* ep_recon          = recon_buffer->y_buffer + recon_luma_offset;
1393
120k
        uint8_t* md_recon          = blk_ptr->recon_tmp->y_buffer;
1394
1395
1.08M
        for (uint32_t i = 0; i < ctx->blk_geom->bheight; i++) {
1396
964k
            svt_memcpy(ep_recon + i * recon_buffer->y_stride,
1397
964k
                       md_recon + i * blk_ptr->recon_tmp->y_stride,
1398
964k
                       ctx->blk_geom->bwidth * sizeof(uint8_t));
1399
964k
        }
1400
1401
120k
        if (ctx->has_uv) {
1402
120k
            uint32_t round_origin_x = ROUND_UV(ctx->blk_org_x); // for Chroma blocks with size of 4
1403
120k
            uint32_t round_origin_y = ROUND_UV(ctx->blk_org_y); // for Chroma blocks with size of 4
1404
1405
            // Cr
1406
120k
            uint32_t recon_cr_offset = ((round_origin_y >> 1) * recon_buffer->v_stride) + (round_origin_x >> 1);
1407
120k
            uint8_t* ep_recon_cr     = recon_buffer->v_buffer + recon_cr_offset;
1408
120k
            uint8_t* md_recon_cr     = blk_ptr->recon_tmp->v_buffer;
1409
1410
603k
            for (uint32_t i = 0; i < ctx->blk_geom->bheight_uv; i++) {
1411
482k
                svt_memcpy(ep_recon_cr + i * recon_buffer->v_stride,
1412
482k
                           md_recon_cr + i * blk_ptr->recon_tmp->v_stride,
1413
482k
                           ctx->blk_geom->bwidth_uv * sizeof(uint8_t));
1414
482k
            }
1415
1416
            // Cb
1417
120k
            uint32_t recon_cb_offset = ((round_origin_y >> 1) * recon_buffer->u_stride) + (round_origin_x >> 1);
1418
120k
            uint8_t* ep_recon_cb     = recon_buffer->u_buffer + recon_cb_offset;
1419
120k
            uint8_t* md_recon_cb     = blk_ptr->recon_tmp->u_buffer;
1420
1421
603k
            for (uint32_t i = 0; i < ctx->blk_geom->bheight_uv; i++) {
1422
482k
                svt_memcpy(ep_recon_cb + i * recon_buffer->u_stride,
1423
482k
                           md_recon_cb + i * blk_ptr->recon_tmp->u_stride,
1424
482k
                           ctx->blk_geom->bwidth_uv * sizeof(uint8_t));
1425
482k
            }
1426
120k
        }
1427
120k
    }
1428
120k
}
1429
1430
// Copy quantized coeffs to EncDec buffers if EncDec was bypassed. If pred depth only was used and NSQ is OFF data
1431
// was copied directly to EncDec buffers in MD.
1432
static void copy_qcoeffs(PictureControlSet* pcs, EncDecContext* ctx, BlkStruct* blk_ptr, uint32_t blk_coded_area,
1433
482k
                         uint32_t blk_coded_area_uv) {
1434
482k
    const BlockGeom*     blk_geom        = ctx->blk_geom;
1435
482k
    EbPictureBufferDesc* coeff_buffer_sb = pcs->ppcs->enc_dec_ptr->quantized_coeff[ctx->sb_index];
1436
482k
    const uint8_t        tx_depth        = blk_ptr->block_mi.tx_depth;
1437
482k
    const uint8_t        txb_itr         = ctx->txb_itr;
1438
482k
    const uint8_t        uv_pass         = tx_depth && txb_itr ? 0 : 1; //NM: 128x128 exeption
1439
1440
482k
    int32_t* ep_coeff = ((int32_t*)coeff_buffer_sb->y_buffer) + ctx->coded_area_sb_update;
1441
482k
    int32_t* md_coeff = ((int32_t*)blk_ptr->coeff_tmp->y_buffer) + blk_coded_area;
1442
1443
482k
    if ((blk_ptr->y_has_coeff & (1 << txb_itr))) {
1444
2.07k
        const TxSize tx_size   = tx_depth_to_tx_size[tx_depth][blk_geom->bsize];
1445
2.07k
        const int    tx_width  = tx_size_wide[tx_size];
1446
2.07k
        const int    tx_height = tx_size_high[tx_size];
1447
2.07k
        svt_memcpy(ep_coeff, md_coeff, sizeof(int32_t) * tx_height * tx_width);
1448
2.07k
    }
1449
1450
482k
    if (ctx->md_ctx->has_uv && uv_pass) {
1451
120k
        const TxSize tx_size_uv   = av1_get_max_uv_txsize(blk_geom->bsize, 1, 1);
1452
120k
        const int    tx_width_uv  = tx_size_wide[tx_size_uv];
1453
120k
        const int    tx_height_uv = tx_size_high[tx_size_uv];
1454
120k
        int32_t*     ep_coeff_cb  = ((int32_t*)coeff_buffer_sb->u_buffer) + ctx->coded_area_sb_uv_update;
1455
120k
        int32_t*     md_coeff_cb  = ((int32_t*)blk_ptr->coeff_tmp->u_buffer) + blk_coded_area_uv;
1456
1457
120k
        if ((blk_ptr->u_has_coeff & (1 << txb_itr))) {
1458
2.07k
            svt_memcpy(ep_coeff_cb, md_coeff_cb, sizeof(int32_t) * tx_height_uv * tx_width_uv);
1459
2.07k
        }
1460
1461
120k
        int32_t* ep_coeff_cr = ((int32_t*)coeff_buffer_sb->v_buffer) + ctx->coded_area_sb_uv_update;
1462
120k
        int32_t* md_coeff_cr = ((int32_t*)blk_ptr->coeff_tmp->v_buffer) + blk_coded_area_uv;
1463
1464
120k
        if ((blk_ptr->v_has_coeff & (1 << txb_itr))) {
1465
2.07k
            svt_memcpy(ep_coeff_cr, md_coeff_cr, sizeof(int32_t) * tx_height_uv * tx_width_uv);
1466
2.07k
        }
1467
120k
    }
1468
482k
}
1469
1470
// Perform CDF update (MD feature) for coeff-related CDFs
1471
0
void update_coeff_cdf(PictureControlSet* pcs, EncDecContext* ctx, BlkStruct* blk_ptr) {
1472
0
    ModeDecisionContext* md_ctx          = ctx->md_ctx;
1473
0
    const BlockGeom*     blk_geom        = ctx->blk_geom;
1474
0
    EbPictureBufferDesc* coeff_buffer_sb = pcs->ppcs->enc_dec_ptr->quantized_coeff[ctx->sb_index];
1475
0
    const uint8_t        tx_depth        = blk_ptr->block_mi.tx_depth;
1476
0
    const uint8_t        txb_itr         = ctx->txb_itr;
1477
0
    const uint8_t        uv_pass         = tx_depth && ctx->txb_itr ? 0 : 1; //NM: 128x128 exeption
1478
0
    const uint16_t       tile_idx        = ctx->tile_index;
1479
0
    const int            is_inter        = is_inter_block(&blk_ptr->block_mi);
1480
0
    const TxSize         tx_size         = tx_depth_to_tx_size[tx_depth][blk_geom->bsize];
1481
0
    const int            tx_width        = tx_size_wide[tx_size];
1482
0
    const int            tx_height       = tx_size_high[tx_size];
1483
0
    const TxSize         tx_size_uv      = av1_get_max_uv_txsize(blk_geom->bsize, 1, 1);
1484
0
    const int            tx_width_uv     = tx_size_wide[tx_size_uv];
1485
0
    const int            tx_height_uv    = tx_size_high[tx_size_uv];
1486
0
    const uint16_t       txb_origin_x    = ctx->blk_org_x + tx_org[blk_geom->bsize][is_inter][tx_depth][txb_itr].x;
1487
0
    const uint16_t       txb_origin_y    = ctx->blk_org_y + tx_org[blk_geom->bsize][is_inter][tx_depth][txb_itr].y;
1488
1489
0
    md_ctx->luma_txb_skip_context = 0;
1490
0
    md_ctx->luma_dc_sign_context  = 0;
1491
0
    svt_aom_get_txb_ctx(pcs,
1492
0
                        COMPONENT_LUMA,
1493
0
                        pcs->ep_luma_dc_sign_level_coeff_na_update[tile_idx],
1494
0
                        txb_origin_x,
1495
0
                        txb_origin_y,
1496
0
                        blk_geom->bsize,
1497
0
                        tx_size,
1498
0
                        &md_ctx->luma_txb_skip_context,
1499
0
                        &md_ctx->luma_dc_sign_context);
1500
1501
0
    if (md_ctx->has_uv && uv_pass) {
1502
0
        md_ctx->cb_txb_skip_context = 0;
1503
0
        md_ctx->cb_dc_sign_context  = 0;
1504
0
        svt_aom_get_txb_ctx(pcs,
1505
0
                            COMPONENT_CHROMA,
1506
0
                            pcs->ep_cb_dc_sign_level_coeff_na_update[tile_idx],
1507
0
                            ROUND_UV(txb_origin_x) >> 1,
1508
0
                            ROUND_UV(txb_origin_y) >> 1,
1509
0
                            blk_geom->bsize_uv,
1510
0
                            tx_size_uv,
1511
0
                            &md_ctx->cb_txb_skip_context,
1512
0
                            &md_ctx->cb_dc_sign_context);
1513
1514
0
        md_ctx->cr_txb_skip_context = 0;
1515
0
        md_ctx->cr_dc_sign_context  = 0;
1516
0
        svt_aom_get_txb_ctx(pcs,
1517
0
                            COMPONENT_CHROMA,
1518
0
                            pcs->ep_cr_dc_sign_level_coeff_na_update[tile_idx],
1519
0
                            ROUND_UV(txb_origin_x) >> 1,
1520
0
                            ROUND_UV(txb_origin_y) >> 1,
1521
0
                            blk_geom->bsize_uv,
1522
0
                            tx_size_uv,
1523
0
                            &md_ctx->cr_txb_skip_context,
1524
0
                            &md_ctx->cr_dc_sign_context);
1525
0
    }
1526
1527
0
    ModeDecisionCandidateBuffer** cand_bf_ptr_array_base = md_ctx->cand_bf_ptr_array;
1528
0
    ModeDecisionCandidateBuffer** cand_bf_ptr_array      = &(cand_bf_ptr_array_base[0]);
1529
0
    ModeDecisionCandidateBuffer*  cand_bf;
1530
1531
    // Set the Candidate Buffer
1532
0
    cand_bf = cand_bf_ptr_array[0];
1533
    // Rate estimation function uses the values from CandidatePtr. The right values are copied from blk_ptr to CandidatePtr
1534
0
    cand_bf->cand->block_mi.mode              = blk_ptr->block_mi.mode;
1535
0
    cand_bf->cand->block_mi.filter_intra_mode = blk_ptr->block_mi.filter_intra_mode;
1536
0
    if (blk_ptr->block_has_coeff) {
1537
0
        uint64_t y_txb_coeff_bits;
1538
0
        uint64_t cb_txb_coeff_bits;
1539
0
        uint64_t cr_txb_coeff_bits;
1540
0
        svt_aom_txb_estimate_coeff_bits(md_ctx,
1541
0
                                        1, //allow_update_cdf,
1542
0
                                        &pcs->ec_ctx_array[ctx->sb_index],
1543
0
                                        pcs,
1544
0
                                        cand_bf,
1545
0
                                        ctx->coded_area_sb_update,
1546
0
                                        ctx->coded_area_sb_uv_update,
1547
0
                                        coeff_buffer_sb,
1548
0
                                        blk_ptr->eob.y[txb_itr],
1549
0
                                        blk_ptr->eob.u[txb_itr],
1550
0
                                        blk_ptr->eob.v[txb_itr],
1551
0
                                        &y_txb_coeff_bits,
1552
0
                                        &cb_txb_coeff_bits,
1553
0
                                        &cr_txb_coeff_bits,
1554
0
                                        tx_size,
1555
0
                                        tx_size_uv,
1556
0
                                        blk_ptr->tx_type[txb_itr],
1557
0
                                        blk_ptr->tx_type_uv,
1558
0
                                        (md_ctx->has_uv && uv_pass) ? COMPONENT_ALL : COMPONENT_LUMA);
1559
0
    }
1560
1561
    // Update the luma DC Sign Level Coeff Neighbor Array
1562
0
    uint8_t dc_sign_level_coeff = (uint8_t)blk_ptr->quant_dc.y[txb_itr];
1563
1564
0
    svt_aom_neighbor_array_unit_mode_write(pcs->ep_luma_dc_sign_level_coeff_na_update[tile_idx],
1565
0
                                           (uint8_t*)&dc_sign_level_coeff,
1566
0
                                           txb_origin_x,
1567
0
                                           txb_origin_y,
1568
0
                                           tx_width,
1569
0
                                           tx_height,
1570
0
                                           NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
1571
1572
    // Update the Cb DC Sign Level Coeff Neighbor Array
1573
0
    if (md_ctx->has_uv && uv_pass) {
1574
0
        dc_sign_level_coeff = (uint8_t)blk_ptr->quant_dc.u[txb_itr];
1575
1576
0
        svt_aom_neighbor_array_unit_mode_write(pcs->ep_cb_dc_sign_level_coeff_na_update[tile_idx],
1577
0
                                               (uint8_t*)&dc_sign_level_coeff,
1578
0
                                               ROUND_UV(txb_origin_x) >> 1,
1579
0
                                               ROUND_UV(txb_origin_y) >> 1,
1580
0
                                               tx_width_uv,
1581
0
                                               tx_height_uv,
1582
0
                                               NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
1583
1584
        // Update the Cr DC Sign Level Coeff Neighbor Array
1585
0
        dc_sign_level_coeff = (uint8_t)blk_ptr->quant_dc.v[txb_itr];
1586
1587
0
        svt_aom_neighbor_array_unit_mode_write(pcs->ep_cr_dc_sign_level_coeff_na_update[tile_idx],
1588
0
                                               (uint8_t*)&dc_sign_level_coeff,
1589
0
                                               ROUND_UV(txb_origin_x) >> 1,
1590
0
                                               ROUND_UV(txb_origin_y) >> 1,
1591
0
                                               tx_width_uv,
1592
0
                                               tx_height_uv,
1593
0
                                               NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
1594
0
    }
1595
0
}
1596
1597
// Update encode-related data for the passed block
1598
// expects ctx->blk_geom, ctx->blk_ptr, ctx->blk_org_x, ctx->blk_org_y to be set
1599
128k
static void update_b(PictureControlSet* pcs, EncDecContext* ctx, BlkStruct* blk_ptr, EcBlkStruct** output_blk_ptr) {
1600
128k
    ModeDecisionContext* md_ctx   = ctx->md_ctx;
1601
128k
    const BlockGeom*     blk_geom = ctx->blk_geom;
1602
128k
    SuperBlock*          sb_ptr   = md_ctx->sb_ptr;
1603
128k
    int                  sb_index = ctx->sb_index;
1604
128k
    const uint16_t       tile_idx = ctx->tile_index;
1605
1606
128k
    if (!pcs->scs->allintra) {
1607
0
        if (is_intra_mode(blk_ptr->block_mi.mode)) {
1608
0
            ctx->tot_intra_coded_area += blk_geom->bwidth * blk_geom->bheight;
1609
0
            pcs->sb_intra[sb_index] = 1;
1610
0
        } else {
1611
0
            if (pcs->ppcs->frm_hdr.allow_high_precision_mv) {
1612
0
                bool hp = (blk_ptr->block_mi.mv[0].x % 2 != 0 || blk_ptr->block_mi.mv[0].y % 2 != 0);
1613
0
                if (!hp && has_second_ref(&blk_ptr->block_mi)) {
1614
0
                    hp = (blk_ptr->block_mi.mv[1].x % 2 != 0 || blk_ptr->block_mi.mv[1].y % 2 != 0);
1615
0
                }
1616
0
                if (hp) {
1617
0
                    ctx->tot_hp_coded_area += blk_geom->bwidth * blk_geom->bheight;
1618
0
                }
1619
0
            }
1620
0
            bool is_zero_mv = 0;
1621
0
            if (abs(blk_ptr->block_mi.mv[0].x) < 8 && abs(blk_ptr->block_mi.mv[0].y) < 8) {
1622
0
                is_zero_mv = 1;
1623
0
            }
1624
0
            if (has_second_ref(&blk_ptr->block_mi)) {
1625
0
                if (abs(blk_ptr->block_mi.mv[1].x) < 8 && abs(blk_ptr->block_mi.mv[1].y) < 8) {
1626
0
                    is_zero_mv = 1;
1627
0
                }
1628
0
            }
1629
0
            if (is_zero_mv) {
1630
0
                ctx->tot_cnt_zero_mv += blk_geom->bwidth * blk_geom->bheight;
1631
0
            }
1632
0
            if (blk_geom->sq_size == pcs->scs->sb_size && blk_ptr->block_mi.mode != NEWMV &&
1633
0
                blk_ptr->block_mi.mode != NEW_NEWMV) {
1634
0
                pcs->sb_64x64_mvp[sb_index] = 1;
1635
0
            }
1636
0
        }
1637
1638
0
        if (blk_ptr->block_has_coeff == 0) {
1639
0
            ctx->tot_skip_coded_area += blk_geom->bwidth * blk_geom->bheight;
1640
0
        } else {
1641
0
            pcs->sb_skip[sb_index] = 0;
1642
0
        }
1643
0
        pcs->sb_min_sq_size[sb_index] = MIN(blk_geom->sq_size, pcs->sb_min_sq_size[sb_index]);
1644
0
        pcs->sb_max_sq_size[sb_index] = MAX(blk_geom->sq_size, pcs->sb_max_sq_size[sb_index]);
1645
0
    }
1646
128k
    svt_block_on_mutex(pcs->ppcs->pcs_total_rate_mutex);
1647
128k
    pcs->ppcs->pcs_total_rate += blk_ptr->total_rate;
1648
128k
    svt_release_mutex(pcs->ppcs->pcs_total_rate_mutex);
1649
1650
    // If needed, copy recon and qcoeffs from MD buffers to EC buffers and update coeff-related CDFs
1651
128k
    if (pcs->cdf_ctrl.update_coef || (md_ctx->bypass_encdec && !(md_ctx->fixed_partition))) {
1652
        // Copy recon to EncDec buffers if EncDec was bypassed; if pred depth only was used
1653
        // and NSQ is OFF data was copied directly to EncDec buffers in MD
1654
120k
        if (md_ctx->bypass_encdec && !(md_ctx->fixed_partition)) {
1655
120k
            copy_recon(pcs, md_ctx, blk_ptr);
1656
120k
        }
1657
1658
        // Initialize the Transform Loop
1659
120k
        const uint8_t  tx_depth          = blk_ptr->block_mi.tx_depth;
1660
120k
        const uint16_t txb_count         = tx_blocks_per_depth[blk_geom->bsize][tx_depth];
1661
120k
        const TxSize   tx_size           = tx_depth_to_tx_size[tx_depth][blk_geom->bsize];
1662
120k
        const int      tx_width          = tx_size_wide[tx_size];
1663
120k
        const int      tx_height         = tx_size_high[tx_size];
1664
120k
        const TxSize   tx_size_uv        = av1_get_max_uv_txsize(blk_geom->bsize, 1, 1);
1665
120k
        const int      tx_width_uv       = tx_size_wide[tx_size_uv];
1666
120k
        const int      tx_height_uv      = tx_size_high[tx_size_uv];
1667
120k
        uint32_t       blk_coded_area    = 0;
1668
120k
        uint32_t       blk_coded_area_uv = 0;
1669
603k
        for (ctx->txb_itr = 0; ctx->txb_itr < txb_count; ctx->txb_itr++) {
1670
482k
            const uint8_t uv_pass = tx_depth && ctx->txb_itr ? 0 : 1; //NM: 128x128 exeption
1671
1672
            // Copy quantized coeffs to EncDec buffers if EncDec was bypassed; if pred depth only was used
1673
            // and NSQ is OFF data was copied directly to EncDec buffers in MD
1674
482k
            if (md_ctx->bypass_encdec && !(md_ctx->fixed_partition)) {
1675
482k
                copy_qcoeffs(pcs, ctx, blk_ptr, blk_coded_area, blk_coded_area_uv);
1676
482k
            }
1677
1678
            // Perform CDF update (MD feature) if enabled
1679
482k
            if (pcs->cdf_ctrl.update_coef) {
1680
0
                update_coeff_cdf(pcs, ctx, blk_ptr);
1681
0
            }
1682
1683
482k
            blk_coded_area += tx_width * tx_height;
1684
482k
            ctx->coded_area_sb_update += tx_width * tx_height;
1685
1686
482k
            if (md_ctx->has_uv && uv_pass) {
1687
120k
                blk_coded_area_uv += tx_width_uv * tx_height_uv;
1688
120k
                ctx->coded_area_sb_uv_update += tx_width_uv * tx_height_uv;
1689
120k
            }
1690
482k
        }
1691
120k
    }
1692
128k
    if (!md_ctx->bypass_encdec) {
1693
0
        md_ctx->blk_org_x = ctx->blk_org_x;
1694
0
        md_ctx->blk_org_y = ctx->blk_org_y;
1695
0
        md_ctx->blk_geom  = ctx->blk_geom;
1696
0
        svt_aom_update_mi_map_enc_dec(blk_ptr, md_ctx, pcs);
1697
0
    }
1698
128k
    if (pcs->cdf_ctrl.update_se) {
1699
        // Update the partition Neighbor Array
1700
0
        PartitionContext partition;
1701
0
        partition.above = partition_context_lookup[blk_geom->bsize].above;
1702
0
        partition.left  = partition_context_lookup[blk_geom->bsize].left;
1703
1704
0
        svt_aom_neighbor_array_unit_mode_write(pcs->ep_partition_context_na[tile_idx],
1705
0
                                               (uint8_t*)&partition,
1706
0
                                               ctx->blk_org_x,
1707
0
                                               ctx->blk_org_y,
1708
0
                                               blk_geom->bwidth,
1709
0
                                               blk_geom->bheight,
1710
0
                                               NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
1711
1712
        // Update the CDFs based on the current block
1713
0
        blk_ptr->av1xd->tile_ctx           = &pcs->ec_ctx_array[sb_index];
1714
0
        uint32_t txfm_context_left_index   = get_neighbor_array_unit_left_index(pcs->ep_txfm_context_na[tile_idx],
1715
0
                                                                              ctx->blk_org_y);
1716
0
        uint32_t txfm_context_above_index  = get_neighbor_array_unit_top_index(pcs->ep_txfm_context_na[tile_idx],
1717
0
                                                                              ctx->blk_org_x);
1718
0
        blk_ptr->av1xd->above_txfm_context = &(pcs->ep_txfm_context_na[tile_idx]->top_array[txfm_context_above_index]);
1719
0
        blk_ptr->av1xd->left_txfm_context  = &(pcs->ep_txfm_context_na[tile_idx]->left_array[txfm_context_left_index]);
1720
0
        svt_aom_tx_size_bits(pcs,
1721
0
                             ctx->blk_ptr->segment_id,
1722
0
                             md_ctx->md_rate_est_ctx,
1723
0
                             blk_ptr->av1xd,
1724
0
                             blk_ptr->av1xd->mi[0],
1725
0
                             tx_depth_to_tx_size[blk_ptr->block_mi.tx_depth][blk_geom->bsize],
1726
0
                             pcs->ppcs->frm_hdr.tx_mode,
1727
0
                             blk_geom->bsize,
1728
0
                             !blk_ptr->block_has_coeff,
1729
0
                             &pcs->ec_ctx_array[sb_index],
1730
0
                             1 /*allow_update_cdf*/);
1731
0
        svt_aom_update_stats(pcs, blk_ptr, ctx->blk_org_y >> MI_SIZE_LOG2, ctx->blk_org_x >> MI_SIZE_LOG2);
1732
0
    }
1733
1734
    // Copy final symbols and mode info from MD array to SB ptr
1735
    // Data will be overwritten each iteration, so copying is useful. Data is updated at EntropyCoding.
1736
128k
    sb_ptr->final_blk_arr[sb_ptr->final_blk_cnt].av1xd = NULL;
1737
    // ENCDEC palette info buffer
1738
128k
    {
1739
128k
        if (svt_av1_allow_palette(pcs->ppcs->palette_level, blk_geom->bsize)) {
1740
0
            ec_rtime_alloc_palette_info(&sb_ptr->final_blk_arr[sb_ptr->final_blk_cnt]);
1741
128k
        } else {
1742
128k
            sb_ptr->final_blk_arr[sb_ptr->final_blk_cnt].palette_info = NULL;
1743
128k
        }
1744
128k
    }
1745
128k
    BlkStruct*   src_cu = blk_ptr;
1746
128k
    EcBlkStruct* dst_cu = &sb_ptr->final_blk_arr[sb_ptr->final_blk_cnt];
1747
128k
    *output_blk_ptr     = &sb_ptr->final_blk_arr[sb_ptr->final_blk_cnt];
1748
128k
    svt_aom_move_blk_data(pcs, ctx, src_cu, dst_cu);
1749
128k
    sb_ptr->final_blk_arr[sb_ptr->final_blk_cnt++].av1xd = sb_ptr->av1xd;
1750
    // MFMV Update
1751
128k
    if (pcs->scs->mfmv_enabled && pcs->slice_type != I_SLICE && pcs->ppcs->is_ref) {
1752
0
        uint32_t           mi_stride = pcs->mi_stride;
1753
0
        int32_t            mi_row    = ctx->blk_org_y >> MI_SIZE_LOG2;
1754
0
        int32_t            mi_col    = ctx->blk_org_x >> MI_SIZE_LOG2;
1755
0
        const int32_t      offset    = mi_row * mi_stride + mi_col;
1756
0
        MbModeInfo*        mbmi      = pcs->mi_grid_base[offset];
1757
0
        const int          x_mis  = AOMMIN(ctx->blk_geom->bwidth >> MI_SIZE_LOG2, pcs->ppcs->av1_cm->mi_cols - mi_col);
1758
0
        const int          y_mis  = AOMMIN(ctx->blk_geom->bheight >> MI_SIZE_LOG2, pcs->ppcs->av1_cm->mi_rows - mi_row);
1759
0
        EbReferenceObject* obj_l0 = (EbReferenceObject*)pcs->ppcs->ref_pic_wrapper->object_ptr;
1760
1761
0
        av1_copy_frame_mvs(pcs, pcs->ppcs->av1_cm, mbmi[0], mi_row, mi_col, x_mis, y_mis, obj_l0);
1762
0
    }
1763
128k
}
1764
1765
/*******************************************
1766
* Encode Pass
1767
*
1768
* Summary: Performs an AV1 conformant encode/reconstruction
1769
*   for a block based on the pre-determined mode info.
1770
*
1771
* Inputs:
1772
*   SourcePic
1773
*   Coding Results
1774
*   SB Location
1775
*   Sequence Control Set
1776
*   Picture Control Set
1777
*
1778
* Outputs:
1779
*   Reconstructed Samples
1780
*   Coefficient Samples
1781
*
1782
*******************************************/
1783
static void encode_b(PictureControlSet* pcs, EncDecContext* ctx, BlkStruct* blk_ptr, EcBlkStruct** output_blk_ptr,
1784
128k
                     const int mi_row, const int mi_col) {
1785
128k
    ModeDecisionContext* md_ctx = ctx->md_ctx;
1786
128k
    ctx->blk_geom = md_ctx->blk_geom = get_blk_geom_mds(pcs->scs->blk_geom_mds, blk_ptr->mds_idx);
1787
128k
    ctx->blk_ptr = md_ctx->blk_ptr = blk_ptr;
1788
128k
    ctx->blk_org_x = md_ctx->blk_org_x = mi_col << MI_SIZE_LOG2;
1789
128k
    ctx->blk_org_y = md_ctx->blk_org_y = mi_row << MI_SIZE_LOG2;
1790
128k
    md_ctx->has_uv                     = is_chroma_reference(mi_row, mi_col, md_ctx->blk_geom->bsize, 1, 1);
1791
128k
    if (ctx->md_ctx->bypass_encdec) {
1792
128k
        update_b(pcs, ctx, blk_ptr, output_blk_ptr);
1793
128k
        return;
1794
128k
    }
1795
1796
    /* ED should use the skip decision from MD. If MD signals 0 coeffs, the TX will
1797
    be bypassed unless MD did not perform chroma (blk_skip_decision) or the block is an
1798
    INTRA block (since the prediction at MD may not be conformant). */
1799
18.4E
    ctx->md_skip_blk         = md_ctx->blk_skip_decision
1800
18.4E
                ? ((is_intra_mode(blk_ptr->block_mi.mode) || blk_ptr->block_has_coeff) ? 0 : 1)
1801
18.4E
                : 0;
1802
18.4E
    blk_ptr->block_has_coeff = 0;
1803
1804
18.4E
    if (is_inter_block(&blk_ptr->block_mi)) {
1805
0
        perform_inter_coding_loop(pcs, ctx);
1806
18.4E
    } else if (is_intra_mode(blk_ptr->block_mi.mode)) {
1807
0
        if (pcs->scs->static_config.encoder_bit_depth > EB_EIGHT_BIT && pcs->hbd_md == 0 &&
1808
0
            blk_ptr->palette_size[0] > 0) {
1809
            //MD was done on 8bit, scale  palette colors to 10bit
1810
0
            for (uint8_t col = 0; col < blk_ptr->palette_size[0]; col++) {
1811
0
                blk_ptr->palette_info->pmi.palette_colors[col] *= 4;
1812
0
            }
1813
0
        }
1814
0
        perform_intra_coding_loop(pcs, ctx);
1815
18.4E
    } else {
1816
18.4E
        EncodeContext* enc_ctx = pcs->scs->enc_ctx;
1817
18.4E
        CHECK_REPORT_ERROR_NC(enc_ctx->app_callback_ptr, EB_ENC_CL_ERROR2);
1818
18.4E
    }
1819
1820
18.4E
    if (pcs->ppcs->frm_hdr.allow_intrabc && ctx->is_16bit && (ctx->bit_depth == EB_EIGHT_BIT)) {
1821
0
        svt_aom_convert_recon_16bit_to_8bit(pcs, ctx);
1822
0
    }
1823
1824
    // Update block info and neighbour arrays needed for future blocks/pictures
1825
18.4E
    update_b(pcs, ctx, blk_ptr, output_blk_ptr);
1826
18.4E
}
1827
1828
void svt_aom_encode_sb(SequenceControlSet* scs, PictureControlSet* pcs, EncDecContext* ctx, SuperBlock* sb_ptr,
1829
175k
                       PC_TREE* pc_tree, PARTITION_TREE* ptree, int mi_row, int mi_col) {
1830
175k
    if (mi_row >= pcs->ppcs->av1_cm->mi_rows || mi_col >= pcs->ppcs->av1_cm->mi_cols) {
1831
0
        return;
1832
0
    }
1833
1834
175k
    const BlockSize bsize = pc_tree->bsize;
1835
175k
    assert(bsize < BLOCK_SIZES_ALL);
1836
175k
    const int           hbs          = mi_size_wide[bsize] >> 1;
1837
175k
    const PartitionType partition    = pc_tree->partition;
1838
175k
    const int           quarter_step = mi_size_wide[bsize] >> 2;
1839
1840
175k
    ptree->partition   = partition;
1841
175k
    ptree->bsize       = bsize;
1842
175k
    ctx->md_ctx->shape = from_part_to_shape[partition];
1843
175k
    if (pcs->cdf_ctrl.update_se) {
1844
        // Update the partition stats
1845
0
        svt_aom_update_part_stats(pcs, partition, bsize, ctx->tile_index, ctx->sb_index, mi_row, mi_col);
1846
0
    }
1847
1848
175k
    switch (partition) {
1849
128k
    case PARTITION_NONE:
1850
128k
        encode_b(pcs, ctx, pc_tree->block_data[PART_N][0], &ptree->blk_data[0], mi_row, mi_col);
1851
128k
        break;
1852
0
    case PARTITION_HORZ:
1853
0
        encode_b(pcs, ctx, pc_tree->block_data[PART_H][0], &ptree->blk_data[0], mi_row, mi_col);
1854
0
        if (mi_row + hbs < pcs->ppcs->av1_cm->mi_rows) {
1855
0
            encode_b(pcs, ctx, pc_tree->block_data[PART_H][1], &ptree->blk_data[1], mi_row + hbs, mi_col);
1856
0
        }
1857
0
        break;
1858
0
    case PARTITION_VERT:
1859
0
        encode_b(pcs, ctx, pc_tree->block_data[PART_V][0], &ptree->blk_data[0], mi_row, mi_col);
1860
0
        if (mi_col + hbs < pcs->ppcs->av1_cm->mi_cols) {
1861
0
            encode_b(pcs, ctx, pc_tree->block_data[PART_V][1], &ptree->blk_data[1], mi_row, mi_col + hbs);
1862
0
        }
1863
0
        break;
1864
46.7k
    case PARTITION_SPLIT:
1865
233k
        for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) {
1866
187k
            const int x_idx = (i & 1) * hbs;
1867
187k
            const int y_idx = (i >> 1) * hbs;
1868
187k
            if (mi_row + y_idx >= pcs->ppcs->av1_cm->mi_rows || mi_col + x_idx >= pcs->ppcs->av1_cm->mi_cols) {
1869
18.3k
                continue;
1870
18.3k
            }
1871
168k
            svt_aom_encode_sb(
1872
168k
                scs, pcs, ctx, sb_ptr, pc_tree->split[i], ptree->sub_tree[i], mi_row + y_idx, mi_col + x_idx);
1873
168k
        }
1874
46.7k
        break;
1875
0
    case PARTITION_HORZ_A:
1876
0
        encode_b(pcs, ctx, pc_tree->block_data[PART_HA][0], &ptree->blk_data[0], mi_row, mi_col);
1877
0
        encode_b(pcs, ctx, pc_tree->block_data[PART_HA][1], &ptree->blk_data[1], mi_row, mi_col + hbs);
1878
0
        encode_b(pcs, ctx, pc_tree->block_data[PART_HA][2], &ptree->blk_data[2], mi_row + hbs, mi_col);
1879
0
        break;
1880
0
    case PARTITION_HORZ_B:
1881
0
        encode_b(pcs, ctx, pc_tree->block_data[PART_HB][0], &ptree->blk_data[0], mi_row, mi_col);
1882
0
        encode_b(pcs, ctx, pc_tree->block_data[PART_HB][1], &ptree->blk_data[1], mi_row + hbs, mi_col);
1883
0
        encode_b(pcs, ctx, pc_tree->block_data[PART_HB][2], &ptree->blk_data[2], mi_row + hbs, mi_col + hbs);
1884
0
        break;
1885
0
    case PARTITION_VERT_A:
1886
0
        encode_b(pcs, ctx, pc_tree->block_data[PART_VA][0], &ptree->blk_data[0], mi_row, mi_col);
1887
0
        encode_b(pcs, ctx, pc_tree->block_data[PART_VA][1], &ptree->blk_data[1], mi_row + hbs, mi_col);
1888
0
        encode_b(pcs, ctx, pc_tree->block_data[PART_VA][2], &ptree->blk_data[2], mi_row, mi_col + hbs);
1889
0
        break;
1890
0
    case PARTITION_VERT_B:
1891
0
        encode_b(pcs, ctx, pc_tree->block_data[PART_VB][0], &ptree->blk_data[0], mi_row, mi_col);
1892
0
        encode_b(pcs, ctx, pc_tree->block_data[PART_VB][1], &ptree->blk_data[1], mi_row, mi_col + hbs);
1893
0
        encode_b(pcs, ctx, pc_tree->block_data[PART_VB][2], &ptree->blk_data[2], mi_row + hbs, mi_col + hbs);
1894
0
        break;
1895
0
    case PARTITION_HORZ_4:
1896
0
        for (int i = 0; i < SUB_PARTITIONS_PART4; ++i) {
1897
0
            int this_mi_row = mi_row + i * quarter_step;
1898
0
            if (i > 0 && this_mi_row >= pcs->ppcs->av1_cm->mi_rows) {
1899
                // Only the last block is able to be outside the picture boundary. If one of the first
1900
                // 3 blocks is outside the boundary, H4 is not a valid partition (see AV1 spec 5.11.4)
1901
0
                assert(i == 3);
1902
0
                break;
1903
0
            }
1904
0
            encode_b(pcs, ctx, pc_tree->block_data[PART_H4][i], &ptree->blk_data[i], this_mi_row, mi_col);
1905
0
        }
1906
0
        break;
1907
0
    case PARTITION_VERT_4:
1908
0
        for (int i = 0; i < SUB_PARTITIONS_PART4; ++i) {
1909
0
            int this_mi_col = mi_col + i * quarter_step;
1910
0
            if (i > 0 && this_mi_col >= pcs->ppcs->av1_cm->mi_cols) {
1911
                // Only the last block is able to be outside the picture boundary. If one of the first
1912
                // 3 blocks is outside the boundary, H4 is not a valid partition (see AV1 spec 5.11.4)
1913
0
                assert(i == 3);
1914
0
                break;
1915
0
            }
1916
0
            encode_b(pcs, ctx, pc_tree->block_data[PART_V4][i], &ptree->blk_data[i], mi_row, this_mi_col);
1917
0
        }
1918
0
        break;
1919
0
    default:
1920
        assert(0 && "Invalid partition type.");
1921
0
        break;
1922
175k
    }
1923
175k
}