/work/svt-av1/Source/Lib/Codec/coding_loop.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright(c) 2019 Intel Corporation |
3 | | * Copyright (c) 2016, Alliance for Open Media. All rights reserved |
4 | | * |
5 | | * This source code is subject to the terms of the BSD 3-Clause Clear License and |
6 | | * the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear License |
7 | | * was not distributed with this source code in the LICENSE file, you can |
8 | | * obtain it at https://www.aomedia.org/license. If the Alliance for Open |
9 | | * Media Patent License 1.0 was not distributed with this source code in the |
10 | | * PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license. |
11 | | */ |
12 | | #include <string.h> |
13 | | |
14 | | #include "coding_loop.h" |
15 | | #include "utility.h" |
16 | | #include "rd_cost.h" |
17 | | #include "deblocking_filter.h" |
18 | | #include "pic_operators.h" |
19 | | #include "segmentation.h" |
20 | | #include "enc_dec_process.h" |
21 | | #include "EbSvtAv1ErrorCodes.h" |
22 | | #include "transforms.h" |
23 | | #include "inv_transforms.h" |
24 | | #include "md_config_process.h" |
25 | | #include "enc_intra_prediction.h" |
26 | | #include "aom_dsp_rtcd.h" |
27 | | #include "md_rate_estimation.h" |
28 | | #include "full_loop.h" |
29 | | #include "pack_unpack_c.h" |
30 | | #include "enc_inter_prediction.h" |
31 | | |
32 | | void aom_av1_set_ssim_rdmult(ModeDecisionContext* ctx, PictureControlSet* pcs, const int mi_row, const int mi_col); |
33 | | |
/*
 * Allocates the palette storage of a block: first a single palette_info
 * element, then its color_idx_map array of MAX_PALETTE_SQUARE entries.
 *
 * Returns EB_ErrorNone once both EB_MALLOC_ARRAY invocations have completed.
 * NOTE(review): EB_MALLOC_ARRAY is defined outside this view; presumably it
 * returns an error code from this function when an allocation fails - confirm,
 * and confirm who releases palette_info if the second allocation fails.
 */
static EbErrorType ec_rtime_alloc_palette_info(EcBlkStruct* md_blk_arr_nsq) {
    EB_MALLOC_ARRAY(md_blk_arr_nsq->palette_info, 1);
    EB_MALLOC_ARRAY(md_blk_arr_nsq->palette_info->color_idx_map, MAX_PALETTE_SQUARE);

    return EB_ErrorNone;
}
40 | | |
41 | | /******************************************* |
42 | | * set Penalize Skip Flag |
43 | | * |
44 | | * Summary: Set the penalize_skipflag to true |
45 | | * When there is luminance/chrominance change |
46 | | * or in noisy clip with low motion at medium |
47 | | * variance area |
48 | | * |
49 | | *******************************************/ |
50 | | |
// Pointer-to-function type for an encode-loop routine operating on picture buffers.
// NOTE(review): av1_encode_loop() in this file has a similar parameter list but no
// sb_ptr argument, so it is not assignable to this type as-is; no user of this
// typedef is visible in this part of the file - confirm it is still needed.
typedef void (*EbAv1EncodeLoopFuncPtr)(PictureControlSet* pcs, EncDecContext* ed_ctx, SuperBlock* sb_ptr,
                                       uint32_t org_x, uint32_t org_y,
                                       EbPictureBufferDesc* pred_samples, // no basis/offset
                                       EbPictureBufferDesc* coeff_samples_sb, // sb based
                                       EbPictureBufferDesc* residual16bit, // no basis/offset
                                       EbPictureBufferDesc* transform16bit, // no basis/offset
                                       EbPictureBufferDesc* inverse_quant_buffer, uint32_t component_mask,
                                       uint16_t* eob);
59 | | |
// Pointer-to-function type for a recon-generation routine.
// NOTE(review): av1_encode_generate_recon() in this file additionally takes a
// PictureControlSet* as its first argument, so it does not match this type;
// no user of this typedef is visible in this part of the file - confirm.
typedef void (*EbAv1GenerateReconFuncPtr)(EncDecContext* ed_ctx, uint32_t org_x, uint32_t org_y,
                                          EbPictureBufferDesc* pred_samples, // no basis/offset
                                          EbPictureBufferDesc* residual16bit, // no basis/offset
                                          uint32_t component_mask, uint16_t* eob);
64 | | |
/*******************************************
 * Residual Kernel (8-bit / 16-bit dispatch)
 *
 * Forwards to svt_residual_kernel16bit() when hbd is set and to
 * svt_residual_kernel8bit() otherwise, over an area_width x area_height
 * region.
 *
 * input / pred are passed as byte pointers; in the hbd case they are
 * reinterpreted as uint16_t sample pointers BEFORE the offsets are added, so
 * input_offset / pred_offset are always expressed in samples, not bytes.
 * The result is written starting at residual + residual_offset.
 *******************************************/
void svt_aom_residual_kernel(uint8_t* input, uint32_t input_offset, uint32_t input_stride, uint8_t* pred,
                             uint32_t pred_offset, uint32_t pred_stride, int16_t* residual, uint32_t residual_offset,
                             uint32_t residual_stride, bool hbd, uint32_t area_width, uint32_t area_height) {
    int16_t* const residual_dst = residual + residual_offset;

    if (!hbd) {
        // 8-bit samples: plain byte addressing
        svt_residual_kernel8bit(input + input_offset,
                                input_stride,
                                pred + pred_offset,
                                pred_stride,
                                residual_dst,
                                residual_stride,
                                area_width,
                                area_height);
        return;
    }

    // 16-bit samples: cast first, then apply the sample offsets
    uint16_t* const input16 = (uint16_t*)input + input_offset;
    uint16_t* const pred16  = (uint16_t*)pred + pred_offset;
    svt_residual_kernel16bit(
        input16, input_stride, pred16, pred_stride, residual_dst, residual_stride, area_width, area_height);
}
92 | | |
93 | | /*************************************************** |
94 | | * Update Recon Samples Neighbor Arrays |
95 | | ***************************************************/ |
96 | | static void encode_pass_update_recon_sample_neighbour_arrays( |
97 | | NeighborArrayUnit* lumaReconSampleNeighborArray, NeighborArrayUnit* cbReconSampleNeighborArray, |
98 | | NeighborArrayUnit* crReconSampleNeighborArray, EbPictureBufferDesc* recon_buffer, uint32_t org_x, uint32_t org_y, |
99 | 0 | uint32_t width, uint32_t height, uint32_t bwidth_uv, uint32_t bheight_uv, uint32_t component_mask, bool is_16bit) { |
100 | 0 | uint32_t round_origin_x = ROUND_UV(org_x); // for Chroma blocks with size of 4 |
101 | 0 | uint32_t round_origin_y = ROUND_UV(org_y); // for Chroma blocks with size of 4 |
102 | |
|
103 | 0 | if (is_16bit == true) { |
104 | 0 | if (component_mask & PICTURE_BUFFER_DESC_LUMA_MASK) { |
105 | | // Recon Samples - Luma |
106 | 0 | svt_aom_neighbor_array_unit16bit_sample_write(lumaReconSampleNeighborArray, |
107 | 0 | (uint16_t*)(recon_buffer->y_buffer), |
108 | 0 | recon_buffer->y_stride, |
109 | 0 | org_x, |
110 | 0 | org_y, |
111 | 0 | org_x, |
112 | 0 | org_y, |
113 | 0 | width, |
114 | 0 | height, |
115 | 0 | NEIGHBOR_ARRAY_UNIT_FULL_MASK); |
116 | 0 | } |
117 | |
|
118 | 0 | if (component_mask & PICTURE_BUFFER_DESC_CHROMA_MASK) { |
119 | | // Recon Samples - Cb |
120 | 0 | svt_aom_neighbor_array_unit16bit_sample_write(cbReconSampleNeighborArray, |
121 | 0 | (uint16_t*)(recon_buffer->u_buffer), |
122 | 0 | recon_buffer->u_stride, |
123 | 0 | round_origin_x >> 1, |
124 | 0 | round_origin_y >> 1, |
125 | 0 | round_origin_x >> 1, |
126 | 0 | round_origin_y >> 1, |
127 | 0 | bwidth_uv, |
128 | 0 | bheight_uv, |
129 | 0 | NEIGHBOR_ARRAY_UNIT_FULL_MASK); |
130 | | |
131 | | // Recon Samples - Cr |
132 | 0 | svt_aom_neighbor_array_unit16bit_sample_write(crReconSampleNeighborArray, |
133 | 0 | (uint16_t*)(recon_buffer->v_buffer), |
134 | 0 | recon_buffer->v_stride, |
135 | 0 | round_origin_x >> 1, |
136 | 0 | round_origin_y >> 1, |
137 | 0 | round_origin_x >> 1, |
138 | 0 | round_origin_y >> 1, |
139 | 0 | bwidth_uv, |
140 | 0 | bheight_uv, |
141 | 0 | NEIGHBOR_ARRAY_UNIT_FULL_MASK); |
142 | 0 | } |
143 | 0 | } else { |
144 | 0 | if (component_mask & PICTURE_BUFFER_DESC_LUMA_MASK) { |
145 | | // Recon Samples - Luma |
146 | 0 | svt_aom_neighbor_array_unit_sample_write(lumaReconSampleNeighborArray, |
147 | 0 | recon_buffer->y_buffer, |
148 | 0 | recon_buffer->y_stride, |
149 | 0 | org_x, |
150 | 0 | org_y, |
151 | 0 | org_x, |
152 | 0 | org_y, |
153 | 0 | width, |
154 | 0 | height, |
155 | 0 | NEIGHBOR_ARRAY_UNIT_FULL_MASK); |
156 | 0 | } |
157 | |
|
158 | 0 | if (component_mask & PICTURE_BUFFER_DESC_CHROMA_MASK) { |
159 | | // Recon Samples - Cb |
160 | 0 | svt_aom_neighbor_array_unit_sample_write(cbReconSampleNeighborArray, |
161 | 0 | recon_buffer->u_buffer, |
162 | 0 | recon_buffer->u_stride, |
163 | 0 | round_origin_x >> 1, |
164 | 0 | round_origin_y >> 1, |
165 | 0 | round_origin_x >> 1, |
166 | 0 | round_origin_y >> 1, |
167 | 0 | bwidth_uv, |
168 | 0 | bheight_uv, |
169 | 0 | NEIGHBOR_ARRAY_UNIT_FULL_MASK); |
170 | | |
171 | | // Recon Samples - Cr |
172 | 0 | svt_aom_neighbor_array_unit_sample_write(crReconSampleNeighborArray, |
173 | 0 | recon_buffer->v_buffer, |
174 | 0 | recon_buffer->v_stride, |
175 | 0 | round_origin_x >> 1, |
176 | 0 | round_origin_y >> 1, |
177 | 0 | round_origin_x >> 1, |
178 | 0 | round_origin_y >> 1, |
179 | 0 | bwidth_uv, |
180 | 0 | bheight_uv, |
181 | 0 | NEIGHBOR_ARRAY_UNIT_FULL_MASK); |
182 | 0 | } |
183 | 0 | } |
184 | 0 | } |
185 | | |
186 | | /********************************************************** |
187 | | * Encode Loop |
188 | | * |
189 | | * Summary: Performs an AV1 conformant CfL prediction based on |
190 | | * recon luma samples in pred_samples |
191 | | * |
192 | | * Inputs: |
193 | | * pred_samples - recon luma samples on which CfL prediction is based |
194 | | * |
195 | | * Outputs: |
196 | | * pred_samples - predicted chroma samples for cb and cr |
197 | | * |
198 | | **********************************************************/ |
199 | | static void av1_encode_generate_cfl_prediction(EbPictureBufferDesc* pred_samples, EncDecContext* ed_ctx, |
200 | | uint32_t pred_cb_offset, uint32_t pred_cr_offset, |
201 | 0 | uint32_t round_origin_x, uint32_t round_origin_y) { |
202 | 0 | bool is_16bit = ed_ctx->is_16bit; |
203 | 0 | const BlockGeom* blk_geom = ed_ctx->blk_geom; |
204 | 0 | BlkStruct* blk_ptr = ed_ctx->blk_ptr; |
205 | |
|
206 | 0 | EbPictureBufferDesc* recon_samples = pred_samples; |
207 | |
|
208 | 0 | uint32_t recon_luma_offset = (round_origin_y * recon_samples->y_stride) + round_origin_x; |
209 | | |
210 | | // Down sample Luma |
211 | 0 | if (is_16bit) { |
212 | 0 | svt_cfl_luma_subsampling_420_hbd( |
213 | 0 | ((uint16_t*)recon_samples->y_buffer) + recon_luma_offset, |
214 | 0 | recon_samples->y_stride, |
215 | 0 | ed_ctx->md_ctx->pred_buf_q3, |
216 | 0 | blk_geom->bwidth_uv == blk_geom->bwidth ? (blk_geom->bwidth_uv << 1) : blk_geom->bwidth, |
217 | 0 | blk_geom->bheight_uv == blk_geom->bheight ? (blk_geom->bheight_uv << 1) : blk_geom->bheight); |
218 | 0 | } else { |
219 | 0 | svt_cfl_luma_subsampling_420_lbd( |
220 | 0 | recon_samples->y_buffer + recon_luma_offset, |
221 | 0 | recon_samples->y_stride, |
222 | 0 | ed_ctx->md_ctx->pred_buf_q3, |
223 | 0 | blk_geom->bwidth_uv == blk_geom->bwidth ? (blk_geom->bwidth_uv << 1) : blk_geom->bwidth, |
224 | 0 | blk_geom->bheight_uv == blk_geom->bheight ? (blk_geom->bheight_uv << 1) : blk_geom->bheight); |
225 | 0 | } |
226 | |
|
227 | 0 | const TxSize tx_size_uv = av1_get_max_uv_txsize(blk_geom->bsize, 1, 1); |
228 | 0 | const int tx_width_uv = tx_size_wide[tx_size_uv]; |
229 | 0 | const int tx_height_uv = tx_size_high[tx_size_uv]; |
230 | |
|
231 | 0 | int32_t round_offset = (tx_width_uv * tx_height_uv) / 2; |
232 | |
|
233 | 0 | svt_subtract_average(ed_ctx->md_ctx->pred_buf_q3, |
234 | 0 | tx_width_uv, |
235 | 0 | tx_height_uv, |
236 | 0 | round_offset, |
237 | 0 | svt_log2f(tx_width_uv) + svt_log2f(tx_height_uv)); |
238 | |
|
239 | 0 | int32_t alpha_q3_cb = cfl_idx_to_alpha(blk_ptr->block_mi.cfl_alpha_idx, |
240 | 0 | blk_ptr->block_mi.cfl_alpha_signs, |
241 | 0 | CFL_PRED_U); // once for U, once for V |
242 | 0 | int32_t alpha_q3_cr = cfl_idx_to_alpha(blk_ptr->block_mi.cfl_alpha_idx, |
243 | 0 | blk_ptr->block_mi.cfl_alpha_signs, |
244 | 0 | CFL_PRED_V); // once for U, once for V |
245 | |
|
246 | 0 | if (is_16bit) { |
247 | 0 | svt_cfl_predict_hbd(ed_ctx->md_ctx->pred_buf_q3, |
248 | 0 | ((uint16_t*)pred_samples->u_buffer) + pred_cb_offset, |
249 | 0 | pred_samples->u_stride, |
250 | 0 | ((uint16_t*)pred_samples->u_buffer) + pred_cb_offset, |
251 | 0 | pred_samples->u_stride, |
252 | 0 | alpha_q3_cb, |
253 | 0 | ed_ctx->bit_depth, |
254 | 0 | tx_width_uv, |
255 | 0 | tx_height_uv); |
256 | |
|
257 | 0 | svt_cfl_predict_hbd(ed_ctx->md_ctx->pred_buf_q3, |
258 | 0 | ((uint16_t*)pred_samples->v_buffer) + pred_cr_offset, |
259 | 0 | pred_samples->v_stride, |
260 | 0 | ((uint16_t*)pred_samples->v_buffer) + pred_cr_offset, |
261 | 0 | pred_samples->v_stride, |
262 | 0 | alpha_q3_cr, |
263 | 0 | ed_ctx->bit_depth, |
264 | 0 | tx_width_uv, |
265 | 0 | tx_height_uv); |
266 | 0 | } else { |
267 | 0 | svt_cfl_predict_lbd(ed_ctx->md_ctx->pred_buf_q3, |
268 | 0 | pred_samples->u_buffer + pred_cb_offset, |
269 | 0 | pred_samples->u_stride, |
270 | 0 | pred_samples->u_buffer + pred_cb_offset, |
271 | 0 | pred_samples->u_stride, |
272 | 0 | alpha_q3_cb, |
273 | 0 | 8, |
274 | 0 | tx_width_uv, |
275 | 0 | tx_height_uv); |
276 | |
|
277 | 0 | svt_cfl_predict_lbd(ed_ctx->md_ctx->pred_buf_q3, |
278 | 0 | pred_samples->v_buffer + pred_cr_offset, |
279 | 0 | pred_samples->v_stride, |
280 | 0 | pred_samples->v_buffer + pred_cr_offset, |
281 | 0 | pred_samples->v_stride, |
282 | 0 | alpha_q3_cr, |
283 | 0 | 8, |
284 | 0 | tx_width_uv, |
285 | 0 | tx_height_uv); |
286 | 0 | } |
287 | 0 | } |
288 | | |
289 | | /********************************************************** |
290 | | * Encode Loop |
291 | | * |
292 | | * Summary: Performs an AV1 conformant |
293 | | * Transform, Quantization and Inverse Quantization of a TU. |
294 | | * |
295 | | * Inputs: |
296 | | * org_x |
297 | | * org_y |
298 | | * txb_size |
299 | | * sb_sz |
300 | | * input - input samples (position sensitive) |
301 | | * pred - prediction samples (position independent) |
302 | | * |
303 | | * Outputs: |
304 | | * Inverse quantized coeff - quantization indices (position sensitive) |
305 | | * |
306 | | **********************************************************/ |
307 | | static void av1_encode_loop(PictureControlSet* pcs, EncDecContext* ed_ctx, uint32_t org_x, uint32_t org_y, |
308 | | EbPictureBufferDesc* pred_samples, // no basis/offset |
309 | | EbPictureBufferDesc* coeff_samples_sb, // sb based |
310 | | EbPictureBufferDesc* residual16bit, // no basis/offset |
311 | | EbPictureBufferDesc* transform16bit, // no basis/offset |
312 | | EbPictureBufferDesc* inverse_quant_buffer, uint32_t component_mask, uint16_t* eob) |
313 | | |
314 | 0 | { |
315 | 0 | ModeDecisionContext* md_ctx = ed_ctx->md_ctx; |
316 | 0 | const BlockGeom* blk_geom = ed_ctx->blk_geom; |
317 | 0 | BlkStruct* blk_ptr = ed_ctx->blk_ptr; |
318 | 0 | const uint32_t qindex = blk_ptr->qindex; |
319 | 0 | const bool is_16bit = ed_ctx->is_16bit; |
320 | 0 | const uint32_t bit_depth = ed_ctx->bit_depth; |
321 | 0 | EbPictureBufferDesc* input_samples = is_16bit ? ed_ctx->input_sample16bit_buffer : ed_ctx->input_samples; |
322 | |
|
323 | 0 | const bool is_inter = is_inter_block(&blk_ptr->block_mi); |
324 | 0 | const uint32_t round_origin_x = ROUND_UV(org_x); // for Chroma blocks with size of 4 |
325 | 0 | const uint32_t round_origin_y = ROUND_UV(org_y); // for Chroma blocks with size of 4 |
326 | 0 | const uint8_t tx_depth = blk_ptr->block_mi.tx_depth; |
327 | | // Get the tx origin coordinates within the SB (not frame) |
328 | 0 | const uint16_t tx_org_x = org_x - md_ctx->sb_origin_x; |
329 | 0 | const uint16_t tx_org_y = org_y - md_ctx->sb_origin_y; |
330 | 0 | const int32_t seg_qp = pcs->ppcs->frm_hdr.segmentation_params.segmentation_enabled |
331 | 0 | ? pcs->ppcs->frm_hdr.segmentation_params.feature_data[ed_ctx->blk_ptr->segment_id][SEG_LVL_ALT_Q] |
332 | 0 | : 0; |
333 | |
|
334 | 0 | uint32_t input_luma_offset, input_cb_offset, input_cr_offset; |
335 | 0 | uint32_t pred_luma_offset, pred_cb_offset, pred_cr_offset; |
336 | 0 | uint32_t scratch_luma_offset, scratch_cb_offset, scratch_cr_offset; |
337 | 0 | if (is_16bit) { |
338 | 0 | input_luma_offset = tx_org_x + tx_org_y * input_samples->y_stride; |
339 | 0 | input_cb_offset = ROUND_UV(tx_org_x) / 2 + ROUND_UV(tx_org_y) / 2 * input_samples->u_stride; |
340 | 0 | input_cr_offset = ROUND_UV(tx_org_x) / 2 + ROUND_UV(tx_org_y) / 2 * input_samples->v_stride; |
341 | 0 | pred_luma_offset = (org_y * pred_samples->y_stride) + org_x; |
342 | 0 | pred_cb_offset = (round_origin_x >> 1) + ((round_origin_y >> 1) * pred_samples->u_stride); |
343 | 0 | pred_cr_offset = (round_origin_x >> 1) + ((round_origin_y >> 1) * pred_samples->v_stride); |
344 | 0 | } else { |
345 | 0 | input_luma_offset = (org_y * input_samples->y_stride) + org_x; |
346 | 0 | input_cb_offset = ((round_origin_y >> 1) * input_samples->u_stride) + (round_origin_x >> 1); |
347 | 0 | input_cr_offset = ((round_origin_y >> 1) * input_samples->v_stride) + (round_origin_x >> 1); |
348 | |
|
349 | 0 | pred_luma_offset = org_x + (org_y * pred_samples->y_stride); |
350 | 0 | pred_cb_offset = (round_origin_x >> 1) + ((round_origin_y >> 1) * pred_samples->u_stride); |
351 | 0 | pred_cr_offset = (round_origin_x >> 1) + ((round_origin_y >> 1) * pred_samples->v_stride); |
352 | 0 | } |
353 | |
|
354 | 0 | if (bit_depth != EB_EIGHT_BIT) { |
355 | | // Get the block origin coordinates within the SB (not frame) |
356 | 0 | const uint16_t blk_org_x_in_sb = md_ctx->blk_org_x - md_ctx->sb_origin_x; |
357 | 0 | const uint16_t blk_org_y_in_sb = md_ctx->blk_org_y - md_ctx->sb_origin_y; |
358 | 0 | scratch_luma_offset = blk_org_x_in_sb + blk_org_y_in_sb * residual16bit->y_stride; |
359 | 0 | scratch_cb_offset = ROUND_UV(blk_org_x_in_sb) / 2 + ROUND_UV(blk_org_y_in_sb) / 2 * residual16bit->u_stride; |
360 | 0 | scratch_cr_offset = ROUND_UV(blk_org_x_in_sb) / 2 + ROUND_UV(blk_org_y_in_sb) / 2 * residual16bit->v_stride; |
361 | 0 | } else { |
362 | 0 | scratch_luma_offset = tx_org_x + tx_org_y * residual16bit->y_stride; |
363 | 0 | scratch_cb_offset = ROUND_UV(tx_org_x) / 2 + ROUND_UV(tx_org_y) / 2 * residual16bit->u_stride; |
364 | 0 | scratch_cr_offset = ROUND_UV(tx_org_x) / 2 + ROUND_UV(tx_org_y) / 2 * residual16bit->v_stride; |
365 | 0 | } |
366 | 0 | ed_ctx->three_quad_energy = 0; |
367 | |
|
368 | 0 | if (pcs->ppcs->blk_lambda_tuning) { |
369 | 0 | md_ctx->blk_geom = ed_ctx->blk_geom; |
370 | 0 | md_ctx->blk_org_x = ed_ctx->blk_org_x; |
371 | 0 | md_ctx->blk_org_y = ed_ctx->blk_org_y; |
372 | | //Get the new lambda for current block |
373 | 0 | svt_aom_set_tuned_blk_lambda(md_ctx, pcs); |
374 | 0 | } else if (pcs->ppcs->scs->static_config.tune == TUNE_SSIM || pcs->ppcs->scs->static_config.tune == TUNE_IQ || |
375 | 0 | pcs->ppcs->scs->static_config.tune == TUNE_MS_SSIM) { |
376 | 0 | md_ctx->blk_geom = ed_ctx->blk_geom; |
377 | 0 | md_ctx->blk_org_x = ed_ctx->blk_org_x; |
378 | 0 | md_ctx->blk_org_y = ed_ctx->blk_org_y; |
379 | 0 | int mi_row = ed_ctx->blk_org_y / 4; |
380 | 0 | int mi_col = ed_ctx->blk_org_x / 4; |
381 | 0 | aom_av1_set_ssim_rdmult(md_ctx, pcs, mi_row, mi_col); |
382 | 0 | } |
383 | | |
384 | | //********************************** |
385 | | // Luma |
386 | | //********************************** |
387 | 0 | if (component_mask == PICTURE_BUFFER_DESC_FULL_MASK || component_mask == PICTURE_BUFFER_DESC_LUMA_MASK) { |
388 | 0 | if (ed_ctx->md_skip_blk) { |
389 | 0 | eob[0] = 0; |
390 | 0 | blk_ptr->quant_dc.y[ed_ctx->txb_itr] = 0; |
391 | 0 | } else { |
392 | 0 | const TxSize tx_size = tx_depth_to_tx_size[tx_depth][blk_geom->bsize]; |
393 | 0 | const int tx_width = tx_size_wide[tx_size]; |
394 | 0 | const int tx_height = tx_size_high[tx_size]; |
395 | 0 | svt_aom_residual_kernel(input_samples->y_buffer, |
396 | 0 | input_luma_offset, |
397 | 0 | input_samples->y_stride, |
398 | 0 | pred_samples->y_buffer, |
399 | 0 | pred_luma_offset, |
400 | 0 | pred_samples->y_stride, |
401 | 0 | ((int16_t*)residual16bit->y_buffer), |
402 | 0 | scratch_luma_offset, |
403 | 0 | residual16bit->y_stride, |
404 | 0 | is_16bit, // hbd |
405 | 0 | tx_width, |
406 | 0 | tx_height); |
407 | 0 | svt_aom_estimate_transform(pcs, |
408 | 0 | ed_ctx->md_ctx, |
409 | 0 | ((int16_t*)residual16bit->y_buffer) + scratch_luma_offset, |
410 | 0 | residual16bit->y_stride, |
411 | 0 | ((TranLow*)transform16bit->y_buffer) + ed_ctx->coded_area_sb, |
412 | 0 | NOT_USED_VALUE, |
413 | 0 | tx_size, |
414 | 0 | &ed_ctx->three_quad_energy, |
415 | 0 | bit_depth, |
416 | 0 | blk_ptr->tx_type[ed_ctx->txb_itr], |
417 | 0 | PLANE_TYPE_Y, |
418 | 0 | DEFAULT_SHAPE); |
419 | |
|
420 | 0 | blk_ptr->quant_dc.y[ed_ctx->txb_itr] = svt_aom_quantize_inv_quantize( |
421 | 0 | pcs, |
422 | 0 | md_ctx, |
423 | 0 | ((int32_t*)transform16bit->y_buffer) + ed_ctx->coded_area_sb, |
424 | 0 | ((int32_t*)coeff_samples_sb->y_buffer) + ed_ctx->coded_area_sb, |
425 | 0 | ((int32_t*)inverse_quant_buffer->y_buffer) + ed_ctx->coded_area_sb, |
426 | 0 | qindex, |
427 | 0 | seg_qp, |
428 | 0 | tx_size, |
429 | 0 | &eob[0], |
430 | 0 | COMPONENT_LUMA, |
431 | 0 | bit_depth, |
432 | 0 | blk_ptr->tx_type[ed_ctx->txb_itr], |
433 | 0 | md_ctx->luma_txb_skip_context, |
434 | 0 | md_ctx->luma_dc_sign_context, |
435 | 0 | blk_ptr->block_mi.mode, |
436 | 0 | md_ctx->full_lambda_md[(bit_depth == EB_TEN_BIT) ? EB_10_BIT_MD : EB_8_BIT_MD], |
437 | 0 | true); |
438 | 0 | } |
439 | |
|
440 | 0 | blk_ptr->y_has_coeff |= (eob[0] > 0) << ed_ctx->txb_itr; |
441 | 0 | blk_ptr->eob.y[ed_ctx->txb_itr] = (uint16_t)eob[0]; |
442 | |
|
443 | 0 | if (eob[0] == 0) { |
444 | 0 | blk_ptr->tx_type[ed_ctx->txb_itr] = DCT_DCT; |
445 | | // INTER. Chroma follows Luma in transform type |
446 | 0 | if (ed_ctx->txb_itr == 0 && is_inter) { |
447 | 0 | blk_ptr->tx_type_uv = DCT_DCT; |
448 | 0 | } |
449 | 0 | } |
450 | 0 | } |
451 | |
|
452 | 0 | if (component_mask == PICTURE_BUFFER_DESC_FULL_MASK || component_mask == PICTURE_BUFFER_DESC_CHROMA_MASK) { |
453 | | // If chroma uses CfL prediction, generate predicted samples based on previously computed recon luma |
454 | | // samples. The recon luma samples must be from a previous call to av1_encode_loop/av1_encode_generate_recon |
455 | | // because this function does not generate reconstructed samples. |
456 | 0 | if (is_intra_mode(blk_ptr->block_mi.mode) && blk_ptr->block_mi.uv_mode == UV_CFL_PRED) { |
457 | 0 | av1_encode_generate_cfl_prediction( |
458 | 0 | pred_samples, ed_ctx, pred_cb_offset, pred_cr_offset, round_origin_x, round_origin_y); |
459 | 0 | } |
460 | | |
461 | | //********************************** |
462 | | // Chroma |
463 | | //********************************** |
464 | 0 | if (ed_ctx->md_skip_blk) { |
465 | 0 | eob[1] = 0; |
466 | 0 | blk_ptr->quant_dc.u[ed_ctx->txb_itr] = 0; |
467 | 0 | eob[2] = 0; |
468 | 0 | blk_ptr->quant_dc.v[ed_ctx->txb_itr] = 0; |
469 | 0 | } else { |
470 | 0 | const TxSize tx_size_uv = av1_get_max_uv_txsize(blk_geom->bsize, 1, 1); |
471 | 0 | const int tx_width_uv = tx_size_wide[tx_size_uv]; |
472 | 0 | const int tx_height_uv = tx_size_high[tx_size_uv]; |
473 | | //********************************** |
474 | | // Cb |
475 | | //********************************** |
476 | 0 | svt_aom_residual_kernel(input_samples->u_buffer, |
477 | 0 | input_cb_offset, |
478 | 0 | input_samples->u_stride, |
479 | 0 | pred_samples->u_buffer, |
480 | 0 | pred_cb_offset, |
481 | 0 | pred_samples->u_stride, |
482 | 0 | ((int16_t*)residual16bit->u_buffer), |
483 | 0 | scratch_cb_offset, |
484 | 0 | residual16bit->u_stride, |
485 | 0 | is_16bit, // hbd |
486 | 0 | tx_width_uv, |
487 | 0 | tx_height_uv); |
488 | 0 | svt_aom_estimate_transform(pcs, |
489 | 0 | ed_ctx->md_ctx, |
490 | 0 | ((int16_t*)residual16bit->u_buffer) + scratch_cb_offset, |
491 | 0 | residual16bit->u_stride, |
492 | 0 | ((TranLow*)transform16bit->u_buffer) + ed_ctx->coded_area_sb_uv, |
493 | 0 | NOT_USED_VALUE, |
494 | 0 | tx_size_uv, |
495 | 0 | &ed_ctx->three_quad_energy, |
496 | 0 | bit_depth, |
497 | 0 | blk_ptr->tx_type_uv, |
498 | 0 | PLANE_TYPE_UV, |
499 | 0 | DEFAULT_SHAPE); |
500 | |
|
501 | 0 | blk_ptr->quant_dc.u[ed_ctx->txb_itr] = svt_aom_quantize_inv_quantize( |
502 | 0 | pcs, |
503 | 0 | md_ctx, |
504 | 0 | ((int32_t*)transform16bit->u_buffer) + ed_ctx->coded_area_sb_uv, |
505 | 0 | ((int32_t*)coeff_samples_sb->u_buffer) + ed_ctx->coded_area_sb_uv, |
506 | 0 | ((int32_t*)inverse_quant_buffer->u_buffer) + ed_ctx->coded_area_sb_uv, |
507 | 0 | qindex, |
508 | 0 | seg_qp, |
509 | 0 | tx_size_uv, |
510 | 0 | &eob[1], |
511 | 0 | COMPONENT_CHROMA_CB, |
512 | 0 | bit_depth, |
513 | 0 | blk_ptr->tx_type_uv, |
514 | 0 | md_ctx->cb_txb_skip_context, |
515 | 0 | md_ctx->cb_dc_sign_context, |
516 | 0 | blk_ptr->block_mi.mode, |
517 | 0 | md_ctx->full_lambda_md[(bit_depth == EB_TEN_BIT) ? EB_10_BIT_MD : EB_8_BIT_MD], |
518 | 0 | true); |
519 | | |
520 | | //********************************** |
521 | | // Cr |
522 | | //********************************** |
523 | 0 | svt_aom_residual_kernel(input_samples->v_buffer, |
524 | 0 | input_cr_offset, |
525 | 0 | input_samples->v_stride, |
526 | 0 | pred_samples->v_buffer, |
527 | 0 | pred_cr_offset, |
528 | 0 | pred_samples->v_stride, |
529 | 0 | ((int16_t*)residual16bit->v_buffer), |
530 | 0 | scratch_cr_offset, |
531 | 0 | residual16bit->v_stride, |
532 | 0 | is_16bit, // hbd |
533 | 0 | tx_width_uv, |
534 | 0 | tx_height_uv); |
535 | 0 | svt_aom_estimate_transform(pcs, |
536 | 0 | ed_ctx->md_ctx, |
537 | 0 | ((int16_t*)residual16bit->v_buffer) + scratch_cb_offset, |
538 | 0 | residual16bit->v_stride, |
539 | 0 | ((TranLow*)transform16bit->v_buffer) + ed_ctx->coded_area_sb_uv, |
540 | 0 | NOT_USED_VALUE, |
541 | 0 | tx_size_uv, |
542 | 0 | &ed_ctx->three_quad_energy, |
543 | 0 | bit_depth, |
544 | 0 | blk_ptr->tx_type_uv, |
545 | 0 | PLANE_TYPE_UV, |
546 | 0 | DEFAULT_SHAPE); |
547 | |
|
548 | 0 | blk_ptr->quant_dc.v[ed_ctx->txb_itr] = svt_aom_quantize_inv_quantize( |
549 | 0 | pcs, |
550 | 0 | md_ctx, |
551 | 0 | ((int32_t*)transform16bit->v_buffer) + ed_ctx->coded_area_sb_uv, |
552 | 0 | ((int32_t*)coeff_samples_sb->v_buffer) + ed_ctx->coded_area_sb_uv, |
553 | 0 | ((int32_t*)inverse_quant_buffer->v_buffer) + ed_ctx->coded_area_sb_uv, |
554 | 0 | qindex, |
555 | 0 | seg_qp, |
556 | 0 | tx_size_uv, |
557 | 0 | &eob[2], |
558 | 0 | COMPONENT_CHROMA_CR, |
559 | 0 | bit_depth, |
560 | 0 | blk_ptr->tx_type_uv, |
561 | 0 | md_ctx->cr_txb_skip_context, |
562 | 0 | md_ctx->cr_dc_sign_context, |
563 | 0 | blk_ptr->block_mi.mode, |
564 | 0 | md_ctx->full_lambda_md[(bit_depth == EB_TEN_BIT) ? EB_10_BIT_MD : EB_8_BIT_MD], |
565 | 0 | true); |
566 | 0 | } |
567 | |
|
568 | 0 | blk_ptr->u_has_coeff |= (eob[1] > 0) << ed_ctx->txb_itr; |
569 | 0 | blk_ptr->v_has_coeff |= (eob[2] > 0) << ed_ctx->txb_itr; |
570 | 0 | blk_ptr->eob.u[ed_ctx->txb_itr] = (uint16_t)eob[1]; |
571 | 0 | blk_ptr->eob.v[ed_ctx->txb_itr] = (uint16_t)eob[2]; |
572 | 0 | } |
573 | |
|
574 | 0 | return; |
575 | 0 | } |
576 | | |
577 | | /********************************************************** |
578 | | * Encode Generate Recon |
579 | | * |
580 | | * Summary: Performs an AV1 conformant |
581 | | * Inverse Transform and generate |
582 | | * the reconstructed samples of a TU. |
583 | | * |
584 | | * Inputs: |
585 | | * org_x |
586 | | * org_y |
587 | | * txb_size |
588 | | * sb_sz |
589 | | * input - Inverse Quantized Coeff (position sensitive) |
590 | | * pred - prediction samples (position independent) |
591 | | * |
592 | | * Outputs: |
593 | | * Recon (position independent) |
594 | | * |
595 | | **********************************************************/ |
596 | | static void av1_encode_generate_recon(PictureControlSet* pcs, EncDecContext* ed_ctx, uint32_t org_x, uint32_t org_y, |
597 | | EbPictureBufferDesc* pred_samples, // no basis/offset |
598 | | EbPictureBufferDesc* residual16bit, // no basis/offset |
599 | 0 | uint32_t component_mask, uint16_t* eob) { |
600 | 0 | BlkStruct* blk_ptr = ed_ctx->blk_ptr; |
601 | | |
602 | | //********************************** |
603 | | // Luma |
604 | | //********************************** |
605 | 0 | if (component_mask & PICTURE_BUFFER_DESC_LUMA_MASK) { |
606 | 0 | if ((blk_ptr->y_has_coeff & (1 << ed_ctx->txb_itr)) && blk_ptr->block_mi.skip_mode == false) { |
607 | 0 | const TxSize tx_size = tx_depth_to_tx_size[blk_ptr->block_mi.tx_depth][ed_ctx->blk_geom->bsize]; |
608 | 0 | const uint32_t pred_luma_offset = (org_y * pred_samples->y_stride) + org_x; |
609 | 0 | svt_aom_inv_transform_recon_wrapper(pcs, |
610 | 0 | ed_ctx->md_ctx, |
611 | 0 | pred_samples->y_buffer, |
612 | 0 | pred_luma_offset, |
613 | 0 | pred_samples->y_stride, |
614 | 0 | pred_samples->y_buffer, |
615 | 0 | pred_luma_offset, |
616 | 0 | pred_samples->y_stride, |
617 | 0 | ((int32_t*)residual16bit->y_buffer), |
618 | 0 | ed_ctx->coded_area_sb, |
619 | 0 | ed_ctx->bit_depth == EB_TEN_BIT ? 1 : 0, // hbd |
620 | 0 | tx_size, |
621 | 0 | blk_ptr->tx_type[ed_ctx->txb_itr], |
622 | 0 | PLANE_TYPE_Y, |
623 | 0 | eob[0]); |
624 | 0 | } |
625 | 0 | } |
626 | | |
627 | | //********************************** |
628 | | // Chroma |
629 | | //********************************** |
630 | 0 | if (component_mask & PICTURE_BUFFER_DESC_CHROMA_MASK) { |
631 | 0 | const TxSize tx_size_uv = av1_get_max_uv_txsize(ed_ctx->blk_geom->bsize, 1, 1); |
632 | 0 | const uint32_t round_origin_x = ROUND_UV(org_x); // for Chroma blocks with size of 4 |
633 | 0 | const uint32_t round_origin_y = ROUND_UV(org_y); // for Chroma blocks with size of 4 |
634 | | |
635 | | //********************************** |
636 | | // Cb |
637 | | //********************************** |
638 | 0 | if ((blk_ptr->u_has_coeff & (1 << ed_ctx->txb_itr)) && blk_ptr->block_mi.skip_mode == false) { |
639 | 0 | const uint32_t pred_offset_cb = ((round_origin_y >> 1) * pred_samples->u_stride) + (round_origin_x >> 1); |
640 | 0 | svt_aom_inv_transform_recon_wrapper(pcs, |
641 | 0 | ed_ctx->md_ctx, |
642 | 0 | pred_samples->u_buffer, |
643 | 0 | pred_offset_cb, |
644 | 0 | pred_samples->u_stride, |
645 | 0 | pred_samples->u_buffer, |
646 | 0 | pred_offset_cb, |
647 | 0 | pred_samples->u_stride, |
648 | 0 | ((int32_t*)residual16bit->u_buffer), |
649 | 0 | ed_ctx->coded_area_sb_uv, |
650 | 0 | ed_ctx->bit_depth == EB_TEN_BIT ? 1 : 0, // hbd |
651 | 0 | tx_size_uv, |
652 | 0 | blk_ptr->tx_type_uv, |
653 | 0 | PLANE_TYPE_UV, |
654 | 0 | eob[1]); |
655 | 0 | } |
656 | | |
657 | | //********************************** |
658 | | // Cr |
659 | | //********************************** |
660 | 0 | if ((blk_ptr->v_has_coeff & (1 << ed_ctx->txb_itr)) && blk_ptr->block_mi.skip_mode == false) { |
661 | 0 | const uint32_t pred_offset_cr = ((round_origin_y >> 1) * pred_samples->v_stride) + (round_origin_x >> 1); |
662 | 0 | svt_aom_inv_transform_recon_wrapper(pcs, |
663 | 0 | ed_ctx->md_ctx, |
664 | 0 | pred_samples->v_buffer, |
665 | 0 | pred_offset_cr, |
666 | 0 | pred_samples->v_stride, |
667 | 0 | pred_samples->v_buffer, |
668 | 0 | pred_offset_cr, |
669 | 0 | pred_samples->v_stride, |
670 | 0 | ((int32_t*)residual16bit->v_buffer), |
671 | 0 | ed_ctx->coded_area_sb_uv, |
672 | 0 | ed_ctx->bit_depth == EB_TEN_BIT ? 1 : 0, // hbd |
673 | 0 | tx_size_uv, |
674 | 0 | blk_ptr->tx_type_uv, |
675 | 0 | PLANE_TYPE_UV, |
676 | 0 | eob[2]); |
677 | 0 | } |
678 | 0 | } |
679 | 0 | } |
680 | | |
681 | | void svt_aom_store16bit_input_src(EbPictureBufferDesc* input_sample16bit_buffer, PictureControlSet* pcs, uint32_t sb_x, |
682 | 0 | uint32_t sb_y, uint32_t sb_w, uint32_t sb_h) { |
683 | 0 | uint32_t row_it; |
684 | 0 | uint16_t* from_ptr; |
685 | 0 | uint16_t* to_ptr; |
686 | |
|
687 | 0 | from_ptr = (uint16_t*)input_sample16bit_buffer->y_buffer; |
688 | 0 | to_ptr = (uint16_t*)pcs->input_frame16bit->y_buffer + sb_x + (sb_y * pcs->input_frame16bit->y_stride); |
689 | |
|
690 | 0 | for (row_it = 0; row_it < sb_h; row_it++) { |
691 | 0 | svt_memcpy(to_ptr + row_it * pcs->input_frame16bit->y_stride, |
692 | 0 | from_ptr + row_it * input_sample16bit_buffer->y_stride, |
693 | 0 | sb_w * 2); |
694 | 0 | } |
695 | |
|
696 | 0 | sb_x = sb_x / 2; |
697 | 0 | sb_y = sb_y / 2; |
698 | 0 | sb_w = sb_w / 2; |
699 | 0 | sb_h = sb_h / 2; |
700 | |
|
701 | 0 | from_ptr = (uint16_t*)input_sample16bit_buffer->u_buffer; |
702 | 0 | to_ptr = (uint16_t*)pcs->input_frame16bit->u_buffer + sb_x + (sb_y * pcs->input_frame16bit->u_stride); |
703 | |
|
704 | 0 | for (row_it = 0; row_it < sb_h; row_it++) { |
705 | 0 | svt_memcpy(to_ptr + row_it * pcs->input_frame16bit->u_stride, |
706 | 0 | from_ptr + row_it * input_sample16bit_buffer->u_stride, |
707 | 0 | sb_w * 2); |
708 | 0 | } |
709 | |
|
710 | 0 | from_ptr = (uint16_t*)input_sample16bit_buffer->v_buffer; |
711 | 0 | to_ptr = (uint16_t*)pcs->input_frame16bit->v_buffer + sb_x + (sb_y * pcs->input_frame16bit->v_stride); |
712 | |
|
713 | 0 | for (row_it = 0; row_it < sb_h; row_it++) { |
714 | 0 | svt_memcpy(to_ptr + row_it * pcs->input_frame16bit->v_stride, |
715 | 0 | from_ptr + row_it * input_sample16bit_buffer->v_stride, |
716 | 0 | sb_w * 2); |
717 | 0 | } |
718 | 0 | } |
719 | | |
720 | | void svt_aom_update_mi_map_enc_dec(BlkStruct* blk_ptr, ModeDecisionContext* ctx, PictureControlSet* pcs); |
721 | | |
722 | 0 | static void perform_intra_coding_loop(PictureControlSet* pcs, EncDecContext* ed_ctx) { |
723 | 0 | BlkStruct* blk_ptr = ed_ctx->blk_ptr; |
724 | 0 | bool is_16bit = ed_ctx->is_16bit; |
725 | 0 | uint8_t is_inter = 0; // set to 0 b/c this is the intra path |
726 | 0 | EbPictureBufferDesc* recon_buffer; |
727 | 0 | EbPictureBufferDesc* coeff_buffer_sb = pcs->ppcs->enc_dec_ptr->quantized_coeff[ed_ctx->sb_index]; |
728 | 0 | uint16_t tile_idx = ed_ctx->tile_index; |
729 | 0 | NeighborArrayUnit* ep_luma_recon_na = is_16bit ? pcs->ep_luma_recon_na_16bit[tile_idx] |
730 | 0 | : pcs->ep_luma_recon_na[tile_idx]; |
731 | 0 | NeighborArrayUnit* ep_cb_recon_na = is_16bit ? pcs->ep_cb_recon_na_16bit[tile_idx] : pcs->ep_cb_recon_na[tile_idx]; |
732 | 0 | NeighborArrayUnit* ep_cr_recon_na = is_16bit ? pcs->ep_cr_recon_na_16bit[tile_idx] : pcs->ep_cr_recon_na[tile_idx]; |
733 | | |
734 | | // temp buffers for performing the transform/generating the recon |
735 | 0 | EbPictureBufferDesc* residual_buffer = ed_ctx->md_ctx->temp_residual; |
736 | 0 | EbPictureBufferDesc* transform_buffer = ed_ctx->md_ctx->tx_coeffs; |
737 | 0 | EbPictureBufferDesc* inverse_quant_buffer = ed_ctx->md_ctx->cand_bf_ptr_array[0]->rec_coeff; |
738 | |
|
739 | 0 | blk_ptr->y_has_coeff = 0; |
740 | 0 | blk_ptr->u_has_coeff = 0; |
741 | 0 | blk_ptr->v_has_coeff = 0; |
742 | 0 | uint16_t eobs[MAX_TXB_COUNT][3]; |
743 | 0 | svt_aom_get_recon_pic(pcs, &recon_buffer, is_16bit); |
744 | 0 | const uint8_t tx_depth = blk_ptr->block_mi.tx_depth; |
745 | 0 | const TxSize tx_size = tx_depth_to_tx_size[tx_depth][ed_ctx->blk_geom->bsize]; |
746 | 0 | const int tx_width = tx_size_wide[tx_size]; |
747 | 0 | const int tx_height = tx_size_high[tx_size]; |
748 | 0 | const TxSize tx_size_uv = av1_get_max_uv_txsize(ed_ctx->blk_geom->bsize, 1, 1); |
749 | 0 | const int tx_width_uv = tx_size_wide[tx_size_uv]; |
750 | 0 | const int tx_height_uv = tx_size_high[tx_size_uv]; |
751 | 0 | const uint32_t tot_tu = tx_blocks_per_depth[ed_ctx->blk_geom->bsize][tx_depth]; |
752 | 0 | const uint32_t sb_size_luma = pcs->ppcs->scs->sb_size; |
753 | 0 | const uint32_t sb_size_chroma = pcs->ppcs->scs->sb_size >> 1; |
754 | | |
755 | | // Luma path |
756 | 0 | for (ed_ctx->txb_itr = 0; ed_ctx->txb_itr < tot_tu; ed_ctx->txb_itr++) { |
757 | 0 | const uint16_t txb_origin_x = ed_ctx->blk_org_x + |
758 | 0 | tx_org[ed_ctx->blk_geom->bsize][is_inter][tx_depth][ed_ctx->txb_itr].x; |
759 | 0 | const uint16_t txb_origin_y = ed_ctx->blk_org_y + |
760 | 0 | tx_org[ed_ctx->blk_geom->bsize][is_inter][tx_depth][ed_ctx->txb_itr].y; |
761 | 0 | ed_ctx->md_ctx->luma_txb_skip_context = 0; |
762 | 0 | ed_ctx->md_ctx->luma_dc_sign_context = 0; |
763 | 0 | svt_aom_get_txb_ctx(pcs, |
764 | 0 | COMPONENT_LUMA, |
765 | 0 | pcs->ep_luma_dc_sign_level_coeff_na[tile_idx], |
766 | 0 | txb_origin_x, |
767 | 0 | txb_origin_y, |
768 | 0 | ed_ctx->blk_geom->bsize, |
769 | 0 | tx_size, |
770 | 0 | &ed_ctx->md_ctx->luma_txb_skip_context, |
771 | 0 | &ed_ctx->md_ctx->luma_dc_sign_context); |
772 | | |
773 | | // Copy neighbour arrays for intra prediction |
774 | 0 | const PredictionMode mode = blk_ptr->block_mi.mode; |
775 | 0 | const int ang = blk_ptr->block_mi.angle_delta[PLANE_TYPE_Y]; |
776 | 0 | const IntraSize intra_size = ang == 0 ? svt_aom_intra_unit[mode] : (IntraSize){2, 2}; |
777 | 0 | uint8_t top_neigh_array[(64 * 2 + 1) << 1]; |
778 | 0 | uint8_t left_neigh_array[(64 * 2 + 1) << 1]; |
779 | 0 | if (txb_origin_y != 0) { |
780 | 0 | svt_memcpy(top_neigh_array + ((uint64_t)1 << is_16bit), |
781 | 0 | ep_luma_recon_na->top_array + (txb_origin_x << is_16bit), |
782 | 0 | (tx_width * intra_size.top) << is_16bit); |
783 | 0 | } |
784 | |
|
785 | 0 | if (txb_origin_x != 0) { |
786 | 0 | uint16_t multipler = (txb_origin_y % sb_size_luma + tx_height * intra_size.left) > sb_size_luma |
787 | 0 | ? 1 |
788 | 0 | : intra_size.left; |
789 | 0 | svt_memcpy(left_neigh_array + ((uint64_t)1 << is_16bit), |
790 | 0 | ep_luma_recon_na->left_array + (txb_origin_y << is_16bit), |
791 | 0 | (tx_height * multipler) << is_16bit); |
792 | 0 | } |
793 | |
|
794 | 0 | if (txb_origin_y != 0 && txb_origin_x != 0) { |
795 | 0 | if (is_16bit) { |
796 | 0 | uint16_t* top_hbd = (uint16_t*)top_neigh_array; |
797 | 0 | uint16_t* left_hbd = (uint16_t*)left_neigh_array; |
798 | 0 | top_hbd[0] = left_hbd[0] = ((uint16_t*)(ep_luma_recon_na->top_left_array) + |
799 | 0 | ep_luma_recon_na->max_pic_h + txb_origin_x - txb_origin_y)[0]; |
800 | |
|
801 | 0 | } else { |
802 | 0 | top_neigh_array[0] = left_neigh_array[0] = |
803 | 0 | ep_luma_recon_na->top_left_array[ep_luma_recon_na->max_pic_h + txb_origin_x - txb_origin_y]; |
804 | 0 | } |
805 | 0 | } |
806 | |
|
807 | 0 | svt_av1_predict_intra_block(blk_ptr->av1xd, |
808 | 0 | ed_ctx->blk_geom->bsize, |
809 | 0 | tx_size, |
810 | 0 | mode, |
811 | 0 | blk_ptr->block_mi.angle_delta[PLANE_TYPE_Y], |
812 | 0 | blk_ptr->palette_size[0] > 0, |
813 | 0 | blk_ptr->palette_info, |
814 | 0 | blk_ptr->block_mi.filter_intra_mode, |
815 | 0 | top_neigh_array + ((uint64_t)1 << is_16bit), |
816 | 0 | left_neigh_array + ((uint64_t)1 << is_16bit), |
817 | 0 | recon_buffer, |
818 | 0 | (tx_org[ed_ctx->blk_geom->bsize][is_inter][tx_depth][ed_ctx->txb_itr].x) >> 2, |
819 | 0 | (tx_org[ed_ctx->blk_geom->bsize][is_inter][tx_depth][ed_ctx->txb_itr].y) >> 2, |
820 | 0 | PLANE_Y, |
821 | 0 | ed_ctx->md_ctx->shape, |
822 | 0 | txb_origin_x, |
823 | 0 | txb_origin_y, |
824 | 0 | &pcs->scs->seq_header, |
825 | 0 | ed_ctx->bit_depth); |
826 | | |
827 | | // Encode Transform Unit -INTRA- |
828 | 0 | av1_encode_loop(pcs, |
829 | 0 | ed_ctx, |
830 | 0 | txb_origin_x, |
831 | 0 | txb_origin_y, |
832 | 0 | recon_buffer, |
833 | 0 | coeff_buffer_sb, |
834 | 0 | residual_buffer, |
835 | 0 | transform_buffer, |
836 | 0 | inverse_quant_buffer, |
837 | 0 | PICTURE_BUFFER_DESC_LUMA_MASK, |
838 | 0 | eobs[ed_ctx->txb_itr]); |
839 | 0 | av1_encode_generate_recon(pcs, |
840 | 0 | ed_ctx, |
841 | 0 | txb_origin_x, |
842 | 0 | txb_origin_y, |
843 | 0 | recon_buffer, |
844 | 0 | inverse_quant_buffer, |
845 | 0 | PICTURE_BUFFER_DESC_LUMA_MASK, |
846 | 0 | eobs[ed_ctx->txb_itr]); |
847 | | |
848 | | // Update Recon Samples-INTRA- |
849 | 0 | encode_pass_update_recon_sample_neighbour_arrays(ep_luma_recon_na, |
850 | 0 | ep_cb_recon_na, |
851 | 0 | ep_cr_recon_na, |
852 | 0 | recon_buffer, |
853 | 0 | txb_origin_x, |
854 | 0 | txb_origin_y, |
855 | 0 | tx_width, |
856 | 0 | tx_height, |
857 | 0 | tx_width_uv, |
858 | 0 | tx_height_uv, |
859 | 0 | PICTURE_BUFFER_DESC_LUMA_MASK, |
860 | 0 | is_16bit); |
861 | |
|
862 | 0 | ed_ctx->coded_area_sb += tx_width * tx_height; |
863 | | |
864 | | // Update the luma Dc Sign Level Coeff Neighbor Array |
865 | 0 | { |
866 | 0 | uint8_t dc_sign_level_coeff = (uint8_t)blk_ptr->quant_dc.y[ed_ctx->txb_itr]; |
867 | 0 | svt_aom_neighbor_array_unit_mode_write(pcs->ep_luma_dc_sign_level_coeff_na[tile_idx], |
868 | 0 | (uint8_t*)&dc_sign_level_coeff, |
869 | 0 | txb_origin_x, |
870 | 0 | txb_origin_y, |
871 | 0 | tx_width, |
872 | 0 | tx_height, |
873 | 0 | NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK); |
874 | 0 | } |
875 | 0 | } // Transform Loop |
876 | | |
877 | | // Chroma path |
878 | |
|
879 | 0 | if (ed_ctx->md_ctx->has_uv) { |
880 | 0 | ed_ctx->txb_itr = 0; |
881 | 0 | uint16_t txb_origin_x = ed_ctx->blk_org_x + |
882 | 0 | tx_org[ed_ctx->blk_geom->bsize][is_inter][tx_depth][ed_ctx->txb_itr].x; |
883 | 0 | uint16_t txb_origin_y = ed_ctx->blk_org_y + |
884 | 0 | tx_org[ed_ctx->blk_geom->bsize][is_inter][tx_depth][ed_ctx->txb_itr].y; |
885 | 0 | uint32_t blk_originx_uv = (ed_ctx->blk_org_x >> 3 << 3) >> 1; |
886 | 0 | uint32_t blk_originy_uv = (ed_ctx->blk_org_y >> 3 << 3) >> 1; |
887 | |
|
888 | 0 | ed_ctx->md_ctx->cb_txb_skip_context = 0; |
889 | 0 | ed_ctx->md_ctx->cb_dc_sign_context = 0; |
890 | 0 | svt_aom_get_txb_ctx(pcs, |
891 | 0 | COMPONENT_CHROMA, |
892 | 0 | pcs->ep_cb_dc_sign_level_coeff_na[tile_idx], |
893 | 0 | blk_originx_uv, |
894 | 0 | blk_originy_uv, |
895 | 0 | ed_ctx->blk_geom->bsize_uv, |
896 | 0 | tx_size_uv, |
897 | 0 | &ed_ctx->md_ctx->cb_txb_skip_context, |
898 | 0 | &ed_ctx->md_ctx->cb_dc_sign_context); |
899 | |
|
900 | 0 | ed_ctx->md_ctx->cr_txb_skip_context = 0; |
901 | 0 | ed_ctx->md_ctx->cr_dc_sign_context = 0; |
902 | 0 | svt_aom_get_txb_ctx(pcs, |
903 | 0 | COMPONENT_CHROMA, |
904 | 0 | pcs->ep_cr_dc_sign_level_coeff_na[tile_idx], |
905 | 0 | blk_originx_uv, |
906 | 0 | blk_originy_uv, |
907 | 0 | ed_ctx->blk_geom->bsize_uv, |
908 | 0 | tx_size_uv, |
909 | 0 | &ed_ctx->md_ctx->cr_txb_skip_context, |
910 | 0 | &ed_ctx->md_ctx->cr_dc_sign_context); |
911 | | |
912 | | // Generate prediction for both chroma planes |
913 | 0 | for (Plane plane = PLANE_U; plane <= PLANE_V; ++plane) { |
914 | 0 | uint8_t top_neigh_array[(64 * 2 + 1) << 1]; |
915 | 0 | uint8_t left_neigh_array[(64 * 2 + 1) << 1]; |
916 | | |
917 | | // Copy neighbour arrays for intra prediction |
918 | 0 | const PredictionMode mode = (blk_ptr->block_mi.uv_mode == UV_CFL_PRED) |
919 | 0 | ? (PredictionMode)UV_DC_PRED |
920 | 0 | : (PredictionMode)blk_ptr->block_mi.uv_mode; |
921 | 0 | const int ang = blk_ptr->block_mi.angle_delta[PLANE_TYPE_UV]; |
922 | 0 | const IntraSize intra_size = ang == 0 ? svt_aom_intra_unit[mode] : (IntraSize){2, 2}; |
923 | 0 | NeighborArrayUnit* eb_uv_neigh_array = plane == 1 ? ep_cb_recon_na : ep_cr_recon_na; |
924 | 0 | if (blk_originy_uv != 0) { |
925 | 0 | svt_memcpy(top_neigh_array + ((uint64_t)1 << is_16bit), |
926 | 0 | eb_uv_neigh_array->top_array + (blk_originx_uv << is_16bit), |
927 | 0 | (ed_ctx->blk_geom->bwidth_uv * intra_size.top) << is_16bit); |
928 | 0 | } |
929 | |
|
930 | 0 | if (blk_originx_uv != 0) { |
931 | 0 | uint16_t multipler = (blk_originy_uv % sb_size_chroma + |
932 | 0 | ed_ctx->blk_geom->bheight_uv * intra_size.left) > sb_size_chroma |
933 | 0 | ? 1 |
934 | 0 | : intra_size.left; |
935 | 0 | svt_memcpy(left_neigh_array + ((uint64_t)1 << is_16bit), |
936 | 0 | eb_uv_neigh_array->left_array + (blk_originy_uv << is_16bit), |
937 | 0 | (ed_ctx->blk_geom->bheight_uv * multipler) << is_16bit); |
938 | 0 | } |
939 | |
|
940 | 0 | if (blk_originy_uv != 0 && blk_originx_uv != 0) { |
941 | 0 | if (is_16bit) { |
942 | 0 | uint16_t* top_hbd = (uint16_t*)top_neigh_array; |
943 | 0 | uint16_t* left_hbd = (uint16_t*)left_neigh_array; |
944 | 0 | top_hbd[0] = left_hbd[0] = ((uint16_t*)(eb_uv_neigh_array->top_left_array) + |
945 | 0 | eb_uv_neigh_array->max_pic_h + blk_originx_uv - blk_originy_uv)[0]; |
946 | 0 | } else { |
947 | 0 | top_neigh_array[0] = left_neigh_array[0] = |
948 | 0 | eb_uv_neigh_array |
949 | 0 | ->top_left_array[eb_uv_neigh_array->max_pic_h + blk_originx_uv - blk_originy_uv]; |
950 | 0 | } |
951 | 0 | } |
952 | |
|
953 | 0 | svt_av1_predict_intra_block(blk_ptr->av1xd, |
954 | 0 | ed_ctx->blk_geom->bsize, |
955 | 0 | tx_size_uv, |
956 | 0 | mode, |
957 | 0 | blk_ptr->block_mi.angle_delta[PLANE_TYPE_UV], |
958 | 0 | 0, //chroma |
959 | 0 | blk_ptr->palette_info, |
960 | 0 | FILTER_INTRA_MODES, |
961 | 0 | top_neigh_array + ((uint64_t)1 << is_16bit), |
962 | 0 | left_neigh_array + ((uint64_t)1 << is_16bit), |
963 | 0 | recon_buffer, |
964 | 0 | 0, |
965 | 0 | 0, |
966 | 0 | plane, |
967 | 0 | ed_ctx->md_ctx->shape, |
968 | 0 | plane ? ROUND_UV(ed_ctx->blk_org_x) >> 1 : txb_origin_x, |
969 | 0 | plane ? ROUND_UV(ed_ctx->blk_org_y) >> 1 : txb_origin_y, |
970 | 0 | &pcs->scs->seq_header, |
971 | 0 | ed_ctx->bit_depth); |
972 | 0 | } |
973 | | |
974 | | // Encode Transform Unit -INTRA- |
975 | 0 | av1_encode_loop(pcs, |
976 | 0 | ed_ctx, |
977 | 0 | txb_origin_x, |
978 | 0 | txb_origin_y, |
979 | 0 | recon_buffer, |
980 | 0 | coeff_buffer_sb, |
981 | 0 | residual_buffer, |
982 | 0 | transform_buffer, |
983 | 0 | inverse_quant_buffer, |
984 | 0 | PICTURE_BUFFER_DESC_CHROMA_MASK, |
985 | 0 | eobs[ed_ctx->txb_itr]); |
986 | 0 | av1_encode_generate_recon(pcs, |
987 | 0 | ed_ctx, |
988 | 0 | txb_origin_x, |
989 | 0 | txb_origin_y, |
990 | 0 | recon_buffer, |
991 | 0 | inverse_quant_buffer, |
992 | 0 | PICTURE_BUFFER_DESC_CHROMA_MASK, |
993 | 0 | eobs[ed_ctx->txb_itr]); |
994 | | |
995 | | // Update Recon Samples-INTRA- |
996 | 0 | encode_pass_update_recon_sample_neighbour_arrays(ep_luma_recon_na, |
997 | 0 | ep_cb_recon_na, |
998 | 0 | ep_cr_recon_na, |
999 | 0 | recon_buffer, |
1000 | 0 | txb_origin_x, |
1001 | 0 | txb_origin_y, |
1002 | 0 | tx_width, |
1003 | 0 | tx_height, |
1004 | 0 | tx_width_uv, |
1005 | 0 | tx_height_uv, |
1006 | 0 | PICTURE_BUFFER_DESC_CHROMA_MASK, |
1007 | 0 | is_16bit); |
1008 | |
|
1009 | 0 | ed_ctx->coded_area_sb_uv += tx_width_uv * tx_height_uv; |
1010 | | |
1011 | | // Update the cb Dc Sign Level Coeff Neighbor Array |
1012 | 0 | { |
1013 | 0 | uint8_t dc_sign_level_coeff = (uint8_t)blk_ptr->quant_dc.u[ed_ctx->txb_itr]; |
1014 | 0 | svt_aom_neighbor_array_unit_mode_write(pcs->ep_cb_dc_sign_level_coeff_na[tile_idx], |
1015 | 0 | (uint8_t*)&dc_sign_level_coeff, |
1016 | 0 | ROUND_UV(txb_origin_x) >> 1, |
1017 | 0 | ROUND_UV(txb_origin_y) >> 1, |
1018 | 0 | tx_width_uv, |
1019 | 0 | tx_height_uv, |
1020 | 0 | NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK); |
1021 | 0 | } |
1022 | | |
1023 | | // Update the cr DC Sign Level Coeff Neighbor Array |
1024 | 0 | { |
1025 | 0 | uint8_t dc_sign_level_coeff = (uint8_t)blk_ptr->quant_dc.v[ed_ctx->txb_itr]; |
1026 | 0 | svt_aom_neighbor_array_unit_mode_write(pcs->ep_cr_dc_sign_level_coeff_na[tile_idx], |
1027 | 0 | (uint8_t*)&dc_sign_level_coeff, |
1028 | 0 | ROUND_UV(txb_origin_x) >> 1, |
1029 | 0 | ROUND_UV(txb_origin_y) >> 1, |
1030 | 0 | tx_width_uv, |
1031 | 0 | tx_height_uv, |
1032 | 0 | NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK); |
1033 | 0 | } |
1034 | 0 | } // Transform Loop |
1035 | 0 | assert(IMPLIES(!ed_ctx->md_ctx->has_uv, blk_ptr->u_has_coeff == 0 && blk_ptr->v_has_coeff == 0)); |
1036 | 0 | blk_ptr->block_has_coeff = (blk_ptr->y_has_coeff || blk_ptr->u_has_coeff || blk_ptr->v_has_coeff); |
1037 | 0 | } |
1038 | | |
1039 | 0 | #define REFMVS_LIMIT ((1 << 12) - 1) |
1040 | | |
1041 | | static void av1_copy_frame_mvs(PictureControlSet* pcs, const Av1Common* const cm, MbModeInfo mi, int mi_row, int mi_col, |
1042 | 0 | int x_mis, int y_mis, EbReferenceObject* object_ptr) { |
1043 | 0 | const int frame_mvs_stride = ROUND_POWER_OF_TWO(cm->mi_cols, 1); |
1044 | 0 | MV_REF* frame_mvs = object_ptr->mvs + (mi_row >> 1) * frame_mvs_stride + (mi_col >> 1); |
1045 | 0 | x_mis = ROUND_POWER_OF_TWO(x_mis, 1); |
1046 | 0 | y_mis = ROUND_POWER_OF_TWO(y_mis, 1); |
1047 | 0 | int w, h; |
1048 | |
|
1049 | 0 | for (h = 0; h < y_mis; h++) { |
1050 | 0 | MV_REF* mv = frame_mvs; |
1051 | 0 | for (w = 0; w < x_mis; w++) { |
1052 | 0 | mv->ref_frame = NONE_FRAME; |
1053 | 0 | mv->mv.as_int = 0; |
1054 | |
|
1055 | 0 | for (int idx = 0; idx < 2; ++idx) { |
1056 | 0 | MvReferenceFrame ref_frame = mi.block_mi.ref_frame[idx]; |
1057 | 0 | if (ref_frame > INTRA_FRAME) { |
1058 | 0 | int8_t ref_idx = pcs->ref_frame_side[ref_frame]; |
1059 | 0 | if (ref_idx) { |
1060 | 0 | continue; |
1061 | 0 | } |
1062 | 0 | if ((abs(mi.block_mi.mv[idx].y) > REFMVS_LIMIT) || (abs(mi.block_mi.mv[idx].x) > REFMVS_LIMIT)) { |
1063 | 0 | continue; |
1064 | 0 | } |
1065 | 0 | mv->ref_frame = ref_frame; |
1066 | 0 | mv->mv.as_int = mi.block_mi.mv[idx].as_int; |
1067 | 0 | } |
1068 | 0 | } |
1069 | 0 | mv++; |
1070 | 0 | } |
1071 | 0 | frame_mvs += frame_mvs_stride; |
1072 | 0 | } |
1073 | 0 | } |
1074 | | |
1075 | | /* |
1076 | | * Convert the recon picture from 16bit to 8bit. Recon pic is passed through the pcs. |
1077 | | */ |
1078 | 0 | void svt_aom_convert_recon_16bit_to_8bit(PictureControlSet* pcs, EncDecContext* ctx) { |
1079 | 0 | EbPictureBufferDesc* recon_buffer_16bit; |
1080 | 0 | EbPictureBufferDesc* recon_buffer_8bit; |
1081 | 0 | svt_aom_get_recon_pic(pcs, &recon_buffer_16bit, 1); |
1082 | 0 | if (pcs->ppcs->is_ref == true) { |
1083 | | // get the 16bit form of the input SB |
1084 | 0 | recon_buffer_8bit = ((EbReferenceObject*)pcs->ppcs->ref_pic_wrapper->object_ptr)->reference_picture; |
1085 | 0 | } else { // non ref pictures |
1086 | 0 | recon_buffer_8bit = pcs->ppcs->enc_dec_ptr->recon_pic; |
1087 | 0 | } |
1088 | |
|
1089 | 0 | uint32_t pred_buf_x_offest = ctx->blk_org_x; |
1090 | 0 | uint32_t pred_buf_y_offest = ctx->blk_org_y; |
1091 | |
|
1092 | 0 | uint16_t* dst_16bit = (uint16_t*)(recon_buffer_16bit->y_buffer) + pred_buf_x_offest + |
1093 | 0 | (pred_buf_y_offest * recon_buffer_16bit->y_stride); |
1094 | 0 | int32_t dst_stride_16bit = recon_buffer_16bit->y_stride; |
1095 | |
|
1096 | 0 | uint8_t* dst = recon_buffer_8bit->y_buffer + pred_buf_x_offest + (pred_buf_y_offest * recon_buffer_8bit->y_stride); |
1097 | 0 | int32_t dst_stride = recon_buffer_8bit->y_stride; |
1098 | |
|
1099 | 0 | svt_convert_16bit_to_8bit( |
1100 | 0 | dst_16bit, dst_stride_16bit, dst, dst_stride, ctx->blk_geom->bwidth, ctx->blk_geom->bheight); |
1101 | | |
1102 | | //copy recon from 16bit to 8bit |
1103 | 0 | pred_buf_x_offest = ROUND_UV(ctx->blk_org_x) >> 1; |
1104 | 0 | pred_buf_y_offest = ROUND_UV(ctx->blk_org_y) >> 1; |
1105 | |
|
1106 | 0 | dst_16bit = (uint16_t*)(recon_buffer_16bit->u_buffer) + pred_buf_x_offest + |
1107 | 0 | (pred_buf_y_offest * recon_buffer_16bit->u_stride); |
1108 | 0 | dst_stride_16bit = recon_buffer_16bit->u_stride; |
1109 | |
|
1110 | 0 | dst = recon_buffer_8bit->u_buffer + pred_buf_x_offest + (pred_buf_y_offest * recon_buffer_8bit->u_stride); |
1111 | 0 | dst_stride = recon_buffer_8bit->u_stride; |
1112 | |
|
1113 | 0 | svt_convert_16bit_to_8bit( |
1114 | 0 | dst_16bit, dst_stride_16bit, dst, dst_stride, ctx->blk_geom->bwidth_uv, ctx->blk_geom->bheight_uv); |
1115 | |
|
1116 | 0 | dst_16bit = (uint16_t*)(recon_buffer_16bit->v_buffer) + |
1117 | 0 | (pred_buf_x_offest + (pred_buf_y_offest * recon_buffer_16bit->v_stride)); |
1118 | 0 | dst_stride_16bit = recon_buffer_16bit->v_stride; |
1119 | 0 | dst = recon_buffer_8bit->v_buffer + pred_buf_x_offest + (pred_buf_y_offest * recon_buffer_8bit->v_stride); |
1120 | 0 | dst_stride = recon_buffer_8bit->v_stride; |
1121 | |
|
1122 | 0 | svt_convert_16bit_to_8bit( |
1123 | 0 | dst_16bit, dst_stride_16bit, dst, dst_stride, ctx->blk_geom->bwidth_uv, ctx->blk_geom->bheight_uv); |
1124 | 0 | } |
1125 | | |
1126 | | /* |
1127 | | * Inter coding loop for EncDec process. |
1128 | | * |
1129 | | * For the given mode info, perform inter prediction, transform and recon. |
1130 | | * Update relevant neighbour arrays. |
1131 | | */ |
1132 | 0 | static void perform_inter_coding_loop(PictureControlSet* pcs, EncDecContext* ctx) { |
1133 | 0 | SequenceControlSet* scs = pcs->scs; |
1134 | 0 | const BlockGeom* blk_geom = ctx->blk_geom; |
1135 | 0 | BlkStruct* blk_ptr = ctx->blk_ptr; |
1136 | | |
1137 | | // temp buffers for performing the transform/generating the recon |
1138 | 0 | EbPictureBufferDesc* residual_buffer = ctx->md_ctx->temp_residual; |
1139 | 0 | EbPictureBufferDesc* transform_buffer = ctx->md_ctx->tx_coeffs; |
1140 | 0 | EbPictureBufferDesc* inverse_quant_buffer = ctx->md_ctx->cand_bf_ptr_array[0]->rec_coeff; |
1141 | |
|
1142 | 0 | bool is_16bit = ctx->is_16bit; |
1143 | 0 | EbPictureBufferDesc* recon_buffer; |
1144 | 0 | EbPictureBufferDesc* coeff_buffer_sb = pcs->ppcs->enc_dec_ptr->quantized_coeff[ctx->sb_index]; |
1145 | 0 | ModeDecisionContext* md_ctx = ctx->md_ctx; |
1146 | 0 | const int is_inter = is_inter_block(&blk_ptr->block_mi); |
1147 | 0 | assert(is_inter); |
1148 | | |
1149 | | // Dereferencing early |
1150 | 0 | uint16_t tile_idx = ctx->tile_index; |
1151 | |
|
1152 | 0 | NeighborArrayUnit* ep_luma_recon_na = is_16bit ? pcs->ep_luma_recon_na_16bit[tile_idx] |
1153 | 0 | : pcs->ep_luma_recon_na[tile_idx]; |
1154 | 0 | NeighborArrayUnit* ep_cb_recon_na = is_16bit ? pcs->ep_cb_recon_na_16bit[tile_idx] : pcs->ep_cb_recon_na[tile_idx]; |
1155 | 0 | NeighborArrayUnit* ep_cr_recon_na = is_16bit ? pcs->ep_cr_recon_na_16bit[tile_idx] : pcs->ep_cr_recon_na[tile_idx]; |
1156 | |
|
1157 | 0 | svt_aom_get_recon_pic(pcs, &recon_buffer, is_16bit); |
1158 | | |
1159 | | // Inter Prediction |
1160 | 0 | EbPictureBufferDesc* ref_pic_list0; |
1161 | 0 | EbPictureBufferDesc* ref_pic_list1; |
1162 | 0 | if (blk_ptr->block_mi.use_intrabc) { |
1163 | 0 | svt_aom_get_recon_pic(pcs, &ref_pic_list0, is_16bit); |
1164 | 0 | ref_pic_list1 = (EbPictureBufferDesc*)NULL; |
1165 | 0 | } else { |
1166 | 0 | ref_pic_list0 = svt_aom_get_ref_pic_buffer(pcs, blk_ptr->block_mi.ref_frame[0]); |
1167 | 0 | ref_pic_list1 = svt_aom_get_ref_pic_buffer(pcs, blk_ptr->block_mi.ref_frame[1]); |
1168 | 0 | } |
1169 | |
|
1170 | 0 | svt_aom_inter_prediction(scs, |
1171 | 0 | pcs, |
1172 | 0 | &blk_ptr->block_mi, |
1173 | 0 | &md_ctx->blk_ptr->wm_params_l0, |
1174 | 0 | &md_ctx->blk_ptr->wm_params_l1, |
1175 | 0 | blk_ptr, |
1176 | 0 | blk_geom->bsize, |
1177 | 0 | ctx->md_ctx->shape, |
1178 | 0 | false, //use_precomputed_obmc, |
1179 | 0 | false, //use_precomputed_ii |
1180 | 0 | NULL, // md_ctx - only needed for precompute obmc/ii |
1181 | 0 | ep_luma_recon_na, |
1182 | 0 | ep_cb_recon_na, |
1183 | 0 | ep_cr_recon_na, |
1184 | 0 | ref_pic_list0, |
1185 | 0 | ref_pic_list1, |
1186 | 0 | ctx->blk_org_x, |
1187 | 0 | ctx->blk_org_y, |
1188 | 0 | recon_buffer, |
1189 | 0 | ctx->blk_org_x, |
1190 | 0 | ctx->blk_org_y, |
1191 | 0 | md_ctx->has_uv ? PICTURE_BUFFER_DESC_FULL_MASK : PICTURE_BUFFER_DESC_LUMA_MASK, |
1192 | 0 | (uint8_t)scs->static_config.encoder_bit_depth, |
1193 | 0 | is_16bit); |
1194 | | |
1195 | | // Transform Loop |
1196 | 0 | blk_ptr->y_has_coeff = 0; |
1197 | 0 | blk_ptr->u_has_coeff = 0; |
1198 | 0 | blk_ptr->v_has_coeff = 0; |
1199 | | |
1200 | | // Initialize the Transform Loop |
1201 | 0 | uint16_t eobs[MAX_TXB_COUNT][3]; |
1202 | 0 | const uint8_t tx_depth = blk_ptr->block_mi.tx_depth; |
1203 | 0 | const uint16_t tot_tu = tx_blocks_per_depth[blk_geom->bsize][tx_depth]; |
1204 | 0 | const TxSize tx_size = tx_depth_to_tx_size[tx_depth][blk_geom->bsize]; |
1205 | 0 | const int tx_width = tx_size_wide[tx_size]; |
1206 | 0 | const int tx_height = tx_size_high[tx_size]; |
1207 | 0 | const TxSize tx_size_uv = av1_get_max_uv_txsize(blk_geom->bsize, 1, 1); |
1208 | 0 | const int tx_width_uv = tx_size_wide[tx_size_uv]; |
1209 | 0 | const int tx_height_uv = tx_size_high[tx_size_uv]; |
1210 | |
|
1211 | 0 | for (ctx->txb_itr = 0; ctx->txb_itr < tot_tu; ctx->txb_itr++) { |
1212 | 0 | const uint8_t uv_pass = tx_depth && ctx->txb_itr ? 0 : 1; //NM: 128x128 exeption |
1213 | 0 | const uint16_t txb_origin_x = ctx->blk_org_x + tx_org[blk_geom->bsize][is_inter][tx_depth][ctx->txb_itr].x; |
1214 | 0 | const uint16_t txb_origin_y = ctx->blk_org_y + tx_org[blk_geom->bsize][is_inter][tx_depth][ctx->txb_itr].y; |
1215 | 0 | md_ctx->luma_txb_skip_context = 0; |
1216 | 0 | md_ctx->luma_dc_sign_context = 0; |
1217 | 0 | svt_aom_get_txb_ctx(pcs, |
1218 | 0 | COMPONENT_LUMA, |
1219 | 0 | pcs->ep_luma_dc_sign_level_coeff_na[tile_idx], |
1220 | 0 | txb_origin_x, |
1221 | 0 | txb_origin_y, |
1222 | 0 | blk_geom->bsize, |
1223 | 0 | tx_size, |
1224 | 0 | &md_ctx->luma_txb_skip_context, |
1225 | 0 | &md_ctx->luma_dc_sign_context); |
1226 | |
|
1227 | 0 | if (md_ctx->has_uv && uv_pass) { |
1228 | 0 | md_ctx->cb_txb_skip_context = 0; |
1229 | 0 | md_ctx->cb_dc_sign_context = 0; |
1230 | 0 | svt_aom_get_txb_ctx(pcs, |
1231 | 0 | COMPONENT_CHROMA, |
1232 | 0 | pcs->ep_cb_dc_sign_level_coeff_na[tile_idx], |
1233 | 0 | ROUND_UV(txb_origin_x) >> 1, |
1234 | 0 | ROUND_UV(txb_origin_y) >> 1, |
1235 | 0 | blk_geom->bsize_uv, |
1236 | 0 | tx_size_uv, |
1237 | 0 | &md_ctx->cb_txb_skip_context, |
1238 | 0 | &md_ctx->cb_dc_sign_context); |
1239 | |
|
1240 | 0 | md_ctx->cr_txb_skip_context = 0; |
1241 | 0 | md_ctx->cr_dc_sign_context = 0; |
1242 | 0 | svt_aom_get_txb_ctx(pcs, |
1243 | 0 | COMPONENT_CHROMA, |
1244 | 0 | pcs->ep_cr_dc_sign_level_coeff_na[tile_idx], |
1245 | 0 | ROUND_UV(txb_origin_x) >> 1, |
1246 | 0 | ROUND_UV(txb_origin_y) >> 1, |
1247 | 0 | blk_geom->bsize_uv, |
1248 | 0 | tx_size_uv, |
1249 | 0 | &md_ctx->cr_txb_skip_context, |
1250 | 0 | &md_ctx->cr_dc_sign_context); |
1251 | 0 | } |
1252 | 0 | if (blk_ptr->block_mi.skip_mode == true) { |
1253 | 0 | blk_ptr->y_has_coeff = 0; |
1254 | 0 | blk_ptr->u_has_coeff = 0; |
1255 | 0 | blk_ptr->v_has_coeff = 0; |
1256 | |
|
1257 | 0 | blk_ptr->quant_dc.y[ctx->txb_itr] = 0; |
1258 | 0 | blk_ptr->quant_dc.u[ctx->txb_itr] = 0; |
1259 | 0 | blk_ptr->quant_dc.v[ctx->txb_itr] = 0; |
1260 | 0 | } else { |
1261 | | //inter mode 2 |
1262 | 0 | av1_encode_loop(pcs, |
1263 | 0 | ctx, |
1264 | 0 | txb_origin_x, //pic offset |
1265 | 0 | txb_origin_y, |
1266 | 0 | recon_buffer, |
1267 | 0 | coeff_buffer_sb, |
1268 | 0 | residual_buffer, |
1269 | 0 | transform_buffer, |
1270 | 0 | inverse_quant_buffer, |
1271 | 0 | md_ctx->has_uv && uv_pass ? PICTURE_BUFFER_DESC_FULL_MASK : PICTURE_BUFFER_DESC_LUMA_MASK, |
1272 | 0 | eobs[ctx->txb_itr]); |
1273 | 0 | } |
1274 | | |
1275 | | //inter mode |
1276 | 0 | av1_encode_generate_recon( |
1277 | 0 | pcs, |
1278 | 0 | ctx, |
1279 | 0 | txb_origin_x, //pic offset |
1280 | 0 | txb_origin_y, |
1281 | 0 | recon_buffer, |
1282 | 0 | inverse_quant_buffer, |
1283 | 0 | md_ctx->has_uv && uv_pass ? PICTURE_BUFFER_DESC_FULL_MASK : PICTURE_BUFFER_DESC_LUMA_MASK, |
1284 | 0 | eobs[ctx->txb_itr]); |
1285 | |
|
1286 | 0 | ctx->coded_area_sb += tx_width * tx_height; |
1287 | |
|
1288 | 0 | if (md_ctx->has_uv && uv_pass) { |
1289 | 0 | ctx->coded_area_sb_uv += tx_width_uv * tx_height_uv; |
1290 | 0 | } |
1291 | | |
1292 | | // Update the luma Dc Sign Level Coeff Neighbor Array |
1293 | 0 | uint8_t dc_sign_level_coeff = (uint8_t)blk_ptr->quant_dc.y[ctx->txb_itr]; |
1294 | |
|
1295 | 0 | svt_aom_neighbor_array_unit_mode_write(pcs->ep_luma_dc_sign_level_coeff_na[tile_idx], |
1296 | 0 | (uint8_t*)&dc_sign_level_coeff, |
1297 | 0 | txb_origin_x, |
1298 | 0 | txb_origin_y, |
1299 | 0 | tx_width, |
1300 | 0 | tx_height, |
1301 | 0 | NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK); |
1302 | | |
1303 | | // Update the cb Dc Sign Level Coeff Neighbor Array |
1304 | 0 | if (md_ctx->has_uv && uv_pass) { |
1305 | 0 | dc_sign_level_coeff = (uint8_t)blk_ptr->quant_dc.u[ctx->txb_itr]; |
1306 | |
|
1307 | 0 | svt_aom_neighbor_array_unit_mode_write(pcs->ep_cb_dc_sign_level_coeff_na[tile_idx], |
1308 | 0 | (uint8_t*)&dc_sign_level_coeff, |
1309 | 0 | ROUND_UV(txb_origin_x) >> 1, |
1310 | 0 | ROUND_UV(txb_origin_y) >> 1, |
1311 | 0 | tx_width_uv, |
1312 | 0 | tx_height_uv, |
1313 | 0 | NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK); |
1314 | | // Update the cr DC Sign Level Coeff Neighbor Array |
1315 | 0 | dc_sign_level_coeff = (uint8_t)blk_ptr->quant_dc.v[ctx->txb_itr]; |
1316 | |
|
1317 | 0 | svt_aom_neighbor_array_unit_mode_write(pcs->ep_cr_dc_sign_level_coeff_na[tile_idx], |
1318 | 0 | (uint8_t*)&dc_sign_level_coeff, |
1319 | 0 | ROUND_UV(txb_origin_x) >> 1, |
1320 | 0 | ROUND_UV(txb_origin_y) >> 1, |
1321 | 0 | tx_width_uv, |
1322 | 0 | tx_height_uv, |
1323 | 0 | NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK); |
1324 | 0 | } |
1325 | |
|
1326 | 0 | } // Transform Loop |
1327 | |
|
1328 | 0 | assert(IMPLIES(!md_ctx->has_uv, blk_ptr->u_has_coeff == 0 && blk_ptr->v_has_coeff == 0)); |
1329 | 0 | blk_ptr->block_has_coeff = (blk_ptr->y_has_coeff || blk_ptr->u_has_coeff || blk_ptr->v_has_coeff); |
1330 | | |
1331 | | // Update Recon Samples Neighbor Arrays -INTER- |
1332 | 0 | encode_pass_update_recon_sample_neighbour_arrays( |
1333 | 0 | ep_luma_recon_na, |
1334 | 0 | ep_cb_recon_na, |
1335 | 0 | ep_cr_recon_na, |
1336 | 0 | recon_buffer, |
1337 | 0 | ctx->blk_org_x, |
1338 | 0 | ctx->blk_org_y, |
1339 | 0 | ctx->blk_geom->bwidth, |
1340 | 0 | ctx->blk_geom->bheight, |
1341 | 0 | ctx->blk_geom->bwidth_uv, |
1342 | 0 | ctx->blk_geom->bheight_uv, |
1343 | 0 | md_ctx->has_uv ? PICTURE_BUFFER_DESC_FULL_MASK : PICTURE_BUFFER_DESC_LUMA_MASK, |
1344 | 0 | is_16bit); |
1345 | 0 | } |
1346 | | |
1347 | | // Copy recon to EncDec buffers if EncDec was bypassed. If pred depth only was used and NSQ is OFF data |
1348 | | // was copied directly to EncDec buffers in MD. |
1349 | 120k | static void copy_recon(PictureControlSet* pcs, ModeDecisionContext* ctx, BlkStruct* blk_ptr) { |
1350 | 120k | const bool is_16bit = ctx->ed_ctx->is_16bit; |
1351 | 120k | EbPictureBufferDesc* recon_buffer; |
1352 | 120k | svt_aom_get_recon_pic(pcs, &recon_buffer, is_16bit); |
1353 | 120k | if (ctx->encoder_bit_depth > EB_EIGHT_BIT) { |
1354 | 0 | uint32_t recon_luma_offset = (ctx->blk_org_y * recon_buffer->y_stride) + ctx->blk_org_x; |
1355 | 0 | uint16_t* ep_recon = ((uint16_t*)(recon_buffer->y_buffer)) + recon_luma_offset; |
1356 | 0 | uint16_t* md_recon = (uint16_t*)(blk_ptr->recon_tmp->y_buffer); |
1357 | |
|
1358 | 0 | for (uint32_t i = 0; i < ctx->blk_geom->bheight; i++) { |
1359 | 0 | svt_memcpy(ep_recon + i * recon_buffer->y_stride, |
1360 | 0 | md_recon + i * blk_ptr->recon_tmp->y_stride, |
1361 | 0 | ctx->blk_geom->bwidth * sizeof(uint16_t)); |
1362 | 0 | } |
1363 | |
|
1364 | 0 | if (ctx->has_uv) { |
1365 | 0 | uint32_t round_origin_x = ROUND_UV(ctx->blk_org_x); // for Chroma blocks with size of 4 |
1366 | 0 | uint32_t round_origin_y = ROUND_UV(ctx->blk_org_y); // for Chroma blocks with size of 4 |
1367 | | |
1368 | | // Cr |
1369 | 0 | uint32_t recon_cr_offset = ((round_origin_y >> 1) * recon_buffer->v_stride) + (round_origin_x >> 1); |
1370 | 0 | uint16_t* ep_recon_cr = ((uint16_t*)(recon_buffer->v_buffer)) + recon_cr_offset; |
1371 | 0 | uint16_t* md_recon_cr = (uint16_t*)(blk_ptr->recon_tmp->v_buffer); |
1372 | |
|
1373 | 0 | for (uint32_t i = 0; i < ctx->blk_geom->bheight_uv; i++) { |
1374 | 0 | svt_memcpy(ep_recon_cr + i * recon_buffer->v_stride, |
1375 | 0 | md_recon_cr + i * blk_ptr->recon_tmp->v_stride, |
1376 | 0 | ctx->blk_geom->bwidth_uv * sizeof(uint16_t)); |
1377 | 0 | } |
1378 | | |
1379 | | // Cb |
1380 | 0 | uint32_t recon_cb_offset = ((round_origin_y >> 1) * recon_buffer->u_stride) + (round_origin_x >> 1); |
1381 | 0 | uint16_t* ep_recon_cb = ((uint16_t*)(recon_buffer->u_buffer)) + recon_cb_offset; |
1382 | 0 | uint16_t* md_recon_cb = (uint16_t*)(blk_ptr->recon_tmp->u_buffer); |
1383 | |
|
1384 | 0 | for (uint32_t i = 0; i < ctx->blk_geom->bheight_uv; i++) { |
1385 | 0 | svt_memcpy(ep_recon_cb + i * recon_buffer->u_stride, |
1386 | 0 | md_recon_cb + i * blk_ptr->recon_tmp->u_stride, |
1387 | 0 | ctx->blk_geom->bwidth_uv * sizeof(uint16_t)); |
1388 | 0 | } |
1389 | 0 | } |
1390 | 120k | } else { |
1391 | 120k | uint32_t recon_luma_offset = (ctx->blk_org_y * recon_buffer->y_stride) + ctx->blk_org_x; |
1392 | 120k | uint8_t* ep_recon = recon_buffer->y_buffer + recon_luma_offset; |
1393 | 120k | uint8_t* md_recon = blk_ptr->recon_tmp->y_buffer; |
1394 | | |
1395 | 1.08M | for (uint32_t i = 0; i < ctx->blk_geom->bheight; i++) { |
1396 | 964k | svt_memcpy(ep_recon + i * recon_buffer->y_stride, |
1397 | 964k | md_recon + i * blk_ptr->recon_tmp->y_stride, |
1398 | 964k | ctx->blk_geom->bwidth * sizeof(uint8_t)); |
1399 | 964k | } |
1400 | | |
1401 | 120k | if (ctx->has_uv) { |
1402 | 120k | uint32_t round_origin_x = ROUND_UV(ctx->blk_org_x); // for Chroma blocks with size of 4 |
1403 | 120k | uint32_t round_origin_y = ROUND_UV(ctx->blk_org_y); // for Chroma blocks with size of 4 |
1404 | | |
1405 | | // Cr |
1406 | 120k | uint32_t recon_cr_offset = ((round_origin_y >> 1) * recon_buffer->v_stride) + (round_origin_x >> 1); |
1407 | 120k | uint8_t* ep_recon_cr = recon_buffer->v_buffer + recon_cr_offset; |
1408 | 120k | uint8_t* md_recon_cr = blk_ptr->recon_tmp->v_buffer; |
1409 | | |
1410 | 603k | for (uint32_t i = 0; i < ctx->blk_geom->bheight_uv; i++) { |
1411 | 482k | svt_memcpy(ep_recon_cr + i * recon_buffer->v_stride, |
1412 | 482k | md_recon_cr + i * blk_ptr->recon_tmp->v_stride, |
1413 | 482k | ctx->blk_geom->bwidth_uv * sizeof(uint8_t)); |
1414 | 482k | } |
1415 | | |
1416 | | // Cb |
1417 | 120k | uint32_t recon_cb_offset = ((round_origin_y >> 1) * recon_buffer->u_stride) + (round_origin_x >> 1); |
1418 | 120k | uint8_t* ep_recon_cb = recon_buffer->u_buffer + recon_cb_offset; |
1419 | 120k | uint8_t* md_recon_cb = blk_ptr->recon_tmp->u_buffer; |
1420 | | |
1421 | 603k | for (uint32_t i = 0; i < ctx->blk_geom->bheight_uv; i++) { |
1422 | 482k | svt_memcpy(ep_recon_cb + i * recon_buffer->u_stride, |
1423 | 482k | md_recon_cb + i * blk_ptr->recon_tmp->u_stride, |
1424 | 482k | ctx->blk_geom->bwidth_uv * sizeof(uint8_t)); |
1425 | 482k | } |
1426 | 120k | } |
1427 | 120k | } |
1428 | 120k | } |
1429 | | |
1430 | | // Copy quantized coeffs to EncDec buffers if EncDec was bypassed. If pred depth only was used and NSQ is OFF data |
1431 | | // was copied directly to EncDec buffers in MD. |
1432 | | static void copy_qcoeffs(PictureControlSet* pcs, EncDecContext* ctx, BlkStruct* blk_ptr, uint32_t blk_coded_area, |
1433 | 482k | uint32_t blk_coded_area_uv) { |
1434 | 482k | const BlockGeom* blk_geom = ctx->blk_geom; |
1435 | 482k | EbPictureBufferDesc* coeff_buffer_sb = pcs->ppcs->enc_dec_ptr->quantized_coeff[ctx->sb_index]; |
1436 | 482k | const uint8_t tx_depth = blk_ptr->block_mi.tx_depth; |
1437 | 482k | const uint8_t txb_itr = ctx->txb_itr; |
1438 | 482k | const uint8_t uv_pass = tx_depth && txb_itr ? 0 : 1; //NM: 128x128 exeption |
1439 | | |
1440 | 482k | int32_t* ep_coeff = ((int32_t*)coeff_buffer_sb->y_buffer) + ctx->coded_area_sb_update; |
1441 | 482k | int32_t* md_coeff = ((int32_t*)blk_ptr->coeff_tmp->y_buffer) + blk_coded_area; |
1442 | | |
1443 | 482k | if ((blk_ptr->y_has_coeff & (1 << txb_itr))) { |
1444 | 2.07k | const TxSize tx_size = tx_depth_to_tx_size[tx_depth][blk_geom->bsize]; |
1445 | 2.07k | const int tx_width = tx_size_wide[tx_size]; |
1446 | 2.07k | const int tx_height = tx_size_high[tx_size]; |
1447 | 2.07k | svt_memcpy(ep_coeff, md_coeff, sizeof(int32_t) * tx_height * tx_width); |
1448 | 2.07k | } |
1449 | | |
1450 | 482k | if (ctx->md_ctx->has_uv && uv_pass) { |
1451 | 120k | const TxSize tx_size_uv = av1_get_max_uv_txsize(blk_geom->bsize, 1, 1); |
1452 | 120k | const int tx_width_uv = tx_size_wide[tx_size_uv]; |
1453 | 120k | const int tx_height_uv = tx_size_high[tx_size_uv]; |
1454 | 120k | int32_t* ep_coeff_cb = ((int32_t*)coeff_buffer_sb->u_buffer) + ctx->coded_area_sb_uv_update; |
1455 | 120k | int32_t* md_coeff_cb = ((int32_t*)blk_ptr->coeff_tmp->u_buffer) + blk_coded_area_uv; |
1456 | | |
1457 | 120k | if ((blk_ptr->u_has_coeff & (1 << txb_itr))) { |
1458 | 2.07k | svt_memcpy(ep_coeff_cb, md_coeff_cb, sizeof(int32_t) * tx_height_uv * tx_width_uv); |
1459 | 2.07k | } |
1460 | | |
1461 | 120k | int32_t* ep_coeff_cr = ((int32_t*)coeff_buffer_sb->v_buffer) + ctx->coded_area_sb_uv_update; |
1462 | 120k | int32_t* md_coeff_cr = ((int32_t*)blk_ptr->coeff_tmp->v_buffer) + blk_coded_area_uv; |
1463 | | |
1464 | 120k | if ((blk_ptr->v_has_coeff & (1 << txb_itr))) { |
1465 | 2.07k | svt_memcpy(ep_coeff_cr, md_coeff_cr, sizeof(int32_t) * tx_height_uv * tx_width_uv); |
1466 | 2.07k | } |
1467 | 120k | } |
1468 | 482k | } |
1469 | | |
1470 | | // Perform CDF update (MD feature) for coeff-related CDFs |
1471 | 0 | void update_coeff_cdf(PictureControlSet* pcs, EncDecContext* ctx, BlkStruct* blk_ptr) { |
1472 | 0 | ModeDecisionContext* md_ctx = ctx->md_ctx; |
1473 | 0 | const BlockGeom* blk_geom = ctx->blk_geom; |
1474 | 0 | EbPictureBufferDesc* coeff_buffer_sb = pcs->ppcs->enc_dec_ptr->quantized_coeff[ctx->sb_index]; |
1475 | 0 | const uint8_t tx_depth = blk_ptr->block_mi.tx_depth; |
1476 | 0 | const uint8_t txb_itr = ctx->txb_itr; |
1477 | 0 | const uint8_t uv_pass = tx_depth && ctx->txb_itr ? 0 : 1; //NM: 128x128 exeption |
1478 | 0 | const uint16_t tile_idx = ctx->tile_index; |
1479 | 0 | const int is_inter = is_inter_block(&blk_ptr->block_mi); |
1480 | 0 | const TxSize tx_size = tx_depth_to_tx_size[tx_depth][blk_geom->bsize]; |
1481 | 0 | const int tx_width = tx_size_wide[tx_size]; |
1482 | 0 | const int tx_height = tx_size_high[tx_size]; |
1483 | 0 | const TxSize tx_size_uv = av1_get_max_uv_txsize(blk_geom->bsize, 1, 1); |
1484 | 0 | const int tx_width_uv = tx_size_wide[tx_size_uv]; |
1485 | 0 | const int tx_height_uv = tx_size_high[tx_size_uv]; |
1486 | 0 | const uint16_t txb_origin_x = ctx->blk_org_x + tx_org[blk_geom->bsize][is_inter][tx_depth][txb_itr].x; |
1487 | 0 | const uint16_t txb_origin_y = ctx->blk_org_y + tx_org[blk_geom->bsize][is_inter][tx_depth][txb_itr].y; |
1488 | |
|
1489 | 0 | md_ctx->luma_txb_skip_context = 0; |
1490 | 0 | md_ctx->luma_dc_sign_context = 0; |
1491 | 0 | svt_aom_get_txb_ctx(pcs, |
1492 | 0 | COMPONENT_LUMA, |
1493 | 0 | pcs->ep_luma_dc_sign_level_coeff_na_update[tile_idx], |
1494 | 0 | txb_origin_x, |
1495 | 0 | txb_origin_y, |
1496 | 0 | blk_geom->bsize, |
1497 | 0 | tx_size, |
1498 | 0 | &md_ctx->luma_txb_skip_context, |
1499 | 0 | &md_ctx->luma_dc_sign_context); |
1500 | |
|
1501 | 0 | if (md_ctx->has_uv && uv_pass) { |
1502 | 0 | md_ctx->cb_txb_skip_context = 0; |
1503 | 0 | md_ctx->cb_dc_sign_context = 0; |
1504 | 0 | svt_aom_get_txb_ctx(pcs, |
1505 | 0 | COMPONENT_CHROMA, |
1506 | 0 | pcs->ep_cb_dc_sign_level_coeff_na_update[tile_idx], |
1507 | 0 | ROUND_UV(txb_origin_x) >> 1, |
1508 | 0 | ROUND_UV(txb_origin_y) >> 1, |
1509 | 0 | blk_geom->bsize_uv, |
1510 | 0 | tx_size_uv, |
1511 | 0 | &md_ctx->cb_txb_skip_context, |
1512 | 0 | &md_ctx->cb_dc_sign_context); |
1513 | |
|
1514 | 0 | md_ctx->cr_txb_skip_context = 0; |
1515 | 0 | md_ctx->cr_dc_sign_context = 0; |
1516 | 0 | svt_aom_get_txb_ctx(pcs, |
1517 | 0 | COMPONENT_CHROMA, |
1518 | 0 | pcs->ep_cr_dc_sign_level_coeff_na_update[tile_idx], |
1519 | 0 | ROUND_UV(txb_origin_x) >> 1, |
1520 | 0 | ROUND_UV(txb_origin_y) >> 1, |
1521 | 0 | blk_geom->bsize_uv, |
1522 | 0 | tx_size_uv, |
1523 | 0 | &md_ctx->cr_txb_skip_context, |
1524 | 0 | &md_ctx->cr_dc_sign_context); |
1525 | 0 | } |
1526 | |
|
1527 | 0 | ModeDecisionCandidateBuffer** cand_bf_ptr_array_base = md_ctx->cand_bf_ptr_array; |
1528 | 0 | ModeDecisionCandidateBuffer** cand_bf_ptr_array = &(cand_bf_ptr_array_base[0]); |
1529 | 0 | ModeDecisionCandidateBuffer* cand_bf; |
1530 | | |
1531 | | // Set the Candidate Buffer |
1532 | 0 | cand_bf = cand_bf_ptr_array[0]; |
1533 | | // Rate estimation function uses the values from CandidatePtr. The right values are copied from blk_ptr to CandidatePtr |
1534 | 0 | cand_bf->cand->block_mi.mode = blk_ptr->block_mi.mode; |
1535 | 0 | cand_bf->cand->block_mi.filter_intra_mode = blk_ptr->block_mi.filter_intra_mode; |
1536 | 0 | if (blk_ptr->block_has_coeff) { |
1537 | 0 | uint64_t y_txb_coeff_bits; |
1538 | 0 | uint64_t cb_txb_coeff_bits; |
1539 | 0 | uint64_t cr_txb_coeff_bits; |
1540 | 0 | svt_aom_txb_estimate_coeff_bits(md_ctx, |
1541 | 0 | 1, //allow_update_cdf, |
1542 | 0 | &pcs->ec_ctx_array[ctx->sb_index], |
1543 | 0 | pcs, |
1544 | 0 | cand_bf, |
1545 | 0 | ctx->coded_area_sb_update, |
1546 | 0 | ctx->coded_area_sb_uv_update, |
1547 | 0 | coeff_buffer_sb, |
1548 | 0 | blk_ptr->eob.y[txb_itr], |
1549 | 0 | blk_ptr->eob.u[txb_itr], |
1550 | 0 | blk_ptr->eob.v[txb_itr], |
1551 | 0 | &y_txb_coeff_bits, |
1552 | 0 | &cb_txb_coeff_bits, |
1553 | 0 | &cr_txb_coeff_bits, |
1554 | 0 | tx_size, |
1555 | 0 | tx_size_uv, |
1556 | 0 | blk_ptr->tx_type[txb_itr], |
1557 | 0 | blk_ptr->tx_type_uv, |
1558 | 0 | (md_ctx->has_uv && uv_pass) ? COMPONENT_ALL : COMPONENT_LUMA); |
1559 | 0 | } |
1560 | | |
1561 | | // Update the luma DC Sign Level Coeff Neighbor Array |
1562 | 0 | uint8_t dc_sign_level_coeff = (uint8_t)blk_ptr->quant_dc.y[txb_itr]; |
1563 | |
|
1564 | 0 | svt_aom_neighbor_array_unit_mode_write(pcs->ep_luma_dc_sign_level_coeff_na_update[tile_idx], |
1565 | 0 | (uint8_t*)&dc_sign_level_coeff, |
1566 | 0 | txb_origin_x, |
1567 | 0 | txb_origin_y, |
1568 | 0 | tx_width, |
1569 | 0 | tx_height, |
1570 | 0 | NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK); |
1571 | | |
1572 | | // Update the Cb DC Sign Level Coeff Neighbor Array |
1573 | 0 | if (md_ctx->has_uv && uv_pass) { |
1574 | 0 | dc_sign_level_coeff = (uint8_t)blk_ptr->quant_dc.u[txb_itr]; |
1575 | |
|
1576 | 0 | svt_aom_neighbor_array_unit_mode_write(pcs->ep_cb_dc_sign_level_coeff_na_update[tile_idx], |
1577 | 0 | (uint8_t*)&dc_sign_level_coeff, |
1578 | 0 | ROUND_UV(txb_origin_x) >> 1, |
1579 | 0 | ROUND_UV(txb_origin_y) >> 1, |
1580 | 0 | tx_width_uv, |
1581 | 0 | tx_height_uv, |
1582 | 0 | NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK); |
1583 | | |
1584 | | // Update the Cr DC Sign Level Coeff Neighbor Array |
1585 | 0 | dc_sign_level_coeff = (uint8_t)blk_ptr->quant_dc.v[txb_itr]; |
1586 | |
|
1587 | 0 | svt_aom_neighbor_array_unit_mode_write(pcs->ep_cr_dc_sign_level_coeff_na_update[tile_idx], |
1588 | 0 | (uint8_t*)&dc_sign_level_coeff, |
1589 | 0 | ROUND_UV(txb_origin_x) >> 1, |
1590 | 0 | ROUND_UV(txb_origin_y) >> 1, |
1591 | 0 | tx_width_uv, |
1592 | 0 | tx_height_uv, |
1593 | 0 | NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK); |
1594 | 0 | } |
1595 | 0 | } |
1596 | | |
1597 | | // Update encode-related data for the passed block |
1598 | | // expects ctx->blk_geom, ctx->blk_ptr, ctx->blk_org_x, ctx->blk_org_y to be set |
1599 | 128k | static void update_b(PictureControlSet* pcs, EncDecContext* ctx, BlkStruct* blk_ptr, EcBlkStruct** output_blk_ptr) { |
1600 | 128k | ModeDecisionContext* md_ctx = ctx->md_ctx; |
1601 | 128k | const BlockGeom* blk_geom = ctx->blk_geom; |
1602 | 128k | SuperBlock* sb_ptr = md_ctx->sb_ptr; |
1603 | 128k | int sb_index = ctx->sb_index; |
1604 | 128k | const uint16_t tile_idx = ctx->tile_index; |
1605 | | |
1606 | 128k | if (!pcs->scs->allintra) { |
1607 | 0 | if (is_intra_mode(blk_ptr->block_mi.mode)) { |
1608 | 0 | ctx->tot_intra_coded_area += blk_geom->bwidth * blk_geom->bheight; |
1609 | 0 | pcs->sb_intra[sb_index] = 1; |
1610 | 0 | } else { |
1611 | 0 | if (pcs->ppcs->frm_hdr.allow_high_precision_mv) { |
1612 | 0 | bool hp = (blk_ptr->block_mi.mv[0].x % 2 != 0 || blk_ptr->block_mi.mv[0].y % 2 != 0); |
1613 | 0 | if (!hp && has_second_ref(&blk_ptr->block_mi)) { |
1614 | 0 | hp = (blk_ptr->block_mi.mv[1].x % 2 != 0 || blk_ptr->block_mi.mv[1].y % 2 != 0); |
1615 | 0 | } |
1616 | 0 | if (hp) { |
1617 | 0 | ctx->tot_hp_coded_area += blk_geom->bwidth * blk_geom->bheight; |
1618 | 0 | } |
1619 | 0 | } |
1620 | 0 | bool is_zero_mv = 0; |
1621 | 0 | if (abs(blk_ptr->block_mi.mv[0].x) < 8 && abs(blk_ptr->block_mi.mv[0].y) < 8) { |
1622 | 0 | is_zero_mv = 1; |
1623 | 0 | } |
1624 | 0 | if (has_second_ref(&blk_ptr->block_mi)) { |
1625 | 0 | if (abs(blk_ptr->block_mi.mv[1].x) < 8 && abs(blk_ptr->block_mi.mv[1].y) < 8) { |
1626 | 0 | is_zero_mv = 1; |
1627 | 0 | } |
1628 | 0 | } |
1629 | 0 | if (is_zero_mv) { |
1630 | 0 | ctx->tot_cnt_zero_mv += blk_geom->bwidth * blk_geom->bheight; |
1631 | 0 | } |
1632 | 0 | if (blk_geom->sq_size == pcs->scs->sb_size && blk_ptr->block_mi.mode != NEWMV && |
1633 | 0 | blk_ptr->block_mi.mode != NEW_NEWMV) { |
1634 | 0 | pcs->sb_64x64_mvp[sb_index] = 1; |
1635 | 0 | } |
1636 | 0 | } |
1637 | |
|
1638 | 0 | if (blk_ptr->block_has_coeff == 0) { |
1639 | 0 | ctx->tot_skip_coded_area += blk_geom->bwidth * blk_geom->bheight; |
1640 | 0 | } else { |
1641 | 0 | pcs->sb_skip[sb_index] = 0; |
1642 | 0 | } |
1643 | 0 | pcs->sb_min_sq_size[sb_index] = MIN(blk_geom->sq_size, pcs->sb_min_sq_size[sb_index]); |
1644 | 0 | pcs->sb_max_sq_size[sb_index] = MAX(blk_geom->sq_size, pcs->sb_max_sq_size[sb_index]); |
1645 | 0 | } |
1646 | 128k | svt_block_on_mutex(pcs->ppcs->pcs_total_rate_mutex); |
1647 | 128k | pcs->ppcs->pcs_total_rate += blk_ptr->total_rate; |
1648 | 128k | svt_release_mutex(pcs->ppcs->pcs_total_rate_mutex); |
1649 | | |
1650 | | // If needed, copy recon and qcoeffs from MD buffers to EC buffers and update coeff-related CDFs |
1651 | 128k | if (pcs->cdf_ctrl.update_coef || (md_ctx->bypass_encdec && !(md_ctx->fixed_partition))) { |
1652 | | // Copy recon to EncDec buffers if EncDec was bypassed; if pred depth only was used |
1653 | | // and NSQ is OFF data was copied directly to EncDec buffers in MD |
1654 | 120k | if (md_ctx->bypass_encdec && !(md_ctx->fixed_partition)) { |
1655 | 120k | copy_recon(pcs, md_ctx, blk_ptr); |
1656 | 120k | } |
1657 | | |
1658 | | // Initialize the Transform Loop |
1659 | 120k | const uint8_t tx_depth = blk_ptr->block_mi.tx_depth; |
1660 | 120k | const uint16_t txb_count = tx_blocks_per_depth[blk_geom->bsize][tx_depth]; |
1661 | 120k | const TxSize tx_size = tx_depth_to_tx_size[tx_depth][blk_geom->bsize]; |
1662 | 120k | const int tx_width = tx_size_wide[tx_size]; |
1663 | 120k | const int tx_height = tx_size_high[tx_size]; |
1664 | 120k | const TxSize tx_size_uv = av1_get_max_uv_txsize(blk_geom->bsize, 1, 1); |
1665 | 120k | const int tx_width_uv = tx_size_wide[tx_size_uv]; |
1666 | 120k | const int tx_height_uv = tx_size_high[tx_size_uv]; |
1667 | 120k | uint32_t blk_coded_area = 0; |
1668 | 120k | uint32_t blk_coded_area_uv = 0; |
1669 | 603k | for (ctx->txb_itr = 0; ctx->txb_itr < txb_count; ctx->txb_itr++) { |
1670 | 482k | const uint8_t uv_pass = tx_depth && ctx->txb_itr ? 0 : 1; //NM: 128x128 exeption |
1671 | | |
1672 | | // Copy quantized coeffs to EncDec buffers if EncDec was bypassed; if pred depth only was used |
1673 | | // and NSQ is OFF data was copied directly to EncDec buffers in MD |
1674 | 482k | if (md_ctx->bypass_encdec && !(md_ctx->fixed_partition)) { |
1675 | 482k | copy_qcoeffs(pcs, ctx, blk_ptr, blk_coded_area, blk_coded_area_uv); |
1676 | 482k | } |
1677 | | |
1678 | | // Perform CDF update (MD feature) if enabled |
1679 | 482k | if (pcs->cdf_ctrl.update_coef) { |
1680 | 0 | update_coeff_cdf(pcs, ctx, blk_ptr); |
1681 | 0 | } |
1682 | | |
1683 | 482k | blk_coded_area += tx_width * tx_height; |
1684 | 482k | ctx->coded_area_sb_update += tx_width * tx_height; |
1685 | | |
1686 | 482k | if (md_ctx->has_uv && uv_pass) { |
1687 | 120k | blk_coded_area_uv += tx_width_uv * tx_height_uv; |
1688 | 120k | ctx->coded_area_sb_uv_update += tx_width_uv * tx_height_uv; |
1689 | 120k | } |
1690 | 482k | } |
1691 | 120k | } |
1692 | 128k | if (!md_ctx->bypass_encdec) { |
1693 | 0 | md_ctx->blk_org_x = ctx->blk_org_x; |
1694 | 0 | md_ctx->blk_org_y = ctx->blk_org_y; |
1695 | 0 | md_ctx->blk_geom = ctx->blk_geom; |
1696 | 0 | svt_aom_update_mi_map_enc_dec(blk_ptr, md_ctx, pcs); |
1697 | 0 | } |
1698 | 128k | if (pcs->cdf_ctrl.update_se) { |
1699 | | // Update the partition Neighbor Array |
1700 | 0 | PartitionContext partition; |
1701 | 0 | partition.above = partition_context_lookup[blk_geom->bsize].above; |
1702 | 0 | partition.left = partition_context_lookup[blk_geom->bsize].left; |
1703 | |
|
1704 | 0 | svt_aom_neighbor_array_unit_mode_write(pcs->ep_partition_context_na[tile_idx], |
1705 | 0 | (uint8_t*)&partition, |
1706 | 0 | ctx->blk_org_x, |
1707 | 0 | ctx->blk_org_y, |
1708 | 0 | blk_geom->bwidth, |
1709 | 0 | blk_geom->bheight, |
1710 | 0 | NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK); |
1711 | | |
1712 | | // Update the CDFs based on the current block |
1713 | 0 | blk_ptr->av1xd->tile_ctx = &pcs->ec_ctx_array[sb_index]; |
1714 | 0 | uint32_t txfm_context_left_index = get_neighbor_array_unit_left_index(pcs->ep_txfm_context_na[tile_idx], |
1715 | 0 | ctx->blk_org_y); |
1716 | 0 | uint32_t txfm_context_above_index = get_neighbor_array_unit_top_index(pcs->ep_txfm_context_na[tile_idx], |
1717 | 0 | ctx->blk_org_x); |
1718 | 0 | blk_ptr->av1xd->above_txfm_context = &(pcs->ep_txfm_context_na[tile_idx]->top_array[txfm_context_above_index]); |
1719 | 0 | blk_ptr->av1xd->left_txfm_context = &(pcs->ep_txfm_context_na[tile_idx]->left_array[txfm_context_left_index]); |
1720 | 0 | svt_aom_tx_size_bits(pcs, |
1721 | 0 | ctx->blk_ptr->segment_id, |
1722 | 0 | md_ctx->md_rate_est_ctx, |
1723 | 0 | blk_ptr->av1xd, |
1724 | 0 | blk_ptr->av1xd->mi[0], |
1725 | 0 | tx_depth_to_tx_size[blk_ptr->block_mi.tx_depth][blk_geom->bsize], |
1726 | 0 | pcs->ppcs->frm_hdr.tx_mode, |
1727 | 0 | blk_geom->bsize, |
1728 | 0 | !blk_ptr->block_has_coeff, |
1729 | 0 | &pcs->ec_ctx_array[sb_index], |
1730 | 0 | 1 /*allow_update_cdf*/); |
1731 | 0 | svt_aom_update_stats(pcs, blk_ptr, ctx->blk_org_y >> MI_SIZE_LOG2, ctx->blk_org_x >> MI_SIZE_LOG2); |
1732 | 0 | } |
1733 | | |
1734 | | // Copy final symbols and mode info from MD array to SB ptr |
1735 | | // Data will be overwritten each iteration, so copying is useful. Data is updated at EntropyCoding. |
1736 | 128k | sb_ptr->final_blk_arr[sb_ptr->final_blk_cnt].av1xd = NULL; |
1737 | | // ENCDEC palette info buffer |
1738 | 128k | { |
1739 | 128k | if (svt_av1_allow_palette(pcs->ppcs->palette_level, blk_geom->bsize)) { |
1740 | 0 | ec_rtime_alloc_palette_info(&sb_ptr->final_blk_arr[sb_ptr->final_blk_cnt]); |
1741 | 128k | } else { |
1742 | 128k | sb_ptr->final_blk_arr[sb_ptr->final_blk_cnt].palette_info = NULL; |
1743 | 128k | } |
1744 | 128k | } |
1745 | 128k | BlkStruct* src_cu = blk_ptr; |
1746 | 128k | EcBlkStruct* dst_cu = &sb_ptr->final_blk_arr[sb_ptr->final_blk_cnt]; |
1747 | 128k | *output_blk_ptr = &sb_ptr->final_blk_arr[sb_ptr->final_blk_cnt]; |
1748 | 128k | svt_aom_move_blk_data(pcs, ctx, src_cu, dst_cu); |
1749 | 128k | sb_ptr->final_blk_arr[sb_ptr->final_blk_cnt++].av1xd = sb_ptr->av1xd; |
1750 | | // MFMV Update |
1751 | 128k | if (pcs->scs->mfmv_enabled && pcs->slice_type != I_SLICE && pcs->ppcs->is_ref) { |
1752 | 0 | uint32_t mi_stride = pcs->mi_stride; |
1753 | 0 | int32_t mi_row = ctx->blk_org_y >> MI_SIZE_LOG2; |
1754 | 0 | int32_t mi_col = ctx->blk_org_x >> MI_SIZE_LOG2; |
1755 | 0 | const int32_t offset = mi_row * mi_stride + mi_col; |
1756 | 0 | MbModeInfo* mbmi = pcs->mi_grid_base[offset]; |
1757 | 0 | const int x_mis = AOMMIN(ctx->blk_geom->bwidth >> MI_SIZE_LOG2, pcs->ppcs->av1_cm->mi_cols - mi_col); |
1758 | 0 | const int y_mis = AOMMIN(ctx->blk_geom->bheight >> MI_SIZE_LOG2, pcs->ppcs->av1_cm->mi_rows - mi_row); |
1759 | 0 | EbReferenceObject* obj_l0 = (EbReferenceObject*)pcs->ppcs->ref_pic_wrapper->object_ptr; |
1760 | |
|
1761 | 0 | av1_copy_frame_mvs(pcs, pcs->ppcs->av1_cm, mbmi[0], mi_row, mi_col, x_mis, y_mis, obj_l0); |
1762 | 0 | } |
1763 | 128k | } |
1764 | | |
1765 | | /******************************************* |
1766 | | * Encode Pass |
1767 | | * |
1768 | | * Summary: Performs an AV1 conformant encode/reconstruction |
1769 | | * for a block based on the pre-determined mode info. |
1770 | | * |
1771 | | * Inputs: |
1772 | | * SourcePic |
1773 | | * Coding Results |
1774 | | * SB Location |
1775 | | * Sequence Control Set |
1776 | | * Picture Control Set |
1777 | | * |
1778 | | * Outputs: |
1779 | | * Reconstructed Samples |
1780 | | * Coefficient Samples |
1781 | | * |
1782 | | *******************************************/ |
1783 | | static void encode_b(PictureControlSet* pcs, EncDecContext* ctx, BlkStruct* blk_ptr, EcBlkStruct** output_blk_ptr, |
1784 | 128k | const int mi_row, const int mi_col) { |
1785 | 128k | ModeDecisionContext* md_ctx = ctx->md_ctx; |
1786 | 128k | ctx->blk_geom = md_ctx->blk_geom = get_blk_geom_mds(pcs->scs->blk_geom_mds, blk_ptr->mds_idx); |
1787 | 128k | ctx->blk_ptr = md_ctx->blk_ptr = blk_ptr; |
1788 | 128k | ctx->blk_org_x = md_ctx->blk_org_x = mi_col << MI_SIZE_LOG2; |
1789 | 128k | ctx->blk_org_y = md_ctx->blk_org_y = mi_row << MI_SIZE_LOG2; |
1790 | 128k | md_ctx->has_uv = is_chroma_reference(mi_row, mi_col, md_ctx->blk_geom->bsize, 1, 1); |
1791 | 128k | if (ctx->md_ctx->bypass_encdec) { |
1792 | 128k | update_b(pcs, ctx, blk_ptr, output_blk_ptr); |
1793 | 128k | return; |
1794 | 128k | } |
1795 | | |
1796 | | /* ED should use the skip decision from MD. If MD signals 0 coeffs, the TX will |
1797 | | be bypassed unless MD did not perform chroma (blk_skip_decision) or the block is an |
1798 | | INTRA block (since the prediction at MD may not be conformant). */ |
1799 | 18.4E | ctx->md_skip_blk = md_ctx->blk_skip_decision |
1800 | 18.4E | ? ((is_intra_mode(blk_ptr->block_mi.mode) || blk_ptr->block_has_coeff) ? 0 : 1) |
1801 | 18.4E | : 0; |
1802 | 18.4E | blk_ptr->block_has_coeff = 0; |
1803 | | |
1804 | 18.4E | if (is_inter_block(&blk_ptr->block_mi)) { |
1805 | 0 | perform_inter_coding_loop(pcs, ctx); |
1806 | 18.4E | } else if (is_intra_mode(blk_ptr->block_mi.mode)) { |
1807 | 0 | if (pcs->scs->static_config.encoder_bit_depth > EB_EIGHT_BIT && pcs->hbd_md == 0 && |
1808 | 0 | blk_ptr->palette_size[0] > 0) { |
1809 | | //MD was done on 8bit, scale palette colors to 10bit |
1810 | 0 | for (uint8_t col = 0; col < blk_ptr->palette_size[0]; col++) { |
1811 | 0 | blk_ptr->palette_info->pmi.palette_colors[col] *= 4; |
1812 | 0 | } |
1813 | 0 | } |
1814 | 0 | perform_intra_coding_loop(pcs, ctx); |
1815 | 18.4E | } else { |
1816 | 18.4E | EncodeContext* enc_ctx = pcs->scs->enc_ctx; |
1817 | 18.4E | CHECK_REPORT_ERROR_NC(enc_ctx->app_callback_ptr, EB_ENC_CL_ERROR2); |
1818 | 18.4E | } |
1819 | | |
1820 | 18.4E | if (pcs->ppcs->frm_hdr.allow_intrabc && ctx->is_16bit && (ctx->bit_depth == EB_EIGHT_BIT)) { |
1821 | 0 | svt_aom_convert_recon_16bit_to_8bit(pcs, ctx); |
1822 | 0 | } |
1823 | | |
1824 | | // Update block info and neighbour arrays needed for future blocks/pictures |
1825 | 18.4E | update_b(pcs, ctx, blk_ptr, output_blk_ptr); |
1826 | 18.4E | } |
1827 | | |
1828 | | void svt_aom_encode_sb(SequenceControlSet* scs, PictureControlSet* pcs, EncDecContext* ctx, SuperBlock* sb_ptr, |
1829 | 175k | PC_TREE* pc_tree, PARTITION_TREE* ptree, int mi_row, int mi_col) { |
1830 | 175k | if (mi_row >= pcs->ppcs->av1_cm->mi_rows || mi_col >= pcs->ppcs->av1_cm->mi_cols) { |
1831 | 0 | return; |
1832 | 0 | } |
1833 | | |
1834 | 175k | const BlockSize bsize = pc_tree->bsize; |
1835 | 175k | assert(bsize < BLOCK_SIZES_ALL); |
1836 | 175k | const int hbs = mi_size_wide[bsize] >> 1; |
1837 | 175k | const PartitionType partition = pc_tree->partition; |
1838 | 175k | const int quarter_step = mi_size_wide[bsize] >> 2; |
1839 | | |
1840 | 175k | ptree->partition = partition; |
1841 | 175k | ptree->bsize = bsize; |
1842 | 175k | ctx->md_ctx->shape = from_part_to_shape[partition]; |
1843 | 175k | if (pcs->cdf_ctrl.update_se) { |
1844 | | // Update the partition stats |
1845 | 0 | svt_aom_update_part_stats(pcs, partition, bsize, ctx->tile_index, ctx->sb_index, mi_row, mi_col); |
1846 | 0 | } |
1847 | | |
1848 | 175k | switch (partition) { |
1849 | 128k | case PARTITION_NONE: |
1850 | 128k | encode_b(pcs, ctx, pc_tree->block_data[PART_N][0], &ptree->blk_data[0], mi_row, mi_col); |
1851 | 128k | break; |
1852 | 0 | case PARTITION_HORZ: |
1853 | 0 | encode_b(pcs, ctx, pc_tree->block_data[PART_H][0], &ptree->blk_data[0], mi_row, mi_col); |
1854 | 0 | if (mi_row + hbs < pcs->ppcs->av1_cm->mi_rows) { |
1855 | 0 | encode_b(pcs, ctx, pc_tree->block_data[PART_H][1], &ptree->blk_data[1], mi_row + hbs, mi_col); |
1856 | 0 | } |
1857 | 0 | break; |
1858 | 0 | case PARTITION_VERT: |
1859 | 0 | encode_b(pcs, ctx, pc_tree->block_data[PART_V][0], &ptree->blk_data[0], mi_row, mi_col); |
1860 | 0 | if (mi_col + hbs < pcs->ppcs->av1_cm->mi_cols) { |
1861 | 0 | encode_b(pcs, ctx, pc_tree->block_data[PART_V][1], &ptree->blk_data[1], mi_row, mi_col + hbs); |
1862 | 0 | } |
1863 | 0 | break; |
1864 | 46.7k | case PARTITION_SPLIT: |
1865 | 233k | for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) { |
1866 | 187k | const int x_idx = (i & 1) * hbs; |
1867 | 187k | const int y_idx = (i >> 1) * hbs; |
1868 | 187k | if (mi_row + y_idx >= pcs->ppcs->av1_cm->mi_rows || mi_col + x_idx >= pcs->ppcs->av1_cm->mi_cols) { |
1869 | 18.3k | continue; |
1870 | 18.3k | } |
1871 | 168k | svt_aom_encode_sb( |
1872 | 168k | scs, pcs, ctx, sb_ptr, pc_tree->split[i], ptree->sub_tree[i], mi_row + y_idx, mi_col + x_idx); |
1873 | 168k | } |
1874 | 46.7k | break; |
1875 | 0 | case PARTITION_HORZ_A: |
1876 | 0 | encode_b(pcs, ctx, pc_tree->block_data[PART_HA][0], &ptree->blk_data[0], mi_row, mi_col); |
1877 | 0 | encode_b(pcs, ctx, pc_tree->block_data[PART_HA][1], &ptree->blk_data[1], mi_row, mi_col + hbs); |
1878 | 0 | encode_b(pcs, ctx, pc_tree->block_data[PART_HA][2], &ptree->blk_data[2], mi_row + hbs, mi_col); |
1879 | 0 | break; |
1880 | 0 | case PARTITION_HORZ_B: |
1881 | 0 | encode_b(pcs, ctx, pc_tree->block_data[PART_HB][0], &ptree->blk_data[0], mi_row, mi_col); |
1882 | 0 | encode_b(pcs, ctx, pc_tree->block_data[PART_HB][1], &ptree->blk_data[1], mi_row + hbs, mi_col); |
1883 | 0 | encode_b(pcs, ctx, pc_tree->block_data[PART_HB][2], &ptree->blk_data[2], mi_row + hbs, mi_col + hbs); |
1884 | 0 | break; |
1885 | 0 | case PARTITION_VERT_A: |
1886 | 0 | encode_b(pcs, ctx, pc_tree->block_data[PART_VA][0], &ptree->blk_data[0], mi_row, mi_col); |
1887 | 0 | encode_b(pcs, ctx, pc_tree->block_data[PART_VA][1], &ptree->blk_data[1], mi_row + hbs, mi_col); |
1888 | 0 | encode_b(pcs, ctx, pc_tree->block_data[PART_VA][2], &ptree->blk_data[2], mi_row, mi_col + hbs); |
1889 | 0 | break; |
1890 | 0 | case PARTITION_VERT_B: |
1891 | 0 | encode_b(pcs, ctx, pc_tree->block_data[PART_VB][0], &ptree->blk_data[0], mi_row, mi_col); |
1892 | 0 | encode_b(pcs, ctx, pc_tree->block_data[PART_VB][1], &ptree->blk_data[1], mi_row, mi_col + hbs); |
1893 | 0 | encode_b(pcs, ctx, pc_tree->block_data[PART_VB][2], &ptree->blk_data[2], mi_row + hbs, mi_col + hbs); |
1894 | 0 | break; |
1895 | 0 | case PARTITION_HORZ_4: |
1896 | 0 | for (int i = 0; i < SUB_PARTITIONS_PART4; ++i) { |
1897 | 0 | int this_mi_row = mi_row + i * quarter_step; |
1898 | 0 | if (i > 0 && this_mi_row >= pcs->ppcs->av1_cm->mi_rows) { |
1899 | | // Only the last block is able to be outside the picture boundary. If one of the first |
1900 | | // 3 blocks is outside the boundary, H4 is not a valid partition (see AV1 spec 5.11.4) |
1901 | 0 | assert(i == 3); |
1902 | 0 | break; |
1903 | 0 | } |
1904 | 0 | encode_b(pcs, ctx, pc_tree->block_data[PART_H4][i], &ptree->blk_data[i], this_mi_row, mi_col); |
1905 | 0 | } |
1906 | 0 | break; |
1907 | 0 | case PARTITION_VERT_4: |
1908 | 0 | for (int i = 0; i < SUB_PARTITIONS_PART4; ++i) { |
1909 | 0 | int this_mi_col = mi_col + i * quarter_step; |
1910 | 0 | if (i > 0 && this_mi_col >= pcs->ppcs->av1_cm->mi_cols) { |
1911 | | // Only the last block is able to be outside the picture boundary. If one of the first |
1912 | | // 3 blocks is outside the boundary, H4 is not a valid partition (see AV1 spec 5.11.4) |
1913 | 0 | assert(i == 3); |
1914 | 0 | break; |
1915 | 0 | } |
1916 | 0 | encode_b(pcs, ctx, pc_tree->block_data[PART_V4][i], &ptree->blk_data[i], mi_row, this_mi_col); |
1917 | 0 | } |
1918 | 0 | break; |
1919 | 0 | default: |
1920 | | assert(0 && "Invalid partition type."); |
1921 | 0 | break; |
1922 | 175k | } |
1923 | 175k | } |