/src/libavc/common/svc/isvc_iquant_itrans_recon.c
Line | Count | Source (jump to first uncovered line) |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Copyright (C) 2022 The Android Open Source Project |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ***************************************************************************** |
18 | | * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
19 | | */ |
20 | | /** |
21 | | ******************************************************************************* |
22 | | * @file |
23 | | * isvc_iquant_itrans_recon.c |
24 | | * |
25 | | * @brief |
26 | | * Contains definition of functions for h264 inverse quantization inverse |
27 | | *transformation and recon |
28 | | * |
29 | | * @author |
30 | | * Ittiam |
31 | | * |
32 | | * @par List of Functions: |
33 | | * - ih264_iquant_itrans_recon_4x4() |
34 | | * - ih264_iquant_itrans_recon_8x8() |
35 | | * - ih264_iquant_itrans_recon_4x4_dc() |
36 | | * - ih264_iquant_itrans_recon_8x8_dc() |
37 | | * - ih264_iquant_itrans_recon_chroma_4x4() |
38 | | * -ih264_iquant_itrans_recon_chroma_4x4_dc() |
39 | | * |
40 | | * @remarks |
41 | | * |
42 | | ******************************************************************************* |
43 | | */ |
44 | | |
45 | | /*****************************************************************************/ |
46 | | /* File Includes */ |
47 | | /*****************************************************************************/ |
48 | | #include <stdint.h> |
49 | | |
50 | | #include "ih264_typedefs.h" |
51 | | #include "ih264_debug.h" |
52 | | #include "ih264_defs.h" |
53 | | #include "ih264_trans_macros.h" |
54 | | #include "ih264_macros.h" |
55 | | #include "ih264_platform_macros.h" |
56 | | #include "ih264_trans_data.h" |
57 | | #include "ih264_size_defs.h" |
58 | | #include "ih264_structs.h" |
59 | | #include "isvc_trans_quant_itrans_iquant.h" |
60 | | |
61 | | /* |
62 | | ******************************************************************************** |
63 | | * |
64 | | * @brief This function reconstructs a 4x4 sub block from quantized residue and |
65 | | * prediction buffer |
66 | | * |
67 | | * @par Description: |
68 | | * The quantized residue is first inverse quantized, then inverse transformed. |
69 | | * This inverse transformed content is added to the prediction buffer to recon- |
70 | | * struct the end output |
71 | | * |
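72 | | * @param[in] ps_src |
73 | | * quantized 4x4 coefficient block (WORD16 data and its row stride) |
74 | | * |
75 | | * @param[in] ps_pred |
76 | | * prediction 4x4 block (UWORD8 data and its row stride) |
77 | | * |
78 | | * @param[in] ps_res_pred |
79 | | * residue prediction block (WORD16); each sample is passed to |
80 | | * isvc_get_residue() together with the inverse transformed value |
81 | | * |
82 | | * @param[out] ps_res |
83 | | * residue 4x4 block (WORD16) written by this function |
84 | | * |
85 | | * @param[out] ps_rec |
86 | | * reconstructed 4x4 block (UWORD8) written by this function |
87 | | * |
88 | | * @param[in] ps_iq_it_res_rec_constants |
89 | | * holds pu2_iscal_mat and pu2_weigh_mat (the two tables given to |
90 | | * INV_QUANT) and u4_qp_div_6 (Floor (qp/6)) |
91 | | * |
92 | | * @param[in] pi2_tmp |
93 | | * temporary buffer of size 1*16; holds the output of the horizontal |
94 | | * pass until the vertical pass reads it |
95 | | * |
96 | | * @param[in] pi2_dc_src |
97 | | * DC value that replaces the inverse quantized DC coefficient when |
98 | | * i4_iq_start_idx is 1 |
99 | | * |
100 | | * @param[in] i4_iq_start_idx |
101 | | * 1 selects the DC value from pi2_dc_src; any other value keeps the |
102 | | * inverse quantized DC coefficient |
103 | | * |
104 | | * @param[in] u1_res_accumulate |
105 | | * flag forwarded unchanged to isvc_get_residue() for every residue |
106 | | * sample |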
107 | | * |
108 | | * @returns none |
109 | | * |
110 | | * @remarks none |
111 | | * |
112 | | ******************************************************************************* |
113 | | */ |
114 | | void isvc_iquant_itrans_recon_4x4(buffer_container_t *ps_src, buffer_container_t *ps_pred, |
115 | | buffer_container_t *ps_res_pred, buffer_container_t *ps_res, |
116 | | buffer_container_t *ps_rec, |
117 | | iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, |
118 | | WORD16 *pi2_tmp, WORD16 *pi2_dc_src, WORD32 i4_iq_start_idx, |
119 | | UWORD8 u1_res_accumulate) |
120 | 7.94M | { |
121 | 7.94M | WORD16 x0, x1, x2, x3, i; |
122 | 7.94M | WORD32 q0, q1, q2, q3; |
123 | 7.94M | WORD16 i_macro; |
124 | | /* Unpack the data pointers and strides from the buffer containers */ |
125 | 7.94M | WORD16 *pi2_src = ps_src->pv_data; |
126 | 7.94M | WORD16 *pi2_res = ps_res->pv_data; |
127 | 7.94M | WORD16 *pi2_res_pred = ps_res_pred->pv_data; |
128 | 7.94M | UWORD8 *pu1_pred = ps_pred->pv_data; |
129 | 7.94M | UWORD8 *pu1_out = ps_rec->pv_data; |
130 | 7.94M | WORD32 i4_src_stride = ps_src->i4_data_stride; |
131 | 7.94M | WORD32 i4_res_stride = ps_res->i4_data_stride; |
132 | 7.94M | WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride; |
133 | 7.94M | WORD32 i4_pred_stride = ps_pred->i4_data_stride; |
134 | 7.94M | WORD32 i4_out_stride = ps_rec->i4_data_stride; |
135 | 7.94M | const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat; |
136 | 7.94M | const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat; |
137 | 7.94M | UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6; |
138 | 7.94M | WORD16 *pi2_src_ptr = pi2_src; |
139 | 7.94M | WORD16 *pi2_tmp_ptr = pi2_tmp; |
140 | 7.94M | UWORD8 *pu1_pred_ptr = pu1_pred; |
141 | 7.94M | WORD16 *pi2_res_ptr = pi2_res; |
142 | 7.94M | WORD16 *pi2_res_pred_ptr = pi2_res_pred; |
143 | 7.94M | UWORD8 *pu1_out_ptr = pu1_out; |
144 | 7.94M | WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0; |
145 | | /* rnd_fact is 1 << (3 - qp/6) while qp/6 < 4 and 0 otherwise; it is handed to INV_QUANT below */ |
146 | | /* Pass 1: inverse quantize the four coefficients of one source row with INV_QUANT, */ |
147 | | /* then apply the horizontal inverse transform and store the row in pi2_tmp */ |
148 | 39.8M | for(i = 0; i < SUB_BLK_WIDTH_4x4; i++) |
149 | 31.8M | { |
150 | 31.8M | q0 = pi2_src_ptr[0]; |
151 | 31.8M | INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4); |
152 | 31.8M | if(i == 0 && i4_iq_start_idx == 1) q0 = pi2_dc_src[0]; // Restoring dc value for intra case: pi2_dc_src[0] overrides the dequantized DC |
153 | | |
154 | 31.8M | q2 = pi2_src_ptr[2]; |
155 | 31.8M | INV_QUANT(q2, pu2_iscal_mat[2], pu2_weigh_mat[2], u4_qp_div_6, rnd_fact, 4); |
156 | | |
157 | 31.8M | x0 = q0 + q2; |
158 | 31.8M | x1 = q0 - q2; |
159 | | |
160 | 31.8M | q1 = pi2_src_ptr[1]; |
161 | 31.8M | INV_QUANT(q1, pu2_iscal_mat[1], pu2_weigh_mat[1], u4_qp_div_6, rnd_fact, 4); |
162 | | |
163 | 31.8M | q3 = pi2_src_ptr[3]; |
164 | 31.8M | INV_QUANT(q3, pu2_iscal_mat[3], pu2_weigh_mat[3], u4_qp_div_6, rnd_fact, 4); |
165 | | |
166 | 31.8M | x2 = (q1 >> 1) - q3; |
167 | 31.8M | x3 = q1 + (q3 >> 1); |
168 | | |
169 | 31.8M | pi2_tmp_ptr[0] = x0 + x3; |
170 | 31.8M | pi2_tmp_ptr[1] = x1 + x2; |
171 | 31.8M | pi2_tmp_ptr[2] = x1 - x2; |
172 | 31.8M | pi2_tmp_ptr[3] = x0 - x3; |
173 | | /* Step to the next source row, the next 4-entry row of pi2_tmp and the next row of both tables */ |
174 | 31.8M | pi2_src_ptr += i4_src_stride; |
175 | 31.8M | pi2_tmp_ptr += SUB_BLK_WIDTH_4x4; |
176 | 31.8M | pu2_iscal_mat += SUB_BLK_WIDTH_4x4; |
177 | 31.8M | pu2_weigh_mat += SUB_BLK_WIDTH_4x4; |
178 | 31.8M | } |
179 | | |
180 | | /* Pass 2: vertical inverse transform, one column of pi2_tmp per iteration */ |
181 | 7.94M | pi2_tmp_ptr = pi2_tmp; |
182 | 40.7M | for(i = 0; i < SUB_BLK_WIDTH_4x4; i++) |
183 | 32.8M | { |
184 | 32.8M | pu1_pred_ptr = pu1_pred; |
185 | 32.8M | pi2_res_ptr = pi2_res; |
186 | 32.8M | pi2_res_pred_ptr = pi2_res_pred; |
187 | 32.8M | pu1_out = pu1_out_ptr; |
188 | | /* Column entries sit 4 apart in pi2_tmp, hence the indices 0, 4, 8 and 12 */ |
189 | 32.8M | x0 = (pi2_tmp_ptr[0] + pi2_tmp_ptr[8]); |
190 | 32.8M | x1 = (pi2_tmp_ptr[0] - pi2_tmp_ptr[8]); |
191 | 32.8M | x2 = (pi2_tmp_ptr[4] >> 1) - pi2_tmp_ptr[12]; |
192 | 32.8M | x3 = pi2_tmp_ptr[4] + (pi2_tmp_ptr[12] >> 1); |
193 | | |
194 | | /* Per sample: (x + 32) >> 6, isvc_get_residue() gives the stored residue, recon = CLIP_U8(residue + pred) */ |
195 | 32.8M | i_macro = x0 + x3; |
196 | 32.8M | i_macro = ((i_macro + 32) >> 6); |
197 | 32.8M | pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate); |
198 | 32.8M | *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]); |
199 | 32.8M | pu1_pred_ptr += i4_pred_stride; |
200 | 32.8M | pu1_out += i4_out_stride; |
201 | 32.8M | pi2_res_ptr += i4_res_stride; |
202 | 32.8M | pi2_res_pred_ptr += i4_res_pred_stride; |
203 | | |
204 | 32.8M | i_macro = x1 + x2; |
205 | 32.8M | i_macro = ((i_macro + 32) >> 6); |
206 | 32.8M | pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate); |
207 | 32.8M | *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]); |
208 | 32.8M | pu1_pred_ptr += i4_pred_stride; |
209 | 32.8M | pu1_out += i4_out_stride; |
210 | 32.8M | pi2_res_ptr += i4_res_stride; |
211 | 32.8M | pi2_res_pred_ptr += i4_res_pred_stride; |
212 | | |
213 | 32.8M | i_macro = x1 - x2; |
214 | 32.8M | i_macro = ((i_macro + 32) >> 6); |
215 | 32.8M | pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate); |
216 | 32.8M | *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]); |
217 | 32.8M | pu1_pred_ptr += i4_pred_stride; |
218 | 32.8M | pu1_out += i4_out_stride; |
219 | 32.8M | pi2_res_ptr += i4_res_stride; |
220 | 32.8M | pi2_res_pred_ptr += i4_res_pred_stride; |
221 | | |
222 | 32.8M | i_macro = x0 - x3; |
223 | 32.8M | i_macro = ((i_macro + 32) >> 6); |
224 | 32.8M | pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate); |
225 | 32.8M | *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]); |
226 | | /* Move every buffer one sample to the right for the next column */ |
227 | 32.8M | pi2_tmp_ptr++; |
228 | 32.8M | pu1_out_ptr++; |
229 | 32.8M | pu1_pred++; |
230 | 32.8M | pi2_res++; |
231 | 32.8M | pi2_res_pred++; |
232 | 32.8M | } |
233 | 7.94M | } |
234 | | /* DC-only 4x4 reconstruction: one DC term is rounded once and applied to all 16 samples of the block */ |
235 | | void isvc_iquant_itrans_recon_4x4_dc(buffer_container_t *ps_src, buffer_container_t *ps_pred, |
236 | | buffer_container_t *ps_res_pred, buffer_container_t *ps_res, |
237 | | buffer_container_t *ps_rec, |
238 | | iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, |
239 | | WORD16 *pi2_tmp, WORD16 *pi2_dc_src, WORD32 i4_iq_start_idx, |
240 | | UWORD8 u1_res_accumulate) |
241 | 10.0M | { |
242 | 10.0M | WORD16 *pi2_src = ps_src->pv_data; |
243 | 10.0M | WORD16 *pi2_res = ps_res->pv_data; |
244 | 10.0M | WORD16 *pi2_res_pred = ps_res_pred->pv_data; |
245 | 10.0M | UWORD8 *pu1_pred = ps_pred->pv_data; |
246 | 10.0M | UWORD8 *pu1_out = ps_rec->pv_data; |
247 | 10.0M | WORD32 i4_res_stride = ps_res->i4_data_stride; |
248 | 10.0M | WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride; |
249 | 10.0M | WORD32 i4_pred_stride = ps_pred->i4_data_stride; |
250 | 10.0M | WORD32 i4_out_stride = ps_rec->i4_data_stride; |
251 | 10.0M | const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat; |
252 | 10.0M | const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat; |
253 | 10.0M | UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6; |
254 | 10.0M | UWORD8 *pu1_pred_ptr = pu1_pred; |
255 | 10.0M | WORD16 *pi2_res_ptr = pi2_res; |
256 | 10.0M | WORD16 *pi2_res_pred_ptr = pi2_res_pred; |
257 | 10.0M | UWORD8 *pu1_out_ptr = pu1_out; |
258 | 10.0M | WORD32 q0; |
259 | 10.0M | WORD16 i_macro, i; |
260 | 10.0M | WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0; |
261 | 10.0M | UNUSED(pi2_tmp); |
262 | | /* i4_iq_start_idx == 0: inverse quantize pi2_src[0]; otherwise take the DC value from pi2_dc_src */ |
263 | 10.0M | if(i4_iq_start_idx == 0) |
264 | 0 | { |
265 | 0 | q0 = pi2_src[0]; |
266 | 0 | INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4); |
267 | 0 | } |
268 | 10.0M | else |
269 | 10.0M | { |
270 | 10.0M | q0 = pi2_dc_src[0]; // Restoring dc value for intra case |
271 | 10.0M | } |
272 | 10.0M | i_macro = ((q0 + 32) >> 6); |
273 | 49.7M | for(i = 0; i < SUB_BLK_WIDTH_4x4; i++) |
274 | 39.6M | { |
275 | 39.6M | pu1_pred_ptr = pu1_pred; |
276 | 39.6M | pi2_res_ptr = pi2_res; |
277 | 39.6M | pi2_res_pred_ptr = pi2_res_pred; |
278 | 39.6M | pu1_out = pu1_out_ptr; |
279 | | |
280 | | /* Each of the four rows of this column uses the same i_macro: residue from isvc_get_residue(), recon = CLIP_U8(residue + pred) */ |
281 | 39.6M | pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate); |
282 | 39.6M | *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]); |
283 | 39.6M | pu1_pred_ptr += i4_pred_stride; |
284 | 39.6M | pu1_out += i4_out_stride; |
285 | 39.6M | pi2_res_ptr += i4_res_stride; |
286 | 39.6M | pi2_res_pred_ptr += i4_res_pred_stride; |
287 | | |
288 | 39.6M | pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate); |
289 | 39.6M | *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]); |
290 | 39.6M | pu1_pred_ptr += i4_pred_stride; |
291 | 39.6M | pu1_out += i4_out_stride; |
292 | 39.6M | pi2_res_ptr += i4_res_stride; |
293 | 39.6M | pi2_res_pred_ptr += i4_res_pred_stride; |
294 | | |
295 | 39.6M | pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate); |
296 | 39.6M | *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]); |
297 | 39.6M | pu1_pred_ptr += i4_pred_stride; |
298 | 39.6M | pu1_out += i4_out_stride; |
299 | 39.6M | pi2_res_ptr += i4_res_stride; |
300 | 39.6M | pi2_res_pred_ptr += i4_res_pred_stride; |
301 | | |
302 | 39.6M | pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate); |
303 | 39.6M | *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]); |
304 | | /* Move every buffer one sample to the right for the next column */ |
305 | 39.6M | pu1_out_ptr++; |
306 | 39.6M | pu1_pred++; |
307 | 39.6M | pi2_res++; |
308 | 39.6M | pi2_res_pred++; |
309 | 39.6M | } |
310 | 10.0M | } |
311 | | |
312 | | /** |
313 | | ******************************************************************************* |
314 | | * |
315 | | * @brief |
316 | | * This function performs inverse quant and Inverse transform type Ci8 for 8x8 |
317 | | * block |
318 | | * |
319 | | * @par Description: |
320 | | * Performs inverse transform Ci8 and adds the residue to get the |
321 | | * reconstructed block |
322 | | * |
323 | | * @param[in] ps_src |
324 | | * Input 8x8 coefficients (WORD16); stride is asserted to equal SUB_BLK_WIDTH_8x8 |
325 | | * |
326 | | * @param[in] ps_pred |
327 | | * Prediction 8x8 block (UWORD8 data and its row stride) |
328 | | * |
329 | | * @param[in] ps_res_pred |
330 | | * Residue prediction block (WORD16); each sample is passed to |
331 | | * isvc_get_residue() together with the inverse transformed value |
332 | | * |
333 | | * @param[out] ps_res |
334 | | * Residue 8x8 block (WORD16) written by this function |
335 | | * |
336 | | * @param[out] ps_rec |
337 | | * Output 8x8 block (UWORD8) written by this function |
338 | | * |
339 | | * @param[in] ps_iq_it_res_rec_constants |
340 | | * Holds pu2_iscal_mat and pu2_weigh_mat (the two tables given to |
341 | | * INV_QUANT) and u4_qp_div_6 (QP/6) |
342 | | * |
343 | | * @param[in] pi2_tmp |
344 | | * temporary buffer; all 64 inverse quantized coefficients are written |
345 | | * to it, so it must hold at least 8*8 WORD16 entries |
346 | | * |
347 | | * @param[in] pi2_dc_src |
348 | | * Unused |
349 | | * |
350 | | * @param[in] i4_iq_start_idx |
351 | | * Unused |
352 | | * |
353 | | * @param[in] u1_res_accumulate |
354 | | * Flag forwarded unchanged to isvc_get_residue() for every residue |
355 | | * sample |
356 | | * |
357 | | * @returns Void |
358 | | * |
359 | | * @remarks |
360 | | * None |
361 | | * |
362 | | ******************************************************************************* |
363 | | */ |
364 | | void isvc_iquant_itrans_recon_8x8(buffer_container_t *ps_src, buffer_container_t *ps_pred, |
365 | | buffer_container_t *ps_res_pred, buffer_container_t *ps_res, |
366 | | buffer_container_t *ps_rec, |
367 | | iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, |
368 | | WORD16 *pi2_tmp, WORD16 *pi2_dc_src, WORD32 i4_iq_start_idx, |
369 | | UWORD8 u1_res_accumulate) |
370 | 0 | { |
371 | 0 | WORD32 i; |
372 | 0 | WORD16 i_z0, i_z1, i_z2, i_z3, i_z4, i_z5, i_z6, i_z7; |
373 | 0 | WORD16 i_y0, i_y1, i_y2, i_y3, i_y4, i_y5, i_y6, i_y7; |
374 | 0 | WORD16 i_macro; |
375 | 0 | WORD32 q; |
376 | 

377 | 0 | WORD16 *pi2_src = ps_src->pv_data; |
378 | 0 | WORD16 *pi2_res = ps_res->pv_data; |
379 | 0 | WORD16 *pi2_res_pred = ps_res_pred->pv_data; |
380 | 0 | UWORD8 *pu1_pred = ps_pred->pv_data; |
381 | 0 | UWORD8 *pu1_out = ps_rec->pv_data; |
382 | 0 | WORD32 i4_res_stride = ps_res->i4_data_stride; |
383 | 0 | WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride; |
384 | 0 | WORD32 i4_pred_stride = ps_pred->i4_data_stride; |
385 | 0 | WORD32 i4_out_stride = ps_rec->i4_data_stride; |
386 | 0 | const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat; |
387 | 0 | const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat; |
388 | 0 | UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6; |
389 | 0 | WORD16 *pi2_tmp_ptr = pi2_tmp; |
390 | 0 | UWORD8 *pu1_pred_ptr = pu1_pred; |
391 | 0 | WORD16 *pi2_res_ptr = pi2_res; |
392 | 0 | WORD16 *pi2_res_pred_ptr = pi2_res_pred; |
393 | 0 | UWORD8 *pu1_out_ptr = pu1_out; |
394 | 0 | WORD32 rnd_fact = (u4_qp_div_6 < 6) ? (1 << (5 - u4_qp_div_6)) : 0; |
395 | 0 | UNUSED(i4_iq_start_idx); |
396 | 0 | UNUSED(pi2_dc_src); |
397 | 

398 | 0 | ASSERT(ps_src->i4_data_stride == SUB_BLK_WIDTH_8x8); // the loop below indexes pi2_src[i] linearly over all 64 entries |
399 | | /* NOTE(review): the box below says the DC coeff is not scaled, yet the loop starts at i = 0 and applies INV_QUANT to index 0 as well - confirm */ |
400 | | /*************************************************************/ |
401 | | /* De quantization of coefficients. Will be replaced by SIMD */ |
402 | | /* operations on platform. Note : DC coeff is not scaled */ |
403 | | /*************************************************************/ |
404 | 0 | for(i = 0; i < (SUB_BLK_WIDTH_8x8 * SUB_BLK_WIDTH_8x8); i++) |
405 | 0 | { |
406 | 0 | q = pi2_src[i]; |
407 | 0 | INV_QUANT(q, pu2_iscal_mat[i], pu2_weigh_mat[i], u4_qp_div_6, rnd_fact, 6); |
408 | 0 | pi2_tmp_ptr[i] = q; |
409 | 0 | } |
410 | | |
411 | | /* Perform Inverse transform in place on pi2_tmp, one 8-entry row per iteration */ |
412 | | /*--------------------------------------------------------------------*/ |
413 | | /* IDCT [ Horizontal transformation ] */ |
414 | | /*--------------------------------------------------------------------*/ |
415 | 0 | for(i = 0; i < SUB_BLK_WIDTH_8x8; i++) |
416 | 0 | { |
417 | | /*------------------------------------------------------------------*/ |
418 | | /* y0 = w0 + w4 */ |
419 | | /* y1 = -w3 + w5 - w7 - (w7 >> 1) */ |
420 | | /* y2 = w0 - w4 */ |
421 | | /* y3 = w1 + w7 - w3 - (w3 >> 1) */ |
422 | | /* y4 = (w2 >> 1) - w6 */ |
423 | | /* y5 = -w1 + w7 + w5 + (w5 >> 1) */ |
424 | | /* y6 = w2 + (w6 >> 1) */ |
425 | | /* y7 = w3 + w5 + w1 + (w1 >> 1) */ |
426 | | /*------------------------------------------------------------------*/ |
427 | 0 | i_y0 = (pi2_tmp_ptr[0] + pi2_tmp_ptr[4]); |
428 | 

429 | 0 | i_y1 = |
430 | 0 | ((WORD32) (-pi2_tmp_ptr[3]) + pi2_tmp_ptr[5] - pi2_tmp_ptr[7] - (pi2_tmp_ptr[7] >> 1)); |
431 | 

432 | 0 | i_y2 = (pi2_tmp_ptr[0] - pi2_tmp_ptr[4]); |
433 | 

434 | 0 | i_y3 = ((WORD32) pi2_tmp_ptr[1] + pi2_tmp_ptr[7] - pi2_tmp_ptr[3] - (pi2_tmp_ptr[3] >> 1)); |
435 | 

436 | 0 | i_y4 = ((pi2_tmp_ptr[2] >> 1) - pi2_tmp_ptr[6]); |
437 | 

438 | 0 | i_y5 = |
439 | 0 | ((WORD32) (-pi2_tmp_ptr[1]) + pi2_tmp_ptr[7] + pi2_tmp_ptr[5] + (pi2_tmp_ptr[5] >> 1)); |
440 | 

441 | 0 | i_y6 = (pi2_tmp_ptr[2] + (pi2_tmp_ptr[6] >> 1)); |
442 | 

443 | 0 | i_y7 = ((WORD32) pi2_tmp_ptr[3] + pi2_tmp_ptr[5] + pi2_tmp_ptr[1] + (pi2_tmp_ptr[1] >> 1)); |
444 | | |
445 | | /*------------------------------------------------------------------*/ |
446 | | /* z0 = y0 + y6 */ |
447 | | /* z1 = y1 + (y7 >> 2) */ |
448 | | /* z2 = y2 + y4 */ |
449 | | /* z3 = y3 + (y5 >> 2) */ |
450 | | /* z4 = y2 - y4 */ |
451 | | /* z5 = (y3 >> 2) - y5 */ |
452 | | /* z6 = y0 - y6 */ |
453 | | /* z7 = y7 - (y1 >> 2) */ |
454 | | /*------------------------------------------------------------------*/ |
455 | 0 | i_z0 = i_y0 + i_y6; |
456 | 0 | i_z1 = i_y1 + (i_y7 >> 2); |
457 | 0 | i_z2 = i_y2 + i_y4; |
458 | 0 | i_z3 = i_y3 + (i_y5 >> 2); |
459 | 0 | i_z4 = i_y2 - i_y4; |
460 | 0 | i_z5 = (i_y3 >> 2) - i_y5; |
461 | 0 | i_z6 = i_y0 - i_y6; |
462 | 0 | i_z7 = i_y7 - (i_y1 >> 2); |
463 | | |
464 | | /*------------------------------------------------------------------*/ |
465 | | /* x0 = z0 + z7 */ |
466 | | /* x1 = z2 + z5 */ |
467 | | /* x2 = z4 + z3 */ |
468 | | /* x3 = z6 + z1 */ |
469 | | /* x4 = z6 - z1 */ |
470 | | /* x5 = z4 - z3 */ |
471 | | /* x6 = z2 - z5 */ |
472 | | /* x7 = z0 - z7 */ |
473 | | /*------------------------------------------------------------------*/ |
474 | 0 | pi2_tmp_ptr[0] = i_z0 + i_z7; |
475 | 0 | pi2_tmp_ptr[1] = i_z2 + i_z5; |
476 | 0 | pi2_tmp_ptr[2] = i_z4 + i_z3; |
477 | 0 | pi2_tmp_ptr[3] = i_z6 + i_z1; |
478 | 0 | pi2_tmp_ptr[4] = i_z6 - i_z1; |
479 | 0 | pi2_tmp_ptr[5] = i_z4 - i_z3; |
480 | 0 | pi2_tmp_ptr[6] = i_z2 - i_z5; |
481 | 0 | pi2_tmp_ptr[7] = i_z0 - i_z7; |
482 | | |
483 | | /* move to the next row */ |
484 | | // only the tmp pointer advances: the inverse quantized data already lives in pi2_tmp |
485 | 0 | pi2_tmp_ptr += SUB_BLK_WIDTH_8x8; |
486 | 0 | } |
487 | | /*--------------------------------------------------------------------*/ |
488 | | /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6 */ |
489 | | /* */ |
490 | | /* Add the prediction and store the result through ps_rec; the */ |
491 | | /* residue returned by isvc_get_residue() is stored through ps_res */ |
492 | | /*--------------------------------------------------------------------*/ |
493 | 

494 | 0 | pi2_tmp_ptr = pi2_tmp; |
495 | 0 | for(i = 0; i < SUB_BLK_WIDTH_8x8; i++) |
496 | 0 | { |
497 | 0 | pu1_pred_ptr = pu1_pred; |
498 | 0 | pi2_res_ptr = pi2_res; |
499 | 0 | pi2_res_pred_ptr = pi2_res_pred; |
500 | 0 | pu1_out = pu1_out_ptr; |
501 | | /*------------------------------------------------------------------*/ |
502 | | /* y0j = w0j + w4j */ |
503 | | /* y1j = -w3j + w5j -w7j -(w7j >> 1) */ |
504 | | /* y2j = w0j -w4j */ |
505 | | /* y3j = w1j + w7j -w3j -(w3j >> 1) */ |
506 | | /* y4j = ( w2j >> 1 ) -w6j */ |
507 | | /* y5j = -w1j + w7j + w5j + (w5j >> 1) */ |
508 | | /* y6j = w2j + ( w6j >> 1 ) */ |
509 | | /* y7j = w3j + w5j + w1j + (w1j >> 1) */ |
510 | | /*------------------------------------------------------------------*/ |
511 | 0 | i_y0 = pi2_tmp_ptr[0] + pi2_tmp_ptr[32]; |
512 | 

513 | 0 | i_y1 = (WORD32) (-pi2_tmp_ptr[24]) + pi2_tmp_ptr[40] - pi2_tmp_ptr[56] - |
514 | 0 | (pi2_tmp_ptr[56] >> 1); |
515 | 

516 | 0 | i_y2 = pi2_tmp_ptr[0] - pi2_tmp_ptr[32]; |
517 | 

518 | 0 | i_y3 = (WORD32) pi2_tmp_ptr[8] + pi2_tmp_ptr[56] - pi2_tmp_ptr[24] - (pi2_tmp_ptr[24] >> 1); |
519 | 

520 | 0 | i_y4 = (pi2_tmp_ptr[16] >> 1) - pi2_tmp_ptr[48]; |
521 | 

522 | 0 | i_y5 = |
523 | 0 | (WORD32) (-pi2_tmp_ptr[8]) + pi2_tmp_ptr[56] + pi2_tmp_ptr[40] + (pi2_tmp_ptr[40] >> 1); |
524 | 

525 | 0 | i_y6 = pi2_tmp_ptr[16] + (pi2_tmp_ptr[48] >> 1); |
526 | 

527 | 0 | i_y7 = (WORD32) pi2_tmp_ptr[24] + pi2_tmp_ptr[40] + pi2_tmp_ptr[8] + (pi2_tmp_ptr[8] >> 1); |
528 | | |
529 | | /*------------------------------------------------------------------*/ |
530 | | /* z0j = y0j + y6j */ |
531 | | /* z1j = y1j + (y7j >> 2) */ |
532 | | /* z2j = y2j + y4j */ |
533 | | /* z3j = y3j + (y5j >> 2) */ |
534 | | /* z4j = y2j -y4j */ |
535 | | /* z5j = (y3j >> 2) -y5j */ |
536 | | /* z6j = y0j -y6j */ |
537 | | /* z7j = y7j -(y1j >> 2) */ |
538 | | /*------------------------------------------------------------------*/ |
539 | 0 | i_z0 = i_y0 + i_y6; |
540 | 0 | i_z1 = i_y1 + (i_y7 >> 2); |
541 | 0 | i_z2 = i_y2 + i_y4; |
542 | 0 | i_z3 = i_y3 + (i_y5 >> 2); |
543 | 0 | i_z4 = i_y2 - i_y4; |
544 | 0 | i_z5 = (i_y3 >> 2) - i_y5; |
545 | 0 | i_z6 = i_y0 - i_y6; |
546 | 0 | i_z7 = i_y7 - (i_y1 >> 2); |
547 | | |
548 | | /*------------------------------------------------------------------*/ |
549 | | /* x0j = z0j + z7j */ |
550 | | /* x1j = z2j + z5j */ |
551 | | /* x2j = z4j + z3j */ |
552 | | /* x3j = z6j + z1j */ |
553 | | /* x4j = z6j -z1j */ |
554 | | /* x5j = z4j -z3j */ |
555 | | /* x6j = z2j -z5j */ |
556 | | /* x7j = z0j -z7j */ |
557 | | /*------------------------------------------------------------------*/ |
558 | 0 | i_macro = ((i_z0 + i_z7 + 32) >> 6); |
559 | 0 | pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate); |
560 | 0 | *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]); |
561 | | /* Step pred, out, res and res_pred down one row within the same column */ |
562 | 0 | pu1_pred_ptr += i4_pred_stride; |
563 | 0 | pu1_out += i4_out_stride; |
564 | 0 | pi2_res_ptr += i4_res_stride; |
565 | 0 | pi2_res_pred_ptr += i4_res_pred_stride; |
566 | 

567 | 0 | i_macro = ((i_z2 + i_z5 + 32) >> 6); |
568 | 0 | pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate); |
569 | 0 | *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]); |
570 | 0 | pu1_pred_ptr += i4_pred_stride; |
571 | 0 | pu1_out += i4_out_stride; |
572 | 0 | pi2_res_ptr += i4_res_stride; |
573 | 0 | pi2_res_pred_ptr += i4_res_pred_stride; |
574 | 

575 | 0 | i_macro = ((i_z4 + i_z3 + 32) >> 6); |
576 | 0 | pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate); |
577 | 0 | *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]); |
578 | 0 | pu1_pred_ptr += i4_pred_stride; |
579 | 0 | pu1_out += i4_out_stride; |
580 | 0 | pi2_res_ptr += i4_res_stride; |
581 | 0 | pi2_res_pred_ptr += i4_res_pred_stride; |
582 | 

583 | 0 | i_macro = ((i_z6 + i_z1 + 32) >> 6); |
584 | 0 | pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate); |
585 | 0 | *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]); |
586 | 0 | pu1_pred_ptr += i4_pred_stride; |
587 | 0 | pu1_out += i4_out_stride; |
588 | 0 | pi2_res_ptr += i4_res_stride; |
589 | 0 | pi2_res_pred_ptr += i4_res_pred_stride; |
590 | 

591 | 0 | i_macro = ((i_z6 - i_z1 + 32) >> 6); |
592 | 0 | pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate); |
593 | 0 | *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]); |
594 | 0 | pu1_pred_ptr += i4_pred_stride; |
595 | 0 | pu1_out += i4_out_stride; |
596 | 0 | pi2_res_ptr += i4_res_stride; |
597 | 0 | pi2_res_pred_ptr += i4_res_pred_stride; |
598 | 

599 | 0 | i_macro = ((i_z4 - i_z3 + 32) >> 6); |
600 | 0 | pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate); |
601 | 0 | *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]); |
602 | 0 | pu1_pred_ptr += i4_pred_stride; |
603 | 0 | pu1_out += i4_out_stride; |
604 | 0 | pi2_res_ptr += i4_res_stride; |
605 | 0 | pi2_res_pred_ptr += i4_res_pred_stride; |
606 | 

607 | 0 | i_macro = ((i_z2 - i_z5 + 32) >> 6); |
608 | 0 | pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate); |
609 | 0 | *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]); |
610 | 0 | pu1_pred_ptr += i4_pred_stride; |
611 | 0 | pu1_out += i4_out_stride; |
612 | 0 | pi2_res_ptr += i4_res_stride; |
613 | 0 | pi2_res_pred_ptr += i4_res_pred_stride; |
614 | 

615 | 0 | i_macro = ((i_z0 - i_z7 + 32) >> 6); |
616 | 0 | pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate); |
617 | 0 | *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]); |
618 | 

619 | 0 | pi2_tmp_ptr++; |
620 | 0 | pu1_out_ptr++; |
621 | 0 | pu1_pred++; |
622 | 0 | pi2_res++; |
623 | 0 | pi2_res_pred++; |
624 | 0 | } |
625 | 0 | } |
626 | | |
627 | | /* |
628 | | ******************************************************************************** |
629 | | * |
630 | | * @brief This function reconstructs a 4x4 sub block from quantized residue and |
631 | | * prediction buffer |
632 | | * |
633 | | * @par Description: |
634 | | * The quantized residue is first inverse quantized, then inverse transformed. |
635 | | * This inverse transformed content is added to the prediction buffer to recon- |
636 | | * struct the end output |
637 | | * |
638 | | * @param[in] pi2_src |
639 | | * quantized 4x4 block |
640 | | * |
641 | | * @param[in] pu1_pred |
642 | | * prediction 4x4 block |
643 | | * |
644 | | * @param[out] pu1_out |
645 | | * reconstructed 4x4 block |
646 | | * |
647 | | * @param[in] src_strd |
648 | | * quantization buffer stride |
649 | | * |
650 | | * @param[in] i4_pred_stride, |
651 | | * Prediction buffer stride |
652 | | * |
653 | | * @param[in] i4_out_stride |
654 | | * recon buffer Stride |
655 | | * |
656 | | * @param[in] pu2_scaling_list |
657 | | * pointer to scaling list |
658 | | * |
659 | | * @param[in] pu2_norm_adjust |
660 | | * pointer to inverse scale matrix |
661 | | * |
662 | | * @param[in] u4_qp_div_6 |
663 | | * Floor (qp/6) |
664 | | * |
665 | | * @param[in] pi4_tmp |
666 | | * temporary buffer of size 1*16 |
667 | | * |
668 | | * @returns none |
669 | | * |
670 | | * @remarks none |
671 | | * |
672 | | ******************************************************************************* |
673 | | */ |
674 | | void isvc_iquant_itrans_recon_chroma_4x4(buffer_container_t *ps_src, buffer_container_t *ps_pred, |
675 | | buffer_container_t *ps_res_pred, |
676 | | buffer_container_t *ps_res, buffer_container_t *ps_rec, |
677 | | iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, |
678 | | WORD16 *pi2_tmp, WORD16 *pi2_dc_src, |
679 | | WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate) |
680 | 1.41M | { |
681 | 1.41M | WORD16 x0, x1, x2, x3, i; |
682 | 1.41M | WORD32 q0, q1, q2, q3; |
683 | 1.41M | WORD16 i_macro; |
684 | | |
685 | 1.41M | WORD16 *pi2_src = ps_src->pv_data; |
686 | 1.41M | WORD16 *pi2_res = ps_res->pv_data; |
687 | 1.41M | WORD16 *pi2_res_pred = ps_res_pred->pv_data; |
688 | 1.41M | UWORD8 *pu1_pred = ps_pred->pv_data; |
689 | 1.41M | UWORD8 *pu1_out = ps_rec->pv_data; |
690 | 1.41M | WORD32 i4_src_stride = ps_src->i4_data_stride; |
691 | 1.41M | WORD32 i4_res_stride = ps_res->i4_data_stride; |
692 | 1.41M | WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride; |
693 | 1.41M | WORD32 i4_pred_stride = ps_pred->i4_data_stride; |
694 | 1.41M | WORD32 i4_out_stride = ps_rec->i4_data_stride; |
695 | 1.41M | const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat; |
696 | 1.41M | const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat; |
697 | 1.41M | UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6; |
698 | 1.41M | WORD16 *pi2_src_ptr = pi2_src; |
699 | 1.41M | WORD16 *pi2_tmp_ptr = pi2_tmp; |
700 | 1.41M | UWORD8 *pu1_pred_ptr = pu1_pred; |
701 | 1.41M | WORD16 *pi2_res_ptr = pi2_res; |
702 | 1.41M | WORD16 *pi2_res_pred_ptr = pi2_res_pred; |
703 | 1.41M | UWORD8 *pu1_out_ptr = pu1_out; |
704 | 1.41M | WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0; |
705 | | |
706 | 1.41M | UNUSED(i4_iq_start_idx); |
707 | | |
708 | | /* inverse quant */ |
709 | | /*horizontal inverse transform */ |
710 | 7.09M | for(i = 0; i < SUB_BLK_WIDTH_4x4; i++) |
711 | 5.67M | { |
712 | 5.67M | if(i == 0) |
713 | 1.42M | { |
714 | 1.42M | q0 = pi2_dc_src[0]; |
715 | 1.42M | } |
716 | 4.25M | else |
717 | 4.25M | { |
718 | 4.25M | q0 = pi2_src_ptr[0]; |
719 | 4.25M | INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4); |
720 | 4.25M | } |
721 | | |
722 | 5.67M | q2 = pi2_src_ptr[2]; |
723 | 5.67M | INV_QUANT(q2, pu2_iscal_mat[2], pu2_weigh_mat[2], u4_qp_div_6, rnd_fact, 4); |
724 | | |
725 | 5.67M | x0 = q0 + q2; |
726 | 5.67M | x1 = q0 - q2; |
727 | | |
728 | 5.67M | q1 = pi2_src_ptr[1]; |
729 | 5.67M | INV_QUANT(q1, pu2_iscal_mat[1], pu2_weigh_mat[1], u4_qp_div_6, rnd_fact, 4); |
730 | | |
731 | 5.67M | q3 = pi2_src_ptr[3]; |
732 | 5.67M | INV_QUANT(q3, pu2_iscal_mat[3], pu2_weigh_mat[3], u4_qp_div_6, rnd_fact, 4); |
733 | | |
734 | 5.67M | x2 = (q1 >> 1) - q3; |
735 | 5.67M | x3 = q1 + (q3 >> 1); |
736 | | |
737 | 5.67M | pi2_tmp_ptr[0] = x0 + x3; |
738 | 5.67M | pi2_tmp_ptr[1] = x1 + x2; |
739 | 5.67M | pi2_tmp_ptr[2] = x1 - x2; |
740 | 5.67M | pi2_tmp_ptr[3] = x0 - x3; |
741 | | |
742 | 5.67M | pi2_src_ptr += i4_src_stride; |
743 | 5.67M | pi2_tmp_ptr += SUB_BLK_WIDTH_4x4; |
744 | 5.67M | pu2_iscal_mat += SUB_BLK_WIDTH_4x4; |
745 | 5.67M | pu2_weigh_mat += SUB_BLK_WIDTH_4x4; |
746 | 5.67M | } |
747 | | |
748 | | /* vertical inverse transform */ |
749 | 1.41M | pi2_tmp_ptr = pi2_tmp; |
750 | 7.09M | for(i = 0; i < SUB_BLK_WIDTH_4x4; i++) |
751 | 5.67M | { |
752 | 5.67M | pu1_pred_ptr = pu1_pred; |
753 | 5.67M | pi2_res_ptr = pi2_res; |
754 | 5.67M | pi2_res_pred_ptr = pi2_res_pred; |
755 | 5.67M | pu1_out = pu1_out_ptr; |
756 | | |
757 | 5.67M | x0 = (pi2_tmp_ptr[0] + pi2_tmp_ptr[8]); |
758 | 5.67M | x1 = (pi2_tmp_ptr[0] - pi2_tmp_ptr[8]); |
759 | 5.67M | x2 = (pi2_tmp_ptr[4] >> 1) - pi2_tmp_ptr[12]; |
760 | 5.67M | x3 = pi2_tmp_ptr[4] + (pi2_tmp_ptr[12] >> 1); |
761 | | |
762 | | /* inverse prediction */ |
763 | 5.67M | i_macro = x0 + x3; |
764 | 5.67M | i_macro = ((i_macro + 32) >> 6); |
765 | 5.67M | pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate); |
766 | 5.67M | *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]); |
767 | 5.67M | pu1_pred_ptr += i4_pred_stride; |
768 | 5.67M | pu1_out += i4_out_stride; |
769 | 5.67M | pi2_res_ptr += i4_res_stride; |
770 | 5.67M | pi2_res_pred_ptr += i4_res_pred_stride; |
771 | | |
772 | 5.67M | i_macro = x1 + x2; |
773 | 5.67M | i_macro = ((i_macro + 32) >> 6); |
774 | 5.67M | pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate); |
775 | 5.67M | *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]); |
776 | 5.67M | pu1_pred_ptr += i4_pred_stride; |
777 | 5.67M | pu1_out += i4_out_stride; |
778 | 5.67M | pi2_res_ptr += i4_res_stride; |
779 | 5.67M | pi2_res_pred_ptr += i4_res_pred_stride; |
780 | | |
781 | 5.67M | i_macro = x1 - x2; |
782 | 5.67M | i_macro = ((i_macro + 32) >> 6); |
783 | 5.67M | pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate); |
784 | 5.67M | *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]); |
785 | 5.67M | pu1_pred_ptr += i4_pred_stride; |
786 | 5.67M | pu1_out += i4_out_stride; |
787 | 5.67M | pi2_res_ptr += i4_res_stride; |
788 | 5.67M | pi2_res_pred_ptr += i4_res_pred_stride; |
789 | | |
790 | 5.67M | i_macro = x0 - x3; |
791 | 5.67M | i_macro = ((i_macro + 32) >> 6); |
792 | 5.67M | pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate); |
793 | 5.67M | *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]); |
794 | | |
795 | 5.67M | pi2_tmp_ptr++; |
796 | 5.67M | pu1_out_ptr += 2; // Interleaved store for output |
797 | 5.67M | pu1_pred += 2; // Interleaved load for pred buffer |
798 | 5.67M | pi2_res += 2; |
799 | 5.67M | pi2_res_pred += 2; |
800 | 5.67M | } |
801 | 1.41M | } |
802 | | |
803 | | /* |
804 | | ******************************************************************************** |
805 | | * |
806 | | * @brief This function reconstructs a 4x4 sub block from quantized resiude and |
807 | | * prediction buffer if only dc value is present for residue |
808 | | * |
809 | | * @par Description: |
810 | | * The quantized residue is first inverse quantized, |
811 | | * This inverse quantized content is added to the prediction buffer to recon- |
812 | | * struct the end output |
813 | | * |
814 | | * @param[in] pi2_src |
815 | | * quantized dc coefficient |
816 | | * |
817 | | * @param[in] pu1_pred |
818 | | * prediction 4x4 block in interleaved format |
819 | | * |
820 | | * @param[in] i4_pred_stride, |
821 | | * Prediction buffer stride in interleaved format |
822 | | * |
823 | | * @param[in] i4_out_stride |
824 | | * recon buffer Stride |
825 | | * |
826 | | * @returns none |
827 | | * |
828 | | * @remarks none |
829 | | * |
830 | | ******************************************************************************* |
831 | | */ |
832 | | |
833 | | void isvc_iquant_itrans_recon_chroma_4x4_dc(buffer_container_t *ps_src, buffer_container_t *ps_pred, |
834 | | buffer_container_t *ps_res_pred, |
835 | | buffer_container_t *ps_res, buffer_container_t *ps_rec, |
836 | | iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, |
837 | | WORD16 *pi2_tmp, WORD16 *pi2_dc_src, |
838 | | WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate) |
839 | 624k | { |
840 | 624k | WORD32 q0; |
841 | 624k | WORD16 i_macro, i; |
842 | | |
843 | 624k | WORD16 *pi2_src = ps_src->pv_data; |
844 | 624k | WORD16 *pi2_res = ps_res->pv_data; |
845 | 624k | WORD16 *pi2_res_pred = ps_res_pred->pv_data; |
846 | 624k | UWORD8 *pu1_pred = ps_pred->pv_data; |
847 | 624k | UWORD8 *pu1_out = ps_rec->pv_data; |
848 | 624k | WORD32 i4_res_stride = ps_res->i4_data_stride; |
849 | 624k | WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride; |
850 | 624k | WORD32 i4_pred_stride = ps_pred->i4_data_stride; |
851 | 624k | WORD32 i4_out_stride = ps_rec->i4_data_stride; |
852 | 624k | const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat; |
853 | 624k | const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat; |
854 | 624k | UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6; |
855 | 624k | UWORD8 *pu1_pred_ptr = pu1_pred; |
856 | 624k | WORD16 *pi2_res_ptr = pi2_res; |
857 | 624k | WORD16 *pi2_res_pred_ptr = pi2_res_pred; |
858 | 624k | UWORD8 *pu1_out_ptr = pu1_out; |
859 | | |
860 | 624k | UNUSED(pi2_src); |
861 | 624k | UNUSED(pu2_iscal_mat); |
862 | 624k | UNUSED(pu2_weigh_mat); |
863 | 624k | UNUSED(u4_qp_div_6); |
864 | 624k | UNUSED(pi2_tmp); |
865 | 624k | UNUSED(i4_iq_start_idx); |
866 | | |
867 | 624k | q0 = pi2_dc_src[0]; // Restoring dc value for intra case3 |
868 | 624k | i_macro = ((q0 + 32) >> 6); |
869 | | |
870 | 3.11M | for(i = 0; i < SUB_BLK_WIDTH_4x4; i++) |
871 | 2.49M | { |
872 | 2.49M | pu1_pred_ptr = pu1_pred; |
873 | 2.49M | pi2_res_ptr = pi2_res; |
874 | 2.49M | pi2_res_pred_ptr = pi2_res_pred; |
875 | 2.49M | pu1_out = pu1_out_ptr; |
876 | | |
877 | | /* inverse prediction */ |
878 | 2.49M | pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate); |
879 | 2.49M | *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]); |
880 | 2.49M | pu1_pred_ptr += i4_pred_stride; |
881 | 2.49M | pu1_out += i4_out_stride; |
882 | 2.49M | pi2_res_ptr += i4_res_stride; |
883 | 2.49M | pi2_res_pred_ptr += i4_res_pred_stride; |
884 | | |
885 | 2.49M | pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate); |
886 | 2.49M | *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]); |
887 | 2.49M | pu1_pred_ptr += i4_pred_stride; |
888 | 2.49M | pu1_out += i4_out_stride; |
889 | 2.49M | pi2_res_ptr += i4_res_stride; |
890 | 2.49M | pi2_res_pred_ptr += i4_res_pred_stride; |
891 | | |
892 | 2.49M | pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate); |
893 | 2.49M | *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]); |
894 | 2.49M | pu1_pred_ptr += i4_pred_stride; |
895 | 2.49M | pu1_out += i4_out_stride; |
896 | 2.49M | pi2_res_ptr += i4_res_stride; |
897 | 2.49M | pi2_res_pred_ptr += i4_res_pred_stride; |
898 | | |
899 | 2.49M | pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate); |
900 | 2.49M | *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]); |
901 | | |
902 | 2.49M | pu1_out_ptr += 2; |
903 | 2.49M | pu1_pred += 2; |
904 | 2.49M | pi2_res += 2; |
905 | 2.49M | pi2_res_pred += 2; |
906 | 2.49M | } |
907 | 624k | } |
908 | | |
909 | | /* |
910 | | ******************************************************************************** |
911 | | * |
912 | | * @brief This function reconstructs a 4x4 sub block from quantized residue and |
913 | | * prediction buffer assuming cbf=0 |
914 | | * |
915 | | * @param[in] ps_src |
916 | | * quantized 4x4 block |
917 | | * |
918 | | * @param[in] ps_pred |
919 | | * prediction 4x4 block |
920 | | * |
921 | | * @param[in] ps_res |
922 | | * residue 4x4 block |
923 | | * |
924 | | * @param[in] ps_res_pred |
925 | | * residual pred 4x4 block |
926 | | * |
927 | | * @param[out] ps_out |
928 | | * reconstructed 4x4 block |
929 | | * |
930 | | * @param[out] ps_iq_it_res_rec_constants |
931 | | * reconstructed 4x4 block |
932 | | * |
933 | | * @param[out] pi2_tmp |
934 | | * scratch buf |
935 | | * |
936 | | * @param[out] pi2_dc_src |
937 | | * Pointer to dc coeff location |
938 | | * |
939 | | * @param[out] i4_iq_start_idx |
940 | | * Idx of first coeff |
941 | | * |
942 | | * @param[in] pi2_tmp |
943 | | * temporary buffer of size 1*16 |
944 | | * |
945 | | * @param[in] u1_res_accumulate |
946 | | * Flag to control residual accumulation |
947 | | * |
948 | | * @returns none |
949 | | * |
950 | | ******************************************************************************* |
951 | | */ |
952 | | void isvc_zcbf_iquant_itrans_recon_4x4(buffer_container_t *ps_src, buffer_container_t *ps_pred, |
953 | | buffer_container_t *ps_res_pred, buffer_container_t *ps_res, |
954 | | buffer_container_t *ps_rec, |
955 | | iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, |
956 | | WORD16 *pi2_tmp, WORD16 *pi2_dc_src, WORD32 i4_iq_start_idx, |
957 | | UWORD8 u1_res_accumulate) |
958 | 42.3M | { |
959 | 42.3M | WORD32 i, j; |
960 | | |
961 | 42.3M | UWORD8 *pu1_out = ps_rec->pv_data; |
962 | 42.3M | WORD16 *pi2_res = ps_res->pv_data; |
963 | 42.3M | WORD16 *pi2_res_pred = ps_res_pred->pv_data; |
964 | 42.3M | UWORD8 *pu1_pred = ps_pred->pv_data; |
965 | 42.3M | WORD32 i4_out_stride = ps_rec->i4_data_stride; |
966 | 42.3M | WORD32 i4_res_stride = ps_res->i4_data_stride; |
967 | 42.3M | WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride; |
968 | 42.3M | WORD32 i4_pred_stride = ps_pred->i4_data_stride; |
969 | | |
970 | 42.3M | UNUSED(ps_src); |
971 | 42.3M | UNUSED(ps_iq_it_res_rec_constants); |
972 | 42.3M | UNUSED(pi2_tmp); |
973 | 42.3M | UNUSED(pi2_dc_src); |
974 | 42.3M | UNUSED(i4_iq_start_idx); |
975 | | |
976 | 42.3M | if(u1_res_accumulate) |
977 | 229k | { |
978 | 1.14M | for(i = 0; i < SUB_BLK_HEIGHT_4x4; i++) |
979 | 919k | { |
980 | 4.59M | for(j = 0; j < SUB_BLK_WIDTH_4x4; j++) |
981 | 3.67M | { |
982 | 3.67M | pi2_res[j + i * i4_res_stride] = isvc_get_residue( |
983 | 3.67M | 0, pi2_res_pred[j + i * i4_res_pred_stride], u1_res_accumulate); |
984 | 3.67M | pu1_out[j + i * i4_out_stride] = |
985 | 3.67M | CLIP3(0, UINT8_MAX, |
986 | 3.67M | pu1_pred[j + i * i4_pred_stride] + pi2_res[j + i * i4_res_stride]); |
987 | 3.67M | } |
988 | 919k | } |
989 | 229k | } |
990 | 42.1M | else |
991 | 42.1M | { |
992 | 208M | for(i = 0; i < SUB_BLK_HEIGHT_4x4; i++) |
993 | 165M | { |
994 | 828M | for(j = 0; j < SUB_BLK_WIDTH_4x4; j++) |
995 | 662M | { |
996 | 662M | pi2_res[j + i * i4_res_stride] = 0; |
997 | 662M | pu1_out[j + i * i4_out_stride] = pu1_pred[j + i * i4_pred_stride]; |
998 | 662M | } |
999 | 165M | } |
1000 | 42.1M | } |
1001 | 42.3M | } |
1002 | | |
1003 | | /* |
1004 | | ******************************************************************************** |
1005 | | * |
1006 | | * @brief This function reconstructs a 4x4 sub block from quantized residue and |
1007 | | * prediction buffer assuming cbf=0 |
1008 | | * |
1009 | | * @param[in] ps_src |
1010 | | * quantized 4x4 block |
1011 | | * |
1012 | | * @param[in] ps_pred |
1013 | | * prediction 4x4 block |
1014 | | * |
1015 | | * @param[in] ps_res |
1016 | | * residue 4x4 block |
1017 | | * |
1018 | | * @param[in] ps_res_pred |
1019 | | * residual pred 4x4 block |
1020 | | * |
1021 | | * @param[out] ps_out |
1022 | | * reconstructed 4x4 block |
1023 | | * |
1024 | | * @param[out] ps_iq_it_res_rec_constants |
1025 | | * reconstructed 4x4 block |
1026 | | * |
1027 | | * @param[out] pi2_tmp |
1028 | | * scratch buf |
1029 | | * |
1030 | | * @param[out] pi2_dc_src |
1031 | | * Pointer to dc coeff location |
1032 | | * |
1033 | | * @param[out] i4_iq_start_idx |
1034 | | * Idx of first coeff |
1035 | | * |
1036 | | * @param[in] pi2_tmp |
1037 | | * temporary buffer of size 1*16 |
1038 | | * |
1039 | | * @param[in] u1_res_accumulate |
1040 | | * Flag to control residual accumulation |
1041 | | * |
1042 | | * @returns none |
1043 | | * |
1044 | | ******************************************************************************* |
1045 | | */ |
1046 | | void isvc_chroma_zcbf_iquant_itrans_recon_4x4( |
1047 | | buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_res_pred, |
1048 | | buffer_container_t *ps_res, buffer_container_t *ps_rec, |
1049 | | iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, WORD16 *pi2_tmp, WORD16 *pi2_dc_src, |
1050 | | WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate) |
1051 | 62.9M | { |
1052 | 62.9M | WORD32 i, j; |
1053 | | |
1054 | 62.9M | UWORD8 *pu1_out = ps_rec->pv_data; |
1055 | 62.9M | WORD32 i4_out_stride = ps_rec->i4_data_stride; |
1056 | 62.9M | WORD16 *pi2_res = ps_res->pv_data; |
1057 | 62.9M | WORD16 *pi2_res_pred = ps_res_pred->pv_data; |
1058 | 62.9M | UWORD8 *pu1_pred = ps_pred->pv_data; |
1059 | 62.9M | WORD32 i4_res_stride = ps_res->i4_data_stride; |
1060 | 62.9M | WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride; |
1061 | 62.9M | WORD32 i4_pred_stride = ps_pred->i4_data_stride; |
1062 | | |
1063 | 62.9M | UNUSED(ps_src); |
1064 | 62.9M | UNUSED(ps_iq_it_res_rec_constants); |
1065 | 62.9M | UNUSED(pi2_tmp); |
1066 | 62.9M | UNUSED(pi2_dc_src); |
1067 | 62.9M | UNUSED(i4_iq_start_idx); |
1068 | | |
1069 | 62.9M | if(u1_res_accumulate) |
1070 | 19.1k | { |
1071 | 95.9k | for(i = 0; i < SUB_BLK_HEIGHT_4x4; i++) |
1072 | 76.7k | { |
1073 | 383k | for(j = 0; j < SUB_BLK_WIDTH_4x4 * 2; j += 2) |
1074 | 307k | { |
1075 | 307k | pi2_res[j + i * i4_res_stride] = isvc_get_residue( |
1076 | 307k | 0, pi2_res_pred[j + i * i4_res_pred_stride], u1_res_accumulate); |
1077 | 307k | pu1_out[j + i * i4_out_stride] = CLIP3( |
1078 | 307k | 0, UINT8_MAX, |
1079 | 307k | ((WORD16) pu1_pred[j + i * i4_pred_stride]) + pi2_res[j + i * i4_res_stride]); |
1080 | 307k | } |
1081 | 76.7k | } |
1082 | 19.1k | } |
1083 | 62.9M | else |
1084 | 62.9M | { |
1085 | 313M | for(i = 0; i < SUB_BLK_HEIGHT_4x4; i++) |
1086 | 250M | { |
1087 | 1.25G | for(j = 0; j < SUB_BLK_WIDTH_4x4 * 2; j += 2) |
1088 | 999M | { |
1089 | 999M | pi2_res[j + i * i4_res_stride] = 0; |
1090 | 999M | pu1_out[j + i * i4_out_stride] = pu1_pred[j + i * i4_pred_stride]; |
1091 | 999M | } |
1092 | 250M | } |
1093 | 62.9M | } |
1094 | 62.9M | } |