/src/libavc/common/svc/isvc_resi_trans_quant.c
Line | Count | Source (jump to first uncovered line) |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Copyright (C) 2022 The Android Open Source Project |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ***************************************************************************** |
18 | | * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
19 | | */ |
20 | | /** |
21 | | ******************************************************************************* |
22 | | * @file |
23 | | * isvc_resi_trans_quant.c |
24 | | * |
25 | | * @brief |
26 | | * Contains function definitions for single stage forward transform for H.264. |
27 | | * Each function calculates the residue, does the cf and then does quantization |
28 | | * |
29 | | * @author |
30 | | * Ittiam |
31 | | * |
32 | | * @par List of Functions: |
33 | | * - isvc_resi_trans_quant_4x4() |
34 | | * - isvc_resi_trans_quant_chroma_4x4() |
35 | | * - isvc_hadamard_quant_4x4() |
36 | | * - isvc_hadamard_quant_2x2_uv() |
37 | | * - isvc_resi_trans_quant_8x8() |
38 | | * |
39 | | * @remarks |
40 | | ******************************************************************************* |
41 | | */ |
42 | | /* System include files */ |
43 | | #include <stdbool.h> |
44 | | #include <stddef.h> |
45 | | |
46 | | /* User include files */ |
47 | | #include "ih264_typedefs.h" |
48 | | #include "ih264_defs.h" |
49 | | #include "ih264_size_defs.h" |
50 | | #include "ih264_macros.h" |
51 | | #include "ih264_trans_macros.h" |
52 | | #include "ih264_trans_data.h" |
53 | | #include "ih264_structs.h" |
54 | | #include "isvc_trans_quant_itrans_iquant.h" |
55 | | |
56 | | static FORCEINLINE WORD16 isvc_subtract_upsampled_res(WORD16 i2_residue, WORD16 i2_upsampled_res) |
57 | 13.6M | { |
58 | 13.6M | return (CLIP3(-((WORD16) UINT8_MAX), ((WORD16) UINT8_MAX), i2_residue - i2_upsampled_res)); |
59 | 13.6M | } |
60 | | |
61 | | /** |
62 | | ******************************************************************************* |
63 | | * |
64 | | * @brief |
65 | | * This function performs forward transform and quantization on a 4*4 block |
66 | | * |
67 | | * @par Description: |
68 | | * The function accepts source buffer and estimation buffer. From these, it |
69 | | * computes the residue. This is residue is then transformed and quantized. |
70 | | * The transform and quantization are in placed computed. They use the residue |
71 | | * buffer for this. |
72 | | * |
73 | | * @param[in] pu1_src |
74 | | * Pointer to source sub-block |
75 | | * |
76 | | * @param[in] pu1_pred |
77 | | * Pointer to prediction sub-block |
78 | | * |
79 | | * @param[in] pi2_out |
80 | | * Pointer to residual sub-block |
81 | | * |
82 | | * @param[in] i4_src_stride |
83 | | * Source stride |
84 | | * |
85 | | * @param[in] i4_pred_stride |
86 | | * Prediction stride |
87 | | * |
88 | | * @param[in] dst_strd |
89 | | * Destination stride |
90 | | * |
91 | | * @param[in] u4_qbits |
92 | | * QP_BITS_h264_4x4 + floor(QP/6) |
93 | | * |
94 | | * @param[in] pu2_threshold_matrix |
95 | | * Pointer to Forward Quant Threshold Matrix |
96 | | * |
97 | | * @param[in] pu2_scale_matrix |
98 | | * Pointer to Forward Quant Scale Matrix |
99 | | * |
100 | | * @param[in] u4_round_factor |
101 | | * Quantization Round factor |
102 | | * |
103 | | * @param[out] pu1_nnz |
104 | | * Total non-zero coefficients in the current sub-block |
105 | | * |
106 | | * @returns |
107 | | * |
108 | | * @remarks |
109 | | * None |
110 | | * |
111 | | ******************************************************************************* |
112 | | */ |
113 | | void isvc_resi_trans_quant_4x4(buffer_container_t *ps_src, buffer_container_t *ps_pred, |
114 | | buffer_container_t *ps_out, buffer_container_t *ps_upsampled_res, |
115 | | resi_trans_quant_constants_t *ps_quant_constants, UWORD8 *pu1_nnz, |
116 | | WORD16 *pi2_dc_out, UWORD8 u1_use_upsampled_res) |
117 | 96.1M | { |
118 | 96.1M | UWORD32 i; |
119 | 96.1M | WORD32 x0, x1, x2, x3, x4, x5, x6, x7; |
120 | 96.1M | WORD32 i4_value; |
121 | | |
122 | 96.1M | UWORD8 *pu1_src = ps_src->pv_data; |
123 | 96.1M | UWORD8 *pu1_pred = ps_pred->pv_data; |
124 | 96.1M | WORD16 *pi2_out = ps_out->pv_data; |
125 | 96.1M | WORD16 *pi2_upsampled_res = ps_upsampled_res ? ps_upsampled_res->pv_data : NULL; |
126 | 96.1M | WORD32 i4_src_stride = ps_src->i4_data_stride; |
127 | 96.1M | WORD32 i4_pred_stride = ps_pred->i4_data_stride; |
128 | 18.4E | WORD32 i4_upsampled_res_stride = ps_upsampled_res ? ps_upsampled_res->i4_data_stride : 0; |
129 | 96.1M | WORD16 *pi2_out_tmp = pi2_out; |
130 | 96.1M | UWORD32 u4_nonzero_coeff = 0; |
131 | 96.1M | const UWORD16 *pu2_scale_matrix = ps_quant_constants->pu2_scale_matrix; |
132 | 96.1M | const UWORD16 *pu2_threshold_matrix = ps_quant_constants->pu2_threshold_matrix; |
133 | 96.1M | UWORD32 u4_qbits = ps_quant_constants->u4_qbits; |
134 | 96.1M | UWORD32 u4_round_factor = ps_quant_constants->u4_round_factor; |
135 | | |
136 | 476M | for(i = 0; i < SUB_BLK_WIDTH_4x4; i++) |
137 | 380M | { |
138 | | /* computing prediction error (residue) */ |
139 | 380M | x4 = pu1_src[0] - pu1_pred[0]; |
140 | 380M | x5 = pu1_src[1] - pu1_pred[1]; |
141 | 380M | x6 = pu1_src[2] - pu1_pred[2]; |
142 | 380M | x7 = pu1_src[3] - pu1_pred[3]; |
143 | | |
144 | 380M | if(u1_use_upsampled_res) |
145 | 2.22M | { |
146 | 2.22M | x4 = isvc_subtract_upsampled_res(x4, pi2_upsampled_res[0]); |
147 | 2.22M | x5 = isvc_subtract_upsampled_res(x5, pi2_upsampled_res[1]); |
148 | 2.22M | x6 = isvc_subtract_upsampled_res(x6, pi2_upsampled_res[2]); |
149 | 2.22M | x7 = isvc_subtract_upsampled_res(x7, pi2_upsampled_res[3]); |
150 | 2.22M | } |
151 | | |
152 | | /* Horizontal transform */ |
153 | 380M | x0 = x4 + x7; |
154 | 380M | x1 = x5 + x6; |
155 | 380M | x2 = x5 - x6; |
156 | 380M | x3 = x4 - x7; |
157 | | |
158 | 380M | pi2_out_tmp[0] = x0 + x1; |
159 | 380M | pi2_out_tmp[1] = (x3 << 1) + x2; |
160 | 380M | pi2_out_tmp[2] = x0 - x1; |
161 | 380M | pi2_out_tmp[3] = x3 - (x2 << 1); |
162 | | |
163 | | /* pointing to next row; */ |
164 | 380M | pu1_src += i4_src_stride; |
165 | 380M | pu1_pred += i4_pred_stride; |
166 | 380M | pi2_out_tmp += 4; |
167 | 380M | pi2_upsampled_res += i4_upsampled_res_stride; |
168 | 380M | } |
169 | | |
170 | 96.1M | pi2_out_tmp = pi2_out; |
171 | | |
172 | 467M | for(i = 0; i < SUB_BLK_WIDTH_4x4; i++) |
173 | 370M | { |
174 | | /* Vertical transform and quantization */ |
175 | 370M | x4 = pi2_out_tmp[0]; |
176 | 370M | x5 = pi2_out_tmp[4]; |
177 | 370M | x6 = pi2_out_tmp[8]; |
178 | 370M | x7 = pi2_out_tmp[12]; |
179 | | |
180 | 370M | x0 = x4 + x7; |
181 | 370M | x1 = x5 + x6; |
182 | 370M | x2 = x5 - x6; |
183 | 370M | x3 = x4 - x7; |
184 | | |
185 | | /* quantization is done in place */ |
186 | | |
187 | 370M | i4_value = x0 + x1; |
188 | | |
189 | 370M | if(i == 0) |
190 | 96.7M | { |
191 | 96.7M | (*pi2_dc_out) = i4_value; |
192 | 96.7M | } |
193 | | |
194 | 370M | FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits, |
195 | 370M | u4_nonzero_coeff); |
196 | 370M | pi2_out_tmp[0] = i4_value; |
197 | | |
198 | 370M | i4_value = (x3 << 1) + x2; |
199 | 370M | FWD_QUANT(i4_value, pu2_threshold_matrix[4], pu2_scale_matrix[4], u4_round_factor, u4_qbits, |
200 | 370M | u4_nonzero_coeff); |
201 | 370M | pi2_out_tmp[4] = i4_value; |
202 | | |
203 | 370M | i4_value = x0 - x1; |
204 | 370M | FWD_QUANT(i4_value, pu2_threshold_matrix[8], pu2_scale_matrix[8], u4_round_factor, u4_qbits, |
205 | 370M | u4_nonzero_coeff); |
206 | 370M | pi2_out_tmp[8] = i4_value; |
207 | | |
208 | 370M | i4_value = x3 - (x2 << 1); |
209 | 370M | FWD_QUANT(i4_value, pu2_threshold_matrix[12], pu2_scale_matrix[12], u4_round_factor, |
210 | 370M | u4_qbits, u4_nonzero_coeff); |
211 | 370M | pi2_out_tmp[12] = i4_value; |
212 | | |
213 | 370M | pi2_out_tmp++; |
214 | 370M | pu2_scale_matrix++; |
215 | 370M | pu2_threshold_matrix++; |
216 | 370M | } |
217 | | |
218 | | /* Return total nonzero coefficients in the current sub block */ |
219 | 96.1M | *pu1_nnz = u4_nonzero_coeff; |
220 | 96.1M | } |
221 | | |
222 | | /** |
223 | | ******************************************************************************* |
224 | | * |
225 | | * @brief |
226 | | * This function performs forward transform and quantization on a 4*4 chroma |
227 | | *block with interleaved values |
228 | | * |
229 | | * @par Description: |
230 | | * The function accepts source buffer and estimation buffer. From these, it |
231 | | * computes the residue. This is residue is then transformed and quantized. |
232 | | * The transform and quantization are in placed computed. They use the residue |
233 | | * buffer for this. |
234 | | * |
235 | | * @param[in] pu1_src |
236 | | * Pointer to source sub-block |
237 | | * |
238 | | * @param[in] pu1_pred |
239 | | * Pointer to prediction sub-block |
240 | | * |
241 | | * @param[in] pi2_out |
242 | | * Pointer to residual sub-block |
243 | | * |
244 | | * @param[in] i4_src_stride |
245 | | * Source stride |
246 | | * |
247 | | * @param[in] i4_pred_stride |
248 | | * Prediction stride |
249 | | * |
250 | | * @param[in] dst_strd |
251 | | * Destination stride |
252 | | * |
253 | | * @param[in] u4_qbits |
254 | | * QP_BITS_h264_4x4 + floor(QP/6) |
255 | | * |
256 | | * @param[in] pu2_threshold_matrix |
257 | | * Pointer to Forward Quant Threshold Matrix |
258 | | * |
259 | | * @param[in] pu2_scale_matrix |
260 | | * Pointer to Forward Quant Scale Matrix |
261 | | * |
262 | | * @param[in] u4_round_factor |
263 | | * Quantization Round factor |
264 | | * |
265 | | * @param[out] pu1_nnz |
266 | | * Total non-zero coefficients in the current sub-block |
267 | | * |
268 | | * @returns |
269 | | * |
270 | | * @remarks |
271 | | * None |
272 | | * |
273 | | ******************************************************************************* |
274 | | */ |
275 | | void isvc_resi_trans_quant_chroma_4x4(buffer_container_t *ps_src, buffer_container_t *ps_pred, |
276 | | buffer_container_t *ps_out, |
277 | | buffer_container_t *ps_upsampled_res, |
278 | | resi_trans_quant_constants_t *ps_quant_constants, |
279 | | UWORD8 *pu1_nnz, WORD16 *pi2_dc_out, |
280 | | UWORD8 u1_use_upsampled_res) |
281 | 47.8M | { |
282 | 47.8M | UWORD32 i; |
283 | 47.8M | WORD32 x0, x1, x2, x3, x4, x5, x6, x7; |
284 | 47.8M | WORD32 i4_value; |
285 | | |
286 | 47.8M | UWORD8 *pu1_src = ps_src->pv_data; |
287 | 47.8M | UWORD8 *pu1_pred = ps_pred->pv_data; |
288 | 47.8M | WORD16 *pi2_out = ps_out->pv_data; |
289 | 47.9M | WORD16 *pi2_upsampled_res = ps_upsampled_res ? ps_upsampled_res->pv_data : NULL; |
290 | 47.8M | WORD32 i4_src_stride = ps_src->i4_data_stride; |
291 | 47.8M | WORD32 i4_pred_stride = ps_pred->i4_data_stride; |
292 | 18.4E | WORD32 i4_upsampled_res_stride = ps_upsampled_res ? ps_upsampled_res->i4_data_stride : 0; |
293 | 47.8M | WORD16 *pi2_out_tmp = pi2_out; |
294 | 47.8M | UWORD32 u4_nonzero_coeff = 0; |
295 | 47.8M | const UWORD16 *pu2_scale_matrix = ps_quant_constants->pu2_scale_matrix; |
296 | 47.8M | const UWORD16 *pu2_threshold_matrix = ps_quant_constants->pu2_threshold_matrix; |
297 | 47.8M | UWORD32 u4_qbits = ps_quant_constants->u4_qbits; |
298 | 47.8M | UWORD32 u4_round_factor = ps_quant_constants->u4_round_factor; |
299 | | |
300 | 239M | for(i = 0; i < SUB_BLK_WIDTH_4x4; i++) |
301 | 191M | { |
302 | | /* computing prediction error (residue) */ |
303 | 191M | x4 = pu1_src[0] - pu1_pred[0]; |
304 | 191M | x5 = pu1_src[2] - pu1_pred[2]; |
305 | 191M | x6 = pu1_src[4] - pu1_pred[4]; |
306 | 191M | x7 = pu1_src[6] - pu1_pred[6]; |
307 | | |
308 | 191M | if(u1_use_upsampled_res) |
309 | 1.19M | { |
310 | 1.19M | x4 = isvc_subtract_upsampled_res(x4, pi2_upsampled_res[0]); |
311 | 1.19M | x5 = isvc_subtract_upsampled_res(x5, pi2_upsampled_res[1]); |
312 | 1.19M | x6 = isvc_subtract_upsampled_res(x6, pi2_upsampled_res[2]); |
313 | 1.19M | x7 = isvc_subtract_upsampled_res(x7, pi2_upsampled_res[3]); |
314 | 1.19M | } |
315 | | |
316 | | /* Horizontal transform */ |
317 | 191M | x0 = x4 + x7; |
318 | 191M | x1 = x5 + x6; |
319 | 191M | x2 = x5 - x6; |
320 | 191M | x3 = x4 - x7; |
321 | | |
322 | 191M | pi2_out_tmp[0] = x0 + x1; |
323 | 191M | pi2_out_tmp[1] = (x3 << 1) + x2; |
324 | 191M | pi2_out_tmp[2] = x0 - x1; |
325 | 191M | pi2_out_tmp[3] = x3 - (x2 << 1); |
326 | | |
327 | | /* pointing to next row; */ |
328 | 191M | pu1_src += i4_src_stride; |
329 | 191M | pu1_pred += i4_pred_stride; |
330 | 191M | pi2_out_tmp += 4; |
331 | 191M | pi2_upsampled_res += i4_upsampled_res_stride; |
332 | 191M | } |
333 | 47.8M | pi2_out_tmp = pi2_out; |
334 | 238M | for(i = 0; i < SUB_BLK_WIDTH_4x4; i++) |
335 | 190M | { |
336 | | /* Vertical transform and quantization */ |
337 | 190M | x4 = pi2_out_tmp[0]; |
338 | 190M | x5 = pi2_out_tmp[4]; |
339 | 190M | x6 = pi2_out_tmp[8]; |
340 | 190M | x7 = pi2_out_tmp[12]; |
341 | | |
342 | 190M | x0 = x4 + x7; |
343 | 190M | x1 = x5 + x6; |
344 | 190M | x2 = x5 - x6; |
345 | 190M | x3 = x4 - x7; |
346 | | |
347 | | /* quantization is done in place */ |
348 | | |
349 | 190M | i4_value = x0 + x1; |
350 | | |
351 | 190M | if(i == 0) |
352 | 48.0M | { |
353 | 48.0M | *pi2_dc_out = i4_value; |
354 | 48.0M | } |
355 | | |
356 | 190M | FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits, |
357 | 190M | u4_nonzero_coeff); |
358 | 190M | pi2_out_tmp[0] = i4_value; |
359 | | |
360 | 190M | i4_value = (x3 << 1) + x2; |
361 | 190M | FWD_QUANT(i4_value, pu2_threshold_matrix[4], pu2_scale_matrix[4], u4_round_factor, u4_qbits, |
362 | 190M | u4_nonzero_coeff); |
363 | 190M | pi2_out_tmp[4] = i4_value; |
364 | | |
365 | 190M | i4_value = x0 - x1; |
366 | 190M | FWD_QUANT(i4_value, pu2_threshold_matrix[8], pu2_scale_matrix[8], u4_round_factor, u4_qbits, |
367 | 190M | u4_nonzero_coeff); |
368 | 190M | pi2_out_tmp[8] = i4_value; |
369 | | |
370 | 190M | i4_value = x3 - (x2 << 1); |
371 | 190M | FWD_QUANT(i4_value, pu2_threshold_matrix[12], pu2_scale_matrix[12], u4_round_factor, |
372 | 190M | u4_qbits, u4_nonzero_coeff); |
373 | 190M | pi2_out_tmp[12] = i4_value; |
374 | | |
375 | 190M | pi2_out_tmp++; |
376 | 190M | pu2_scale_matrix++; |
377 | 190M | pu2_threshold_matrix++; |
378 | 190M | } |
379 | | |
380 | | /* Return total nonzero coefficients in the current sub block */ |
381 | 47.8M | *pu1_nnz = u4_nonzero_coeff; |
382 | 47.8M | } |
383 | | |
384 | | /** |
385 | | ******************************************************************************* |
386 | | * |
387 | | * @brief |
388 | | * This function performs forward hadamard transform and quantization on a 4*4 |
389 | | *block |
390 | | * |
391 | | * @par Description: |
392 | | * The function accepts source buffer and estimation buffer. From these, it |
393 | | * computes the residue. This is residue is then transformed and quantized. |
394 | | * The transform and quantization are in placed computed. They use the residue |
395 | | * buffer for this. |
396 | | * |
397 | | * @param[in] pu1_src |
398 | | * Pointer to source sub-block |
399 | | * |
400 | | * @param[in] pu1_pred |
401 | | * Pointer to prediction sub-block |
402 | | * |
403 | | * @param[in] pi2_out |
404 | | * Pointer to residual sub-block |
405 | | * |
406 | | * @param[in] i4_src_stride |
407 | | * Source stride |
408 | | * |
409 | | * @param[in] i4_pred_stride |
410 | | * Prediction stride |
411 | | * |
412 | | * @param[in] dst_strd |
413 | | * Destination stride |
414 | | * |
415 | | * @param[in] u4_qbits |
416 | | * QP_BITS_h264_4x4 + floor(QP/6) |
417 | | * |
418 | | * @param[in] pu2_threshold_matrix |
419 | | * Pointer to Forward Quant Threshold Matrix |
420 | | * |
421 | | * @param[in] pu2_scale_matrix |
422 | | * Pointer to Forward Quant Scale Matrix |
423 | | * |
424 | | * @param[in] u4_round_factor |
425 | | * Quantization Round factor |
426 | | * |
427 | | * @param[out] pu1_nnz |
428 | | * Total non-zero coefficients in the current sub-block |
429 | | * |
430 | | * @returns |
431 | | * |
432 | | * @remarks |
433 | | * None |
434 | | * |
435 | | */ |
436 | | |
437 | | void isvc_hadamard_quant_4x4(WORD16 *pi2_src, WORD16 *pi2_dst, |
438 | | resi_trans_quant_constants_t *ps_quant_constants, UWORD8 *pu1_nnz) |
439 | 3.51M | { |
440 | 3.51M | WORD32 i; |
441 | 3.51M | WORD32 x0, x1, x2, x3, x4, x5, x6, x7, i4_value; |
442 | | |
443 | 3.51M | const UWORD16 *pu2_scale_matrix = ps_quant_constants->pu2_scale_matrix; |
444 | 3.51M | const UWORD16 *pu2_threshold_matrix = ps_quant_constants->pu2_threshold_matrix; |
445 | 3.51M | UWORD32 u4_qbits = ps_quant_constants->u4_qbits; |
446 | 3.51M | UWORD32 u4_round_factor = ps_quant_constants->u4_round_factor; |
447 | | |
448 | 3.51M | *pu1_nnz = 0; |
449 | | |
450 | 17.5M | for(i = 0; i < SUB_BLK_WIDTH_4x4; i++) |
451 | 14.0M | { |
452 | 14.0M | x4 = pi2_src[0]; |
453 | 14.0M | x5 = pi2_src[1]; |
454 | 14.0M | x6 = pi2_src[2]; |
455 | 14.0M | x7 = pi2_src[3]; |
456 | | |
457 | 14.0M | x0 = x4 + x7; |
458 | 14.0M | x1 = x5 + x6; |
459 | 14.0M | x2 = x5 - x6; |
460 | 14.0M | x3 = x4 - x7; |
461 | | |
462 | 14.0M | pi2_dst[0] = x0 + x1; |
463 | 14.0M | pi2_dst[1] = x3 + x2; |
464 | 14.0M | pi2_dst[2] = x0 - x1; |
465 | 14.0M | pi2_dst[3] = x3 - x2; |
466 | | |
467 | 14.0M | pi2_src += 4; |
468 | 14.0M | pi2_dst += 4; |
469 | 14.0M | } |
470 | | |
471 | | /* Vertical transform and quantization */ |
472 | 3.51M | pi2_dst -= SUB_BLK_WIDTH_4x4 << 2; |
473 | | |
474 | 17.5M | for(i = 0; i < SUB_BLK_WIDTH_4x4; i++) |
475 | 14.0M | { |
476 | 14.0M | x4 = pi2_dst[0]; |
477 | 14.0M | x5 = pi2_dst[4]; |
478 | 14.0M | x6 = pi2_dst[8]; |
479 | 14.0M | x7 = pi2_dst[12]; |
480 | | |
481 | 14.0M | x0 = x4 + x7; |
482 | 14.0M | x1 = x5 + x6; |
483 | 14.0M | x2 = x5 - x6; |
484 | 14.0M | x3 = x4 - x7; |
485 | | |
486 | 14.0M | i4_value = (x0 + x1) >> 1; |
487 | 14.0M | FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits, |
488 | 14.0M | pu1_nnz[0]); |
489 | 14.0M | pi2_dst[0] = i4_value; |
490 | | |
491 | 14.0M | i4_value = (x3 + x2) >> 1; |
492 | 14.0M | FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits, |
493 | 14.0M | pu1_nnz[0]); |
494 | 14.0M | pi2_dst[4] = i4_value; |
495 | | |
496 | 14.0M | i4_value = (x0 - x1) >> 1; |
497 | 14.0M | FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits, |
498 | 14.0M | pu1_nnz[0]); |
499 | 14.0M | pi2_dst[8] = i4_value; |
500 | | |
501 | 14.0M | i4_value = (x3 - x2) >> 1; |
502 | 14.0M | FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits, |
503 | 14.0M | pu1_nnz[0]); |
504 | 14.0M | pi2_dst[12] = i4_value; |
505 | | |
506 | 14.0M | pi2_dst++; |
507 | 14.0M | } |
508 | 3.51M | } |
509 | | |
510 | | /** |
511 | | ******************************************************************************* |
512 | | * |
513 | | * @brief |
514 | | * This function performs forward hadamard transform and quantization on a 2*2 |
515 | | *block for both U and V planes |
516 | | * |
517 | | * @par Description: |
518 | | * The function accepts source buffer and estimation buffer. From these, it |
519 | | * computes the residue. This is residue is then transformed and quantized. |
520 | | * The transform and quantization are in placed computed. They use the residue |
521 | | * buffer for this. |
522 | | * |
523 | | * @param[in] pu1_src |
524 | | * Pointer to source sub-block |
525 | | * |
526 | | * @param[in] pu1_pred |
527 | | * Pointer to prediction sub-block |
528 | | * |
529 | | * @param[in] pi2_out |
530 | | * Pointer to residual sub-block |
531 | | * |
532 | | * @param[in] i4_src_stride |
533 | | * Source stride |
534 | | * |
535 | | * @param[in] i4_pred_stride |
536 | | * Prediction stride |
537 | | * |
538 | | * @param[in] dst_strd |
539 | | * Destination stride |
540 | | * |
541 | | * @param[in] u4_qbits |
542 | | * QP_BITS_h264_4x4 + floor(QP/6) |
543 | | * |
544 | | * @param[in] pu2_threshold_matrix |
545 | | * Pointer to Forward Quant Threshold Matrix |
546 | | * |
547 | | * @param[in] pu2_scale_matrix |
548 | | * Pointer to Forward Quant Scale Matrix |
549 | | * |
550 | | * @param[in] u4_round_factor |
551 | | * Quantization Round factor |
552 | | * |
553 | | * @param[out] pu1_nnz |
554 | | * Total non-zero coefficients in the current sub-block |
555 | | * |
556 | | * @returns |
557 | | * |
558 | | * @remarks |
559 | | * NNZ for dc is populated at 0 and 5th position of pu1_nnz |
560 | | * |
561 | | */ |
562 | | |
563 | | void isvc_hadamard_quant_2x2_uv(WORD16 *pi2_src, WORD16 *pi2_dst, |
564 | | resi_trans_quant_constants_t *ps_quant_constants, UWORD8 *pu1_nnz) |
565 | 6.03M | { |
566 | 6.03M | WORD32 x0, x1, x2, x3, x4, x5, x6, x7; |
567 | 6.03M | WORD32 i4_value, plane; |
568 | | |
569 | 6.03M | const UWORD16 *pu2_scale_matrix = ps_quant_constants->pu2_scale_matrix; |
570 | 6.03M | const UWORD16 *pu2_threshold_matrix = ps_quant_constants->pu2_threshold_matrix; |
571 | 6.03M | UWORD32 u4_qbits = ps_quant_constants->u4_qbits; |
572 | 6.03M | UWORD32 u4_round_factor = ps_quant_constants->u4_round_factor; |
573 | | |
574 | 18.0M | for(plane = 0; plane < 2; plane++) |
575 | 12.0M | { |
576 | 12.0M | pu1_nnz[plane] = 0; |
577 | | |
578 | | /* Horizontal transform */ |
579 | 12.0M | x4 = pi2_src[0]; |
580 | 12.0M | x5 = pi2_src[1]; |
581 | 12.0M | x6 = pi2_src[2]; |
582 | 12.0M | x7 = pi2_src[3]; |
583 | | |
584 | 12.0M | x0 = x4 + x5; |
585 | 12.0M | x1 = x4 - x5; |
586 | 12.0M | x2 = x6 + x7; |
587 | 12.0M | x3 = x6 - x7; |
588 | | |
589 | | /* Vertical transform and quantization */ |
590 | 12.0M | i4_value = (x0 + x2); |
591 | 12.0M | FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits, |
592 | 12.0M | pu1_nnz[plane]); |
593 | 12.0M | pi2_dst[0] = i4_value; |
594 | | |
595 | 12.0M | i4_value = (x0 - x2); |
596 | 12.0M | FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits, |
597 | 12.0M | pu1_nnz[plane]); |
598 | 12.0M | pi2_dst[2] = i4_value; |
599 | | |
600 | 12.0M | i4_value = (x1 - x3); |
601 | 12.0M | FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits, |
602 | 12.0M | pu1_nnz[plane]); |
603 | 12.0M | pi2_dst[3] = i4_value; |
604 | | |
605 | 12.0M | i4_value = (x1 + x3); |
606 | 12.0M | FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits, |
607 | 12.0M | pu1_nnz[plane]); |
608 | 12.0M | pi2_dst[1] = i4_value; |
609 | | |
610 | 12.0M | pi2_dst += 4; |
611 | 12.0M | pi2_src += 4; |
612 | 12.0M | } |
613 | 6.03M | } |
614 | | |
615 | | /* |
616 | | ******************************************************************************* |
617 | | * |
618 | | * @brief |
619 | | * This function performs Single stage forward transform CF8 and quantization |
620 | | *on 8*8 blocks for h.264 |
621 | | * |
622 | | * @par Description: |
623 | | * Performs single stage 8x8 forward transform CF8 after calculating the |
624 | | *residue The result is then quantized |
625 | | * |
626 | | * @param[in] pu1_src |
627 | | * Input 8x8 pixels |
628 | | * |
629 | | * @param[in] pu1_pred |
630 | | * Input 8x8 pixels |
631 | | * |
632 | | * @param[in] pi1_out |
633 | | * Output 8x8 pixels |
634 | | * |
635 | | * @param[in] u4_thresh |
636 | | * Threshold under which the coeffs are not quantized |
637 | | * |
638 | | * @param[in] u4_qp_div |
639 | | * QP/6 |
640 | | * |
641 | | * @param[in] u4_qp_rem |
642 | | * QP%6 |
643 | | * |
644 | | * @param[in] u2_src_stride |
645 | | * Source stride |
646 | | * |
647 | | * @param[in] i4_pred_stride |
648 | | * stride for prediciton buffer |
649 | | * |
650 | | * @param[in] dst_strd |
651 | | * stride for destination buffer |
652 | | * |
653 | | * @param[in] pu4_quant_mat |
654 | | * Pointer to the 4x4 quantization matrix |
655 | | * |
656 | | * @returns Void |
657 | | * |
658 | | * |
659 | | ******************************************************************************* |
660 | | */ |
661 | | void isvc_resi_trans_quant_8x8(buffer_container_t *ps_src, buffer_container_t *ps_pred, |
662 | | buffer_container_t *ps_out, buffer_container_t *ps_upsampled_res, |
663 | | resi_trans_quant_constants_t *ps_quant_constants, UWORD8 *pu1_nnz, |
664 | | WORD16 *pi2_dc_out, UWORD8 u1_use_upsampled_res) |
665 | 0 | { |
666 | 0 | UWORD32 i; |
667 | 0 | WORD32 a0, a1, a2, a3, a4, a5, a6, a7; |
668 | 0 | WORD32 r0, r1, r2, r3, r4, r5, r6, r7; |
669 | |
|
670 | 0 | UWORD8 *pu1_src = ps_src->pv_data; |
671 | 0 | UWORD8 *pu1_pred = ps_pred->pv_data; |
672 | 0 | WORD16 *pi2_out = ps_out->pv_data; |
673 | 0 | WORD16 *pi2_upsampled_res = ps_upsampled_res ? ps_upsampled_res->pv_data : NULL; |
674 | 0 | WORD32 i4_src_stride = ps_src->i4_data_stride; |
675 | 0 | WORD32 i4_pred_stride = ps_pred->i4_data_stride; |
676 | 0 | WORD32 i4_upsampled_res_stride = ps_upsampled_res ? ps_upsampled_res->i4_data_stride : 0; |
677 | 0 | WORD16 *pi2_out_tmp = pi2_out; |
678 | 0 | UWORD32 u4_nonzero_coeff = 0; |
679 | 0 | const UWORD16 *pu2_scale_matrix = ps_quant_constants->pu2_scale_matrix; |
680 | 0 | const UWORD16 *pu2_threshold_matrix = ps_quant_constants->pu2_threshold_matrix; |
681 | 0 | UWORD32 u4_qbits = ps_quant_constants->u4_qbits; |
682 | 0 | UWORD32 u4_round_factor = ps_quant_constants->u4_round_factor; |
683 | |
|
684 | 0 | UNUSED(pi2_dc_out); |
685 | | |
686 | | /*Horizontal transform */ |
687 | | /* we are going to use the a's and r's in a twisted way since */ |
688 | | /*i dont want to declare more variables */ |
689 | 0 | for(i = 0; i < SUB_BLK_WIDTH_8x8; ++i) |
690 | 0 | { |
691 | 0 | r0 = pu1_src[0]; |
692 | 0 | r0 -= pu1_pred[0]; |
693 | 0 | r1 = pu1_src[1]; |
694 | 0 | r1 -= pu1_pred[1]; |
695 | 0 | r2 = pu1_src[2]; |
696 | 0 | r2 -= pu1_pred[2]; |
697 | 0 | r3 = pu1_src[3]; |
698 | 0 | r3 -= pu1_pred[3]; |
699 | 0 | r4 = pu1_src[4]; |
700 | 0 | r4 -= pu1_pred[4]; |
701 | 0 | r5 = pu1_src[5]; |
702 | 0 | r5 -= pu1_pred[5]; |
703 | 0 | r6 = pu1_src[6]; |
704 | 0 | r6 -= pu1_pred[6]; |
705 | 0 | r7 = pu1_src[7]; |
706 | 0 | r7 -= pu1_pred[7]; |
707 | |
|
708 | 0 | if(u1_use_upsampled_res) |
709 | 0 | { |
710 | 0 | r0 = isvc_subtract_upsampled_res(r0, pi2_upsampled_res[0]); |
711 | 0 | r1 = isvc_subtract_upsampled_res(r1, pi2_upsampled_res[1]); |
712 | 0 | r2 = isvc_subtract_upsampled_res(r2, pi2_upsampled_res[2]); |
713 | 0 | r3 = isvc_subtract_upsampled_res(r3, pi2_upsampled_res[3]); |
714 | 0 | r4 = isvc_subtract_upsampled_res(r4, pi2_upsampled_res[4]); |
715 | 0 | r5 = isvc_subtract_upsampled_res(r5, pi2_upsampled_res[5]); |
716 | 0 | r6 = isvc_subtract_upsampled_res(r6, pi2_upsampled_res[6]); |
717 | 0 | r7 = isvc_subtract_upsampled_res(r7, pi2_upsampled_res[7]); |
718 | 0 | } |
719 | |
|
720 | 0 | a0 = r0 + r7; |
721 | 0 | a1 = r1 + r6; |
722 | 0 | a2 = r2 + r5; |
723 | 0 | a3 = r3 + r4; |
724 | |
|
725 | 0 | a4 = a0 + a3; |
726 | 0 | a5 = a1 + a2; |
727 | 0 | a6 = a0 - a3; |
728 | 0 | a7 = a1 - a2; |
729 | |
|
730 | 0 | pi2_out_tmp[0] = a4 + a5; |
731 | |
|
732 | 0 | pi2_out_tmp[2] = a6 + (a7 >> 1); |
733 | 0 | pi2_out_tmp[4] = a4 - a5; |
734 | 0 | pi2_out_tmp[6] = (a6 >> 1) - a7; |
735 | |
|
736 | 0 | a0 = r0 - r7; |
737 | 0 | a1 = r1 - r6; |
738 | 0 | a2 = r2 - r5; |
739 | 0 | a3 = r3 - r4; |
740 | |
|
741 | 0 | a4 = a1 + a2 + ((a0 >> 1) + a0); |
742 | 0 | a5 = a0 - a3 - ((a2 >> 1) + a2); |
743 | 0 | a6 = a0 + a3 - ((a1 >> 1) + a1); |
744 | 0 | a7 = a1 - a2 + ((a3 >> 1) + a3); |
745 | |
|
746 | 0 | pi2_out_tmp[1] = a4 + (a7 >> 2); |
747 | 0 | pi2_out_tmp[3] = a5 + (a6 >> 2); |
748 | 0 | pi2_out_tmp[5] = a6 - (a5 >> 2); |
749 | 0 | pi2_out_tmp[7] = (a4 >> 2) - a7; |
750 | |
|
751 | 0 | pu1_src += i4_src_stride; |
752 | 0 | pu1_pred += i4_pred_stride; |
753 | 0 | pi2_out_tmp += 8; |
754 | 0 | pi2_upsampled_res += i4_upsampled_res_stride; |
755 | 0 | } |
756 | | |
757 | | /*vertical transform and quant */ |
758 | |
|
759 | 0 | pi2_out_tmp = pi2_out; |
760 | |
|
761 | 0 | for(i = 0; i < SUB_BLK_WIDTH_8x8; ++i) |
762 | 0 | { |
763 | 0 | r0 = pi2_out_tmp[0]; |
764 | 0 | r1 = pi2_out_tmp[8]; |
765 | 0 | r2 = pi2_out_tmp[16]; |
766 | 0 | r3 = pi2_out_tmp[24]; |
767 | 0 | r4 = pi2_out_tmp[32]; |
768 | 0 | r5 = pi2_out_tmp[40]; |
769 | 0 | r6 = pi2_out_tmp[48]; |
770 | 0 | r7 = pi2_out_tmp[56]; |
771 | |
|
772 | 0 | a0 = r0 + r7; |
773 | 0 | a1 = r1 + r6; |
774 | 0 | a2 = r2 + r5; |
775 | 0 | a3 = r3 + r4; |
776 | |
|
777 | 0 | a4 = a0 + a3; |
778 | 0 | a5 = a1 + a2; |
779 | 0 | a6 = a0 - a3; |
780 | 0 | a7 = a1 - a2; |
781 | |
|
782 | 0 | a0 = r0 - r7; |
783 | 0 | a1 = r1 - r6; |
784 | 0 | a2 = r2 - r5; |
785 | 0 | a3 = r3 - r4; |
786 | |
|
787 | 0 | r0 = a4 + a5; |
788 | 0 | r2 = a6 + (a7 >> 1); |
789 | 0 | r4 = a4 - a5; |
790 | 0 | r6 = (a6 >> 1) - a7; |
791 | |
|
792 | 0 | a4 = a1 + a2 + ((a0 >> 1) + a0); |
793 | 0 | a5 = a0 - a3 - ((a2 >> 1) + a2); |
794 | 0 | a6 = a0 + a3 - ((a1 >> 1) + a1); |
795 | 0 | a7 = a1 - a2 + ((a3 >> 1) + a3); |
796 | |
|
797 | 0 | r1 = a4 + (a7 >> 2); |
798 | 0 | r3 = a5 + (a6 >> 2); |
799 | 0 | r5 = a6 - (a5 >> 2); |
800 | 0 | r7 = (a4 >> 2) - a7; |
801 | |
|
802 | 0 | FWD_QUANT(r0, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits, |
803 | 0 | u4_nonzero_coeff); |
804 | 0 | pi2_out_tmp[0] = r0; |
805 | |
|
806 | 0 | FWD_QUANT(r1, pu2_threshold_matrix[8], pu2_scale_matrix[8], u4_round_factor, u4_qbits, |
807 | 0 | u4_nonzero_coeff); |
808 | 0 | pi2_out_tmp[8] = r1; |
809 | |
|
810 | 0 | FWD_QUANT(r2, pu2_threshold_matrix[16], pu2_scale_matrix[16], u4_round_factor, u4_qbits, |
811 | 0 | u4_nonzero_coeff); |
812 | 0 | pi2_out_tmp[16] = r2; |
813 | |
|
814 | 0 | FWD_QUANT(r3, pu2_threshold_matrix[24], pu2_scale_matrix[24], u4_round_factor, u4_qbits, |
815 | 0 | u4_nonzero_coeff); |
816 | 0 | pi2_out_tmp[24] = r3; |
817 | |
|
818 | 0 | FWD_QUANT(r4, pu2_threshold_matrix[32], pu2_scale_matrix[32], u4_round_factor, u4_qbits, |
819 | 0 | u4_nonzero_coeff); |
820 | 0 | pi2_out_tmp[32] = r4; |
821 | |
|
822 | 0 | FWD_QUANT(r5, pu2_threshold_matrix[40], pu2_scale_matrix[40], u4_round_factor, u4_qbits, |
823 | 0 | u4_nonzero_coeff); |
824 | 0 | pi2_out_tmp[40] = r5; |
825 | |
|
826 | 0 | FWD_QUANT(r6, pu2_threshold_matrix[48], pu2_scale_matrix[48], u4_round_factor, u4_qbits, |
827 | 0 | u4_nonzero_coeff); |
828 | 0 | pi2_out_tmp[48] = r6; |
829 | |
|
830 | 0 | FWD_QUANT(r7, pu2_threshold_matrix[56], pu2_scale_matrix[56], u4_round_factor, u4_qbits, |
831 | 0 | u4_nonzero_coeff); |
832 | 0 | pi2_out_tmp[56] = r7; |
833 | |
|
834 | 0 | pi2_out_tmp++; |
835 | 0 | pu2_scale_matrix++; |
836 | 0 | pu2_threshold_matrix++; |
837 | 0 | } |
838 | | /* Return total nonzero coefficients in the current sub block */ |
839 | 0 | *pu1_nnz = u4_nonzero_coeff; |
840 | 0 | } |