/src/libavc/common/ih264_resi_trans_quant.c
Line | Count | Source (jump to first uncovered line) |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Copyright (C) 2015 The Android Open Source Project |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ***************************************************************************** |
18 | | * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
19 | | */ |
20 | | |
21 | | /** |
22 | | ******************************************************************************* |
23 | | * @file |
24 | | * ih264_resi_trans_quant.c |
25 | | * |
26 | | * @brief |
27 | | * Contains function definitions for single stage forward transform for H.264 |
28 | | * It will calculate the residue, do the cf and then do quantization |
29 | | * |
30 | | * @author |
31 | | * ittiam |
32 | | * |
33 | | * @par List of Functions: |
34 | | * - ih264_resi_trans_quant_4x4 |
35 | | * - ih264_resi_trans_quant_chroma_4x4 |
36 | | * - ih264_hadamard_quant_4x4 |
37 | | * - ih264_hadamard_quant_2x2_uv |
38 | | * - ih264_resi_trans_quant_8x8 |
39 | | * |
40 | | * @remarks |
41 | | * none |
42 | | * |
43 | | ******************************************************************************* |
44 | | */ |
45 | | |
46 | | |
47 | | /*****************************************************************************/ |
48 | | /* File Includes */ |
49 | | /*****************************************************************************/ |
50 | | |
51 | | /* System Include Files */ |
52 | | #include <stddef.h> |
53 | | |
54 | | /* User Include Files */ |
55 | | #include "ih264_typedefs.h" |
56 | | #include "ih264_defs.h" |
57 | | #include "ih264_macros.h" |
58 | | #include "ih264_size_defs.h" |
59 | | #include "ih264_trans_macros.h" |
60 | | #include "ih264_trans_data.h" |
61 | | #include "ih264_structs.h" |
62 | | #include "ih264_trans_quant_itrans_iquant.h" |
63 | | |
64 | | |
65 | | /*****************************************************************************/ |
66 | | /* Function Definitions */ |
67 | | /*****************************************************************************/ |
68 | | |
69 | | /** |
70 | | ******************************************************************************* |
71 | | * |
72 | | * @brief |
73 | | * This function performs forward transform and quantization on a 4x4 block |
74 | | * |
75 | | * @par Description: |
76 | | * The function accepts source buffer and estimation buffer. From these, it |
77 | | * computes the residue. This is residue is then transformed and quantized. |
78 | | * The transform and quantization are in placed computed. They use the residue |
79 | | * buffer for this. |
80 | | * |
81 | | * @param[in] pu1_src |
82 | | * Pointer to source sub-block |
83 | | * |
84 | | * @param[in] pu1_pred |
85 | | * Pointer to prediction sub-block |
86 | | * |
87 | | * @param[in] pi2_out |
88 | | * Pointer to residual sub-block |
89 | | * |
90 | | * @param[in] src_strd |
91 | | * Source stride |
92 | | * |
93 | | * @param[in] pred_strd |
94 | | * Prediction stride |
95 | | * |
96 | | * @param[in] pu2_scale_matrix |
97 | | * Pointer to Forward Quant Scale Matrix |
98 | | * |
99 | | * @param[in] pu2_threshold_matrix |
100 | | * Pointer to Forward Quant Threshold Matrix |
101 | | * |
102 | | * @param[in] u4_qbits |
103 | | * QP_BITS_h264_4x4 + floor(QP/6) |
104 | | * |
105 | | * @param[in] u4_round_factor |
106 | | * Quantization Round factor |
107 | | * |
108 | | * @param[out] pu1_nnz |
109 | | * Total non-zero coefficients in the current sub-block |
110 | | * |
111 | | * @param[in] pi2_alt_dc_addr |
112 | | * DC Coefficient of the block |
113 | | * |
114 | | * @remarks none |
115 | | * |
116 | | ******************************************************************************* |
117 | | */ |
118 | | void ih264_resi_trans_quant_4x4(UWORD8 *pu1_src, |
119 | | UWORD8 *pu1_pred, |
120 | | WORD16 *pi2_out, |
121 | | WORD32 src_strd, |
122 | | WORD32 pred_strd, |
123 | | const UWORD16 *pu2_scale_matrix, |
124 | | const UWORD16 *pu2_threshold_matrix, |
125 | | UWORD32 u4_qbits, |
126 | | UWORD32 u4_round_factor, |
127 | | UWORD8 *pu1_nnz, |
128 | | WORD16 *pi2_alt_dc_addr) |
129 | 0 | { |
130 | 0 | UWORD32 i; |
131 | 0 | WORD32 x0, x1, x2, x3, x4, x5, x6, x7; |
132 | 0 | WORD32 i4_value; |
133 | 0 | WORD16 *pi2_out_tmp = pi2_out; |
134 | 0 | UWORD32 u4_nonzero_coeff = 0; |
135 | |
|
136 | 0 | for (i = 0; i < SUB_BLK_WIDTH_4x4; i++) |
137 | 0 | { |
138 | | /* computing prediction error (residue) */ |
139 | 0 | x4 = pu1_src[0] - pu1_pred[0]; |
140 | 0 | x5 = pu1_src[1] - pu1_pred[1]; |
141 | 0 | x6 = pu1_src[2] - pu1_pred[2]; |
142 | 0 | x7 = pu1_src[3] - pu1_pred[3]; |
143 | | |
144 | | /* Horizontal transform */ |
145 | 0 | x0 = x4 + x7; |
146 | 0 | x1 = x5 + x6; |
147 | 0 | x2 = x5 - x6; |
148 | 0 | x3 = x4 - x7; |
149 | |
|
150 | 0 | pi2_out_tmp[0] = x0 + x1; |
151 | 0 | pi2_out_tmp[1] = (x3 << 1) + x2; |
152 | 0 | pi2_out_tmp[2] = x0 - x1; |
153 | 0 | pi2_out_tmp[3] = x3 - (x2 << 1); |
154 | | |
155 | | /* pointing to next row; */ |
156 | 0 | pu1_src += src_strd; |
157 | 0 | pu1_pred += pred_strd; |
158 | 0 | pi2_out_tmp += 4; |
159 | 0 | } |
160 | |
|
161 | 0 | pi2_out_tmp = pi2_out; |
162 | 0 | for (i = 0; i < SUB_BLK_WIDTH_4x4; i++) |
163 | 0 | { |
164 | | /* Vertical transform and quantization */ |
165 | 0 | x4 = pi2_out_tmp[0]; |
166 | 0 | x5 = pi2_out_tmp[4]; |
167 | 0 | x6 = pi2_out_tmp[8]; |
168 | 0 | x7 = pi2_out_tmp[12]; |
169 | |
|
170 | 0 | x0 = x4 + x7; |
171 | 0 | x1 = x5 + x6; |
172 | 0 | x2 = x5 - x6; |
173 | 0 | x3 = x4 - x7; |
174 | | |
175 | | /* quantization is done in place */ |
176 | 0 | i4_value = x0 + x1; |
177 | 0 | if(i == 0) |
178 | 0 | { |
179 | 0 | (*pi2_alt_dc_addr) = i4_value; |
180 | 0 | } |
181 | 0 | FWD_QUANT(i4_value, pu2_threshold_matrix[0], |
182 | 0 | pu2_scale_matrix[0], u4_round_factor, u4_qbits, |
183 | 0 | u4_nonzero_coeff); |
184 | 0 | pi2_out_tmp[0] = i4_value; |
185 | |
|
186 | 0 | i4_value = (x3 << 1) + x2; |
187 | 0 | FWD_QUANT(i4_value, pu2_threshold_matrix[4], |
188 | 0 | pu2_scale_matrix[4], u4_round_factor, u4_qbits, |
189 | 0 | u4_nonzero_coeff); |
190 | 0 | pi2_out_tmp[4] = i4_value; |
191 | |
|
192 | 0 | i4_value = x0 - x1; |
193 | 0 | FWD_QUANT(i4_value, pu2_threshold_matrix[8], |
194 | 0 | pu2_scale_matrix[8], u4_round_factor, u4_qbits, |
195 | 0 | u4_nonzero_coeff); |
196 | 0 | pi2_out_tmp[8] = i4_value; |
197 | |
|
198 | 0 | i4_value = x3 - (x2 << 1); |
199 | 0 | FWD_QUANT(i4_value, pu2_threshold_matrix[12], |
200 | 0 | pu2_scale_matrix[12], u4_round_factor, u4_qbits, |
201 | 0 | u4_nonzero_coeff); |
202 | 0 | pi2_out_tmp[12] = i4_value; |
203 | |
|
204 | 0 | pi2_out_tmp++; |
205 | 0 | pu2_scale_matrix++; |
206 | 0 | pu2_threshold_matrix++; |
207 | 0 | } |
208 | | |
209 | | /* Return total nonzero coefficients in the current sub block */ |
210 | 0 | *pu1_nnz = u4_nonzero_coeff; |
211 | 0 | } |
212 | | |
213 | | /** |
214 | | ******************************************************************************* |
215 | | * |
216 | | * @brief |
217 | | * This function performs forward transform and quantization on a 4x4 |
218 | | * chroma block with interleaved values |
219 | | * |
220 | | * @par Description: |
221 | | * The function accepts source buffer and estimation buffer. From these, it |
222 | | * computes the residue. This is residue is then transformed and quantized. |
223 | | * The transform and quantization are in placed computed. They use the residue |
224 | | * buffer for this. |
225 | | * |
226 | | * @param[in] pu1_src |
227 | | * Pointer to source sub-block |
228 | | * |
229 | | * @param[in] pu1_pred |
230 | | * Pointer to prediction sub-block |
231 | | * |
232 | | * @param[in] pi2_out |
233 | | * Pointer to residual sub-block |
234 | | * |
235 | | * @param[in] src_strd |
236 | | * Source stride |
237 | | * |
238 | | * @param[in] pred_strd |
239 | | * Prediction stride |
240 | | * |
241 | | * @param[in] pu2_scale_matrix |
242 | | * Pointer to Forward Quant Scale Matrix |
243 | | * |
244 | | * @param[in] pu2_threshold_matrix |
245 | | * Pointer to Forward Quant Threshold Matrix |
246 | | * |
247 | | * @param[in] u4_qbits |
248 | | * QP_BITS_h264_4x4 + floor(QP/6) |
249 | | * |
250 | | * @param[in] u4_round_factor |
251 | | * Quantization Round factor |
252 | | * |
253 | | * @param[out] pu1_nnz |
254 | | * Total non-zero coefficients in the current sub-block |
255 | | * |
256 | | * @param[in] pi2_alt_dc_addr |
257 | | * DC Coefficient of the block |
258 | | * |
259 | | * @remarks none |
260 | | * |
261 | | ******************************************************************************* |
262 | | */ |
263 | | void ih264_resi_trans_quant_chroma_4x4(UWORD8 *pu1_src, |
264 | | UWORD8 *pu1_pred, |
265 | | WORD16 *pi2_out, |
266 | | WORD32 src_strd, |
267 | | WORD32 pred_strd, |
268 | | const UWORD16 *pu2_scale_matrix, |
269 | | const UWORD16 *pu2_threshold_matrix, |
270 | | UWORD32 u4_qbits, |
271 | | UWORD32 u4_round_factor, |
272 | | UWORD8 *pu1_nnz, |
273 | | WORD16 *pu1_dc_alt_addr) |
274 | 0 | { |
275 | 0 | UWORD32 i; |
276 | 0 | WORD32 x0, x1, x2, x3, x4, x5, x6, x7; |
277 | 0 | WORD32 i4_value; |
278 | 0 | WORD16 *pi2_out_tmp = pi2_out; |
279 | 0 | UWORD32 u4_nonzero_coeff = 0; |
280 | |
|
281 | 0 | for(i = 0; i < SUB_BLK_WIDTH_4x4; i++) |
282 | 0 | { |
283 | | /* computing prediction error (residue) */ |
284 | 0 | x4 = pu1_src[0] - pu1_pred[0]; |
285 | 0 | x5 = pu1_src[2] - pu1_pred[2]; |
286 | 0 | x6 = pu1_src[4] - pu1_pred[4]; |
287 | 0 | x7 = pu1_src[6] - pu1_pred[6]; |
288 | | |
289 | | /* Horizontal transform */ |
290 | 0 | x0 = x4 + x7; |
291 | 0 | x1 = x5 + x6; |
292 | 0 | x2 = x5 - x6; |
293 | 0 | x3 = x4 - x7; |
294 | |
|
295 | 0 | pi2_out_tmp[0] = x0 + x1; |
296 | 0 | pi2_out_tmp[1] = (x3 << 1) + x2; |
297 | 0 | pi2_out_tmp[2] = x0 - x1; |
298 | 0 | pi2_out_tmp[3] = x3 - (x2 << 1); |
299 | | |
300 | | /* pointing to next row; */ |
301 | 0 | pu1_src += src_strd; |
302 | 0 | pu1_pred += pred_strd; |
303 | 0 | pi2_out_tmp += 4; |
304 | 0 | } |
305 | |
|
306 | 0 | pi2_out_tmp = pi2_out; |
307 | 0 | for(i = 0; i < SUB_BLK_WIDTH_4x4; i++) |
308 | 0 | { |
309 | | /* Vertical transform and quantization */ |
310 | 0 | x4 = pi2_out_tmp[0]; |
311 | 0 | x5 = pi2_out_tmp[4]; |
312 | 0 | x6 = pi2_out_tmp[8]; |
313 | 0 | x7 = pi2_out_tmp[12]; |
314 | |
|
315 | 0 | x0 = x4 + x7; |
316 | 0 | x1 = x5 + x6; |
317 | 0 | x2 = x5 - x6; |
318 | 0 | x3 = x4 - x7; |
319 | | |
320 | | /* quantization is done in place */ |
321 | 0 | i4_value = x0 + x1; |
322 | 0 | if(i == 0) |
323 | 0 | { |
324 | 0 | *pu1_dc_alt_addr = i4_value; |
325 | 0 | } |
326 | 0 | FWD_QUANT(i4_value, pu2_threshold_matrix[0], |
327 | 0 | pu2_scale_matrix[0], u4_round_factor, u4_qbits, |
328 | 0 | u4_nonzero_coeff); |
329 | 0 | pi2_out_tmp[0] = i4_value; |
330 | |
|
331 | 0 | i4_value = (x3 << 1) + x2; |
332 | 0 | FWD_QUANT(i4_value, pu2_threshold_matrix[4], |
333 | 0 | pu2_scale_matrix[4], u4_round_factor, u4_qbits, |
334 | 0 | u4_nonzero_coeff); |
335 | 0 | pi2_out_tmp[4] = i4_value; |
336 | |
|
337 | 0 | i4_value = x0 - x1; |
338 | 0 | FWD_QUANT(i4_value, pu2_threshold_matrix[8], |
339 | 0 | pu2_scale_matrix[8], u4_round_factor, u4_qbits, |
340 | 0 | u4_nonzero_coeff); |
341 | 0 | pi2_out_tmp[8] = i4_value; |
342 | |
|
343 | 0 | i4_value = x3 - (x2 << 1); |
344 | 0 | FWD_QUANT(i4_value, pu2_threshold_matrix[12], |
345 | 0 | pu2_scale_matrix[12], u4_round_factor, u4_qbits, |
346 | 0 | u4_nonzero_coeff); |
347 | 0 | pi2_out_tmp[12] = i4_value; |
348 | |
|
349 | 0 | pi2_out_tmp++; |
350 | 0 | pu2_scale_matrix++; |
351 | 0 | pu2_threshold_matrix++; |
352 | 0 | } |
353 | | |
354 | | /* Return total nonzero coefficients in the current sub block */ |
355 | 0 | *pu1_nnz = u4_nonzero_coeff; |
356 | 0 | } |
357 | | |
358 | | /** |
359 | | ******************************************************************************* |
360 | | * |
361 | | * @brief |
362 | | * This function performs forward hadamard transform and quantization on a |
363 | | * 4x4 block |
364 | | * |
365 | | * @par Description: |
366 | | * The function accepts source buffer and estimation buffer. From these, it |
367 | | * computes the residue. This is residue is then transformed and quantized. |
368 | | * The transform and quantization are in placed computed. They use the residue |
369 | | * buffer for this. |
370 | | * |
371 | | * @param[in] pu1_src |
372 | | * Pointer to source sub-block |
373 | | * |
374 | | * @param[in] pi2_dst |
375 | | * Pointer to destination sub-block |
376 | | * |
377 | | * @param[in] pu2_threshold_matrix |
378 | | * Pointer to Forward Quant Threshold Matrix |
379 | | * |
380 | | * @param[in] pu2_scale_matrix |
381 | | * Pointer to Forward Quant Scale Matrix |
382 | | * |
383 | | * @param[in] u4_qbits |
384 | | * QP_BITS_h264_4x4 + floor(QP/6) |
385 | | * |
386 | | * @param[in] u4_round_factor |
387 | | * Quantization Round factor |
388 | | * |
389 | | * @param[out] pu1_nnz |
390 | | * Total non-zero coefficients in the current sub-block |
391 | | * |
392 | | * @remarks none |
393 | | * |
394 | | ******************************************************************************** |
395 | | */ |
396 | | void ih264_hadamard_quant_4x4(WORD16 *pi2_src, |
397 | | WORD16 *pi2_dst, |
398 | | const UWORD16 *pu2_scale_matrix, |
399 | | const UWORD16 *pu2_threshold_matrix, |
400 | | UWORD32 u4_qbits, |
401 | | UWORD32 u4_round_factor, |
402 | | UWORD8 *pu1_nnz) |
403 | 0 | { |
404 | 0 | WORD32 i; |
405 | 0 | WORD32 x0, x1, x2, x3, x4, x5, x6, x7, i4_value; |
406 | |
|
407 | 0 | *pu1_nnz = 0; |
408 | |
|
409 | 0 | for(i = 0; i < SUB_BLK_WIDTH_4x4; i++) |
410 | 0 | { |
411 | 0 | x4 = pi2_src[0]; |
412 | 0 | x5 = pi2_src[1]; |
413 | 0 | x6 = pi2_src[2]; |
414 | 0 | x7 = pi2_src[3]; |
415 | |
|
416 | 0 | x0 = x4 + x7; |
417 | 0 | x1 = x5 + x6; |
418 | 0 | x2 = x5 - x6; |
419 | 0 | x3 = x4 - x7; |
420 | |
|
421 | 0 | pi2_dst[0] = x0 + x1; |
422 | 0 | pi2_dst[1] = x3 + x2; |
423 | 0 | pi2_dst[2] = x0 - x1; |
424 | 0 | pi2_dst[3] = x3 - x2; |
425 | |
|
426 | 0 | pi2_src += 4; |
427 | 0 | pi2_dst += 4; |
428 | 0 | } |
429 | | |
430 | | /* Vertical transform and quantization */ |
431 | 0 | pi2_dst -= SUB_BLK_WIDTH_4x4 << 2; |
432 | |
|
433 | 0 | for(i = 0; i < SUB_BLK_WIDTH_4x4; i++) |
434 | 0 | { |
435 | 0 | x4 = pi2_dst[0]; |
436 | 0 | x5 = pi2_dst[4]; |
437 | 0 | x6 = pi2_dst[8]; |
438 | 0 | x7 = pi2_dst[12]; |
439 | |
|
440 | 0 | x0 = x4 + x7; |
441 | 0 | x1 = x5 + x6; |
442 | 0 | x2 = x5 - x6; |
443 | 0 | x3 = x4 - x7; |
444 | |
|
445 | 0 | i4_value = (x0 + x1) >> 1; |
446 | 0 | FWD_QUANT(i4_value, pu2_threshold_matrix[0], |
447 | 0 | pu2_scale_matrix[0], u4_round_factor, u4_qbits, pu1_nnz[0]); |
448 | 0 | pi2_dst[0] = i4_value; |
449 | |
|
450 | 0 | i4_value = (x3 + x2) >> 1; |
451 | 0 | FWD_QUANT(i4_value, pu2_threshold_matrix[0], |
452 | 0 | pu2_scale_matrix[0], u4_round_factor, u4_qbits, pu1_nnz[0]); |
453 | 0 | pi2_dst[4] = i4_value; |
454 | |
|
455 | 0 | i4_value = (x0 - x1) >> 1; |
456 | 0 | FWD_QUANT(i4_value, pu2_threshold_matrix[0], |
457 | 0 | pu2_scale_matrix[0], u4_round_factor, u4_qbits, pu1_nnz[0]); |
458 | 0 | pi2_dst[8] = i4_value; |
459 | |
|
460 | 0 | i4_value = (x3 - x2) >> 1; |
461 | 0 | FWD_QUANT(i4_value, pu2_threshold_matrix[0], |
462 | 0 | pu2_scale_matrix[0], u4_round_factor, u4_qbits, pu1_nnz[0]); |
463 | 0 | pi2_dst[12] = i4_value; |
464 | |
|
465 | 0 | pi2_dst++; |
466 | 0 | } |
467 | 0 | } |
468 | | |
469 | | /** |
470 | | ******************************************************************************* |
471 | | * |
472 | | * @brief |
473 | | * This function performs forward hadamard transform and quantization on a |
474 | | * 2x2 block for both U and V planes |
475 | | * |
476 | | * @par Description: |
477 | | * The function accepts source buffer and estimation buffer. From these, it |
478 | | * computes the residue. This is residue is then transformed and quantized. |
479 | | * The transform and quantization are in placed computed. They use the residue |
480 | | * buffer for this. |
481 | | * |
482 | | * @param[in] pu1_src |
483 | | * Pointer to source sub-block |
484 | | * |
485 | | * @param[in] pi2_dst |
486 | | * Pointer to destination sub-block |
487 | | * |
488 | | * @param[in] pu2_threshold_matrix |
489 | | * Pointer to Forward Quant Threshold Matrix |
490 | | * |
491 | | * @param[in] pu2_scale_matrix |
492 | | * Pointer to Forward Quant Scale Matrix |
493 | | * |
494 | | * @param[in] u4_qbits |
495 | | * QP_BITS_h264_4x4 + floor(QP/6) |
496 | | * |
497 | | * @param[in] u4_round_factor |
498 | | * Quantization Round factor |
499 | | * |
500 | | * @param[out] pu1_nnz |
501 | | * Total non-zero coefficients in the current sub-block |
502 | | * |
503 | | * @remarks |
504 | | * NNZ for dc is populated at 0 and 5th position of pu1_nnz |
505 | | * |
506 | | ******************************************************************************* |
507 | | */ |
508 | | void ih264_hadamard_quant_2x2_uv(WORD16 *pi2_src, |
509 | | WORD16 *pi2_dst, |
510 | | const UWORD16 *pu2_scale_matrix, |
511 | | const UWORD16 *pu2_threshold_matrix, |
512 | | UWORD32 u4_qbits, |
513 | | UWORD32 u4_round_factor, |
514 | | UWORD8 *pu1_nnz) |
515 | 0 | { |
516 | 0 | WORD32 x0, x1, x2, x3, x4, x5, x6, x7; |
517 | 0 | WORD32 i4_value, plane; |
518 | |
|
519 | 0 | for(plane = 0; plane < 2; plane++) |
520 | 0 | { |
521 | 0 | pu1_nnz[plane] = 0; |
522 | | |
523 | | /* Horizontal transform */ |
524 | 0 | x4 = pi2_src[0]; |
525 | 0 | x5 = pi2_src[1]; |
526 | 0 | x6 = pi2_src[2]; |
527 | 0 | x7 = pi2_src[3]; |
528 | |
|
529 | 0 | x0 = x4 + x5; |
530 | 0 | x1 = x4 - x5; |
531 | 0 | x2 = x6 + x7; |
532 | 0 | x3 = x6 - x7; |
533 | | |
534 | | /* Vertical transform and quantization */ |
535 | 0 | i4_value = (x0 + x2); |
536 | 0 | FWD_QUANT(i4_value, pu2_threshold_matrix[0], |
537 | 0 | pu2_scale_matrix[0], u4_round_factor, u4_qbits, |
538 | 0 | pu1_nnz[plane]); |
539 | 0 | pi2_dst[0] = i4_value; |
540 | |
|
541 | 0 | i4_value = (x0 - x2); |
542 | 0 | FWD_QUANT(i4_value, pu2_threshold_matrix[0], |
543 | 0 | pu2_scale_matrix[0], u4_round_factor, u4_qbits, |
544 | 0 | pu1_nnz[plane]); |
545 | 0 | pi2_dst[2] = i4_value; |
546 | |
|
547 | 0 | i4_value = (x1 - x3); |
548 | 0 | FWD_QUANT(i4_value, pu2_threshold_matrix[0], |
549 | 0 | pu2_scale_matrix[0], u4_round_factor, u4_qbits, |
550 | 0 | pu1_nnz[plane]); |
551 | 0 | pi2_dst[3] = i4_value; |
552 | |
|
553 | 0 | i4_value = (x1 + x3); |
554 | 0 | FWD_QUANT(i4_value, pu2_threshold_matrix[0], |
555 | 0 | pu2_scale_matrix[0], u4_round_factor, u4_qbits, |
556 | 0 | pu1_nnz[plane]); |
557 | 0 | pi2_dst[1] = i4_value; |
558 | |
|
559 | 0 | pi2_dst += 4; |
560 | 0 | pi2_src += 4; |
561 | 0 | } |
562 | 0 | } |
563 | | |
564 | | /** |
565 | | ******************************************************************************* |
566 | | * |
567 | | * @brief |
568 | | * This function performs Single stage forward transform CF8 and quantization |
569 | | * on 8x8 blocks |
570 | | * |
571 | | * @par Description: |
572 | | * Performs single stage 8x8 forward transform CF8 after calculating the residue |
573 | | * The result is then quantized |
574 | | * |
575 | | * @param[in] pu1_src |
576 | | * Pointer to source sub-block |
577 | | * |
578 | | * @param[in] pu1_pred |
579 | | * Pointer to prediction sub-block |
580 | | * |
581 | | * @param[in] pi2_out |
582 | | * Pointer to residual sub-block |
583 | | * |
584 | | * @param[in] src_strd |
585 | | * Source stride |
586 | | * |
587 | | * @param[in] pred_strd |
588 | | * Prediction stride |
589 | | * |
590 | | * @param[in] pu2_scale_matrix |
591 | | * Pointer to Forward Quant Scale Matrix |
592 | | * |
593 | | * @param[in] pu2_threshold_matrix |
594 | | * Pointer to Forward Quant Threshold Matrix |
595 | | * |
596 | | * @param[in] u4_qbits |
597 | | * QP_BITS_h264_8x8 + floor(QP/6) |
598 | | * |
599 | | * @param[in] u4_round_factor |
600 | | * Quantization Round factor |
601 | | * |
602 | | * @param[out] pu1_nnz |
603 | | * Total non-zero coefficients in the current sub-block |
604 | | * |
605 | | * @param[in] pi2_alt_dc_addr |
606 | | * UNUSED |
607 | | * |
608 | | * @returns none |
609 | | * |
610 | | * @remarks: |
611 | | * TODO: This function needs to be tested before integration |
612 | | * |
613 | | ******************************************************************************* |
614 | | */ |
615 | | void ih264_resi_trans_quant_8x8(UWORD8 *pu1_src, |
616 | | UWORD8 *pu1_pred, |
617 | | WORD16 *pi2_out, |
618 | | WORD32 src_strd, |
619 | | WORD32 pred_strd, |
620 | | const UWORD16 *pu2_scale_matrix, |
621 | | const UWORD16 *pu2_threshold_matrix, |
622 | | UWORD32 u4_qbits, |
623 | | UWORD32 u4_round_factor, |
624 | | UWORD8 *pu1_nnz, |
625 | | WORD16 *pu1_dc_alt_addr) |
626 | 0 | { |
627 | 0 | WORD16 *pi2_out_tmp = pi2_out; |
628 | 0 | WORD32 i; |
629 | 0 | WORD32 a0, a1, a2, a3, a4, a5, a6, a7; |
630 | 0 | WORD32 r0, r1, r2, r3, r4, r5, r6, r7; |
631 | 0 | UWORD32 u4_nonzero_coeff = 0; |
632 | |
|
633 | 0 | UNUSED(pu1_dc_alt_addr); |
634 | | |
635 | | /* Horizontal transform */ |
636 | 0 | for(i = 0; i < SUB_BLK_WIDTH_8x8; ++i) |
637 | 0 | { |
638 | 0 | r0 = pu1_src[0]; |
639 | 0 | r0 -= pu1_pred[0]; |
640 | 0 | r1 = pu1_src[1]; |
641 | 0 | r1 -= pu1_pred[1]; |
642 | 0 | r2 = pu1_src[2]; r2 -= pu1_pred[2]; |
643 | 0 | r3 = pu1_src[3]; r3 -= pu1_pred[3]; |
644 | 0 | r4 = pu1_src[4]; r4 -= pu1_pred[4]; |
645 | 0 | r5 = pu1_src[5]; r5 -= pu1_pred[5]; |
646 | 0 | r6 = pu1_src[6]; r6 -= pu1_pred[6]; |
647 | 0 | r7 = pu1_src[7]; r7 -= pu1_pred[7]; |
648 | |
|
649 | 0 | a0 = r0 + r7; |
650 | 0 | a1 = r1 + r6; |
651 | 0 | a2 = r2 + r5; |
652 | 0 | a3 = r3 + r4; |
653 | |
|
654 | 0 | a4 = a0 + a3; |
655 | 0 | a5 = a1 + a2; |
656 | 0 | a6 = a0 - a3; |
657 | 0 | a7 = a1 - a2; |
658 | |
|
659 | 0 | pi2_out_tmp[0] = a4 + a5; |
660 | 0 | pi2_out_tmp[2] = a6 + (a7 >> 1); |
661 | 0 | pi2_out_tmp[4] = a4 - a5; |
662 | 0 | pi2_out_tmp[6] = (a6 >> 1) - a7; |
663 | |
|
664 | 0 | a0 = r0 - r7; |
665 | 0 | a1 = r1 - r6; |
666 | 0 | a2 = r2 - r5; |
667 | 0 | a3 = r3 - r4; |
668 | |
|
669 | 0 | a4 = a1 + a2 + ((a0 >> 1) + a0); |
670 | 0 | a5 = a0 - a3 - ((a2 >> 1) + a2); |
671 | 0 | a6 = a0 + a3 - ((a1 >> 1) + a1); |
672 | 0 | a7 = a1 - a2 + ((a3 >> 1) + a3); |
673 | |
|
674 | 0 | pi2_out_tmp[1] = a4 + (a7 >> 2); |
675 | 0 | pi2_out_tmp[3] = a5 + (a6 >> 2); |
676 | 0 | pi2_out_tmp[5] = a6 - (a5 >> 2); |
677 | 0 | pi2_out_tmp[7] = (a4 >> 2) - a7; |
678 | |
|
679 | 0 | pu1_src += src_strd; |
680 | 0 | pu1_pred += pred_strd; |
681 | 0 | pi2_out_tmp += 8; |
682 | 0 | } |
683 | | |
684 | | /* vertical transform and quant */ |
685 | 0 | pi2_out_tmp = pi2_out; |
686 | 0 | for(i = 0; i < SUB_BLK_WIDTH_8x8; ++i) |
687 | 0 | { |
688 | 0 | r0 = pi2_out_tmp[0]; |
689 | 0 | r1 = pi2_out_tmp[8]; |
690 | 0 | r2 = pi2_out_tmp[16]; |
691 | 0 | r3 = pi2_out_tmp[24]; |
692 | 0 | r4 = pi2_out_tmp[32]; |
693 | 0 | r5 = pi2_out_tmp[40]; |
694 | 0 | r6 = pi2_out_tmp[48]; |
695 | 0 | r7 = pi2_out_tmp[56]; |
696 | |
|
697 | 0 | a0 = r0 + r7; |
698 | 0 | a1 = r1 + r6; |
699 | 0 | a2 = r2 + r5; |
700 | 0 | a3 = r3 + r4; |
701 | |
|
702 | 0 | a4 = a0 + a3; |
703 | 0 | a5 = a1 + a2; |
704 | 0 | a6 = a0 - a3; |
705 | 0 | a7 = a1 - a2; |
706 | |
|
707 | 0 | a0 = r0 - r7; |
708 | 0 | a1 = r1 - r6; |
709 | 0 | a2 = r2 - r5; |
710 | 0 | a3 = r3 - r4; |
711 | |
|
712 | 0 | r0 = a4 + a5; |
713 | 0 | r2 = a6 + (a7 >> 1); |
714 | 0 | r4 = a4 - a5; |
715 | 0 | r6 = (a6 >> 1) - a7; |
716 | |
|
717 | 0 | a4 = a1 + a2 + ((a0 >> 1) + a0); |
718 | 0 | a5 = a0 - a3 - ((a2 >> 1) + a2); |
719 | 0 | a6 = a0 + a3 - ((a1 >> 1) + a1); |
720 | 0 | a7 = a1 - a2 + ((a3 >> 1) + a3); |
721 | |
|
722 | 0 | r1 = a4 + (a7 >> 2); |
723 | 0 | r3 = a5 + (a6 >> 2); |
724 | 0 | r5 = a6 - (a5 >> 2); |
725 | 0 | r7 = (a4 >> 2) - a7; |
726 | |
|
727 | 0 | FWD_QUANT(r0, pu2_threshold_matrix[0], |
728 | 0 | pu2_scale_matrix[0], u4_round_factor, u4_qbits, |
729 | 0 | u4_nonzero_coeff); |
730 | 0 | pi2_out_tmp[0] = r0; |
731 | |
|
732 | 0 | FWD_QUANT(r1, pu2_threshold_matrix[8], |
733 | 0 | pu2_scale_matrix[8], u4_round_factor, u4_qbits, |
734 | 0 | u4_nonzero_coeff); |
735 | 0 | pi2_out_tmp[8] = r1; |
736 | |
|
737 | 0 | FWD_QUANT(r2, pu2_threshold_matrix[16], |
738 | 0 | pu2_scale_matrix[16], u4_round_factor, u4_qbits, |
739 | 0 | u4_nonzero_coeff); |
740 | 0 | pi2_out_tmp[16] = r2; |
741 | |
|
742 | 0 | FWD_QUANT(r3, pu2_threshold_matrix[24], |
743 | 0 | pu2_scale_matrix[24], u4_round_factor, u4_qbits, |
744 | 0 | u4_nonzero_coeff); |
745 | 0 | pi2_out_tmp[24] = r3; |
746 | |
|
747 | 0 | FWD_QUANT(r4, pu2_threshold_matrix[32], |
748 | 0 | pu2_scale_matrix[32], u4_round_factor, u4_qbits, |
749 | 0 | u4_nonzero_coeff); |
750 | 0 | pi2_out_tmp[32] = r4; |
751 | |
|
752 | 0 | FWD_QUANT(r5, pu2_threshold_matrix[40], |
753 | 0 | pu2_scale_matrix[40], u4_round_factor, u4_qbits, |
754 | 0 | u4_nonzero_coeff); |
755 | 0 | pi2_out_tmp[40] = r5; |
756 | |
|
757 | 0 | FWD_QUANT(r6, pu2_threshold_matrix[48], |
758 | 0 | pu2_scale_matrix[48], u4_round_factor, u4_qbits, |
759 | 0 | u4_nonzero_coeff); |
760 | 0 | pi2_out_tmp[48] = r6; |
761 | |
|
762 | 0 | FWD_QUANT(r7, pu2_threshold_matrix[56], |
763 | 0 | pu2_scale_matrix[56], u4_round_factor, u4_qbits, |
764 | 0 | u4_nonzero_coeff); |
765 | 0 | pi2_out_tmp[56] = r7; |
766 | |
|
767 | 0 | pi2_out_tmp++; |
768 | 0 | pu2_scale_matrix++; |
769 | 0 | pu2_threshold_matrix++; |
770 | 0 | } |
771 | | /* Return total nonzero coefficients in the current sub block */ |
772 | 0 | *pu1_nnz = u4_nonzero_coeff; |
773 | 0 | } |