/src/libhevc/decoder/ihevcd_iquant_itrans_recon_ctb.c
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ******************************************************************************/ |
18 | | /** |
19 | | ******************************************************************************* |
20 | | * @file |
21 | | * ihevcd_iquant_itrans_recon_ctb.c |
22 | | * |
23 | | * @brief |
24 | | * Contains functions for inverse quantization, inverse transform and recon |
25 | | * |
26 | | * @author |
27 | | * Ittiam |
28 | | * |
29 | | * @par List of Functions: |
30 | | * - ihevcd_iquant_itrans_recon_ctb() |
31 | | * |
32 | | * @remarks |
33 | | * None |
34 | | * |
35 | | ******************************************************************************* |
36 | | */ |
37 | | /*****************************************************************************/ |
38 | | /* File Includes */ |
39 | | /*****************************************************************************/ |
40 | | #include <stdio.h> |
41 | | #include <stddef.h> |
42 | | #include <stdlib.h> |
43 | | #include <string.h> |
44 | | |
45 | | #include "ihevc_typedefs.h" |
46 | | #include "iv.h" |
47 | | #include "ivd.h" |
48 | | #include "ihevcd_cxa.h" |
49 | | |
50 | | #include "ihevc_defs.h" |
51 | | #include "ihevc_debug.h" |
52 | | #include "ihevc_structs.h" |
53 | | #include "ihevc_cabac_tables.h" |
54 | | #include "ihevc_macros.h" |
55 | | #include "ihevc_platform_macros.h" |
56 | | |
57 | | #include "ihevcd_defs.h" |
58 | | #include "ihevcd_function_selector.h" |
59 | | #include "ihevcd_structs.h" |
60 | | #include "ihevcd_error.h" |
61 | | #include "ihevcd_bitstream.h" |
62 | | #include "ihevc_common_tables.h" |
63 | | |
64 | | /* Intra pred includes */ |
65 | | #include "ihevc_intra_pred.h" |
66 | | |
67 | | /* Inverse transform common module includes */ |
68 | | #include "ihevc_trans_tables.h" |
69 | | #include "ihevc_trans_macros.h" |
70 | | #include "ihevc_itrans_recon.h" |
71 | | #include "ihevc_recon.h" |
72 | | #include "ihevc_chroma_itrans_recon.h" |
73 | | #include "ihevc_chroma_recon.h" |
74 | | |
75 | | /* Decoder includes */ |
76 | | #include "ihevcd_common_tables.h" |
77 | | #include "ihevcd_iquant_itrans_recon_ctb.h" |
78 | | #include "ihevcd_debug.h" |
79 | | #include "ihevcd_profile.h" |
80 | | #include "ihevcd_statistics.h" |
81 | | #include "ihevcd_itrans_recon_dc.h" |
82 | | |
83 | | |
84 | | /* Globals |
| | * |
| | * g_i4_ip_funcs holds one IP_FUNC_MODE_* selector per intra prediction mode |
| | * index. The array is declared with MAX_NUM_IP_MODES entries and the |
| | * initialisers below cover modes 0..34. Modes in the same range (3-9, 11-17, |
| | * 19-25, 27-33) share a selector, and modes 18 and 34 share IP_FUNC_MODE_18_34. |
| | * NOTE(review): the selector is presumably an index into the codec's intra |
| | * prediction function table - the use is outside this chunk, confirm. |
| | */ |
85 | | static const WORD32 g_i4_ip_funcs[MAX_NUM_IP_MODES] = |
86 | | { IP_FUNC_MODE_0, /* Mode 0 */ |
87 | | IP_FUNC_MODE_1, /* Mode 1 */ |
88 | | IP_FUNC_MODE_2, /* Mode 2 */ |
89 | | IP_FUNC_MODE_3TO9, /* Mode 3 */ |
90 | | IP_FUNC_MODE_3TO9, /* Mode 4 */ |
91 | | IP_FUNC_MODE_3TO9, /* Mode 5 */ |
92 | | IP_FUNC_MODE_3TO9, /* Mode 6 */ |
93 | | IP_FUNC_MODE_3TO9, /* Mode 7 */ |
94 | | IP_FUNC_MODE_3TO9, /* Mode 8 */ |
95 | | IP_FUNC_MODE_3TO9, /* Mode 9 */ |
96 | | IP_FUNC_MODE_10, /* Mode 10 */ |
97 | | IP_FUNC_MODE_11TO17, /* Mode 11 */ |
98 | | IP_FUNC_MODE_11TO17, /* Mode 12 */ |
99 | | IP_FUNC_MODE_11TO17, /* Mode 13 */ |
100 | | IP_FUNC_MODE_11TO17, /* Mode 14 */ |
101 | | IP_FUNC_MODE_11TO17, /* Mode 15 */ |
102 | | IP_FUNC_MODE_11TO17, /* Mode 16 */ |
103 | | IP_FUNC_MODE_11TO17, /* Mode 17 */ |
104 | | IP_FUNC_MODE_18_34, /* Mode 18 */ |
105 | | IP_FUNC_MODE_19TO25, /* Mode 19 */ |
106 | | IP_FUNC_MODE_19TO25, /* Mode 20 */ |
107 | | IP_FUNC_MODE_19TO25, /* Mode 21 */ |
108 | | IP_FUNC_MODE_19TO25, /* Mode 22 */ |
109 | | IP_FUNC_MODE_19TO25, /* Mode 23 */ |
110 | | IP_FUNC_MODE_19TO25, /* Mode 24 */ |
111 | | IP_FUNC_MODE_19TO25, /* Mode 25 */ |
112 | | IP_FUNC_MODE_26, /* Mode 26 */ |
113 | | IP_FUNC_MODE_27TO33, /* Mode 27 */ |
114 | | IP_FUNC_MODE_27TO33, /* Mode 28 */ |
115 | | IP_FUNC_MODE_27TO33, /* Mode 29 */ |
116 | | IP_FUNC_MODE_27TO33, /* Mode 30 */ |
117 | | IP_FUNC_MODE_27TO33, /* Mode 31 */ |
118 | | IP_FUNC_MODE_27TO33, /* Mode 32 */ |
119 | | IP_FUNC_MODE_27TO33, /* Mode 33 */ |
120 | | IP_FUNC_MODE_18_34, /* Mode 34 */ |
121 | | }; |
122 | | |
123 | | /* Pointers to element [0][0] of the transform tables, in this fixed order: |
| | * g_ai2_ihevc_trans_dst_4, g_ai2_ihevc_trans_4, g_ai2_ihevc_trans_8, |
| | * g_ai2_ihevc_trans_16, g_ai2_ihevc_trans_32. |
| | * The array is not declared static, so it has external linkage. */ |
124 | | const WORD16 *g_ai2_ihevc_trans_tables[] = |
125 | | { &g_ai2_ihevc_trans_dst_4[0][0], |
126 | | &g_ai2_ihevc_trans_4[0][0], |
127 | | &g_ai2_ihevc_trans_8[0][0], |
128 | | &g_ai2_ihevc_trans_16[0][0], |
129 | | &g_ai2_ihevc_trans_32[0][0] |
130 | | }; |
131 | | |
132 | | |
133 | | /*****************************************************************************/ |
134 | | /* Structures */ |
135 | | /*****************************************************************************/ |
136 | | /** |
137 | | * Per-plane context of one TU for the inverse quant / inverse transform / |
| | * recon step. It is the third argument of every PF_IQITRECON_PLANE handler |
| | * in this file. |
138 | | */ |
139 | | typedef struct |
140 | | { |
141 | | /* |
142 | | * coefficient buffer of this TU plane; handed to the recon / itrans_recon |
| | * functions as their source |
143 | | */ |
144 | | WORD16 *pi2_tu_coeff; |
145 | | |
146 | | /** |
147 | | * prediction buffer (input of the recon functions) |
148 | | */ |
149 | | UWORD8 *pu1_pred; |
150 | | |
151 | | /** |
152 | | * reconstruction buffer (output of the recon functions) |
153 | | */ |
154 | | UWORD8 *pu1_dst; |
155 | | |
156 | | /** |
157 | | * stride of the transform coefficient buffer |
158 | | */ |
159 | | WORD32 tu_coeff_stride; |
160 | | |
161 | | /** |
162 | | * stride of the prediction buffer |
163 | | */ |
164 | | WORD32 pred_strd; |
165 | | |
166 | | /** |
167 | | * stride of the reconstruction buffer |
168 | | */ |
169 | | WORD32 dst_strd; |
170 | | |
171 | | /** |
172 | | * zero column / zero row masks forwarded to the itrans and recon functions. |
| | * NOTE(review): presumably filled by ihevcd_unpack_coeffs(), where a set |
| | * bit n means column / row n holds no coefficient - confirm at the caller. |
173 | | */ |
174 | | UWORD32 zero_cols; |
175 | | UWORD32 zero_rows; |
176 | | |
177 | | /** |
178 | | * coeff_type == 0 selects the full inverse transform; any other value |
| | * selects the DC-only path, which uses coeff_value instead of the buffer |
179 | | */ |
180 | | UWORD32 coeff_type; |
181 | | WORD16 coeff_value; |
182 | | |
183 | | /** |
184 | | * coded block flag; the handlers only process coefficients when it is 1 |
185 | | */ |
186 | | UWORD8 cbf; |
187 | | |
188 | | /** |
189 | | * non-zero when the transform is skipped: the handlers then call the |
| | * recon function directly, without an inverse transform |
190 | | */ |
191 | | UWORD8 transform_skip_flag; |
192 | | |
193 | | #ifdef ENABLE_MAIN_REXT_PROFILE |
194 | | /** |
195 | | * non-zero when explicit rdpcm is enabled for this TU plane |
196 | | */ |
197 | | UWORD8 explicit_rdpcm_flag; |
198 | | |
199 | | /** |
200 | | * explicit rdpcm direction: 0 takes the horizontal rdpcm path, any other |
| | * value the vertical one |
201 | | */ |
202 | | UWORD8 explicit_rdpcm_dir; |
203 | | #endif |
204 | | |
205 | | } tu_plane_iq_it_recon_ctxt_t; |
206 | | |
207 | | |
208 | | /*****************************************************************************/ |
209 | | /* Function Prototypes */ |
210 | | /*****************************************************************************/ |
211 | | typedef void (*PF_IQITRECON_PLANE)(process_ctxt_t *ps_proc, |
212 | | tu_t *ps_tu, |
213 | | tu_plane_iq_it_recon_ctxt_t *ps_pl_tu_ctxt, |
214 | | WORD32 func_idx, |
215 | | WORD32 log2_trans_size, |
216 | | CHROMA_PLANE_ID_T chroma_plane, |
217 | | WORD8 intra_flag, |
218 | | WORD8 intra_pred_mode); /* Common signature of the per-plane TU handlers: |
| | * ihevcd_iquant_itrans_recon_tu_plane() and, under ENABLE_MAIN_REXT_PROFILE, |
| | * ihevcd_iquant_itrans_resi_recon_tu_plane(). get_iqitrec_func() returns |
| | * one of the two. */ |
219 | | |
220 | | /** |
| | * Expands the packed coefficient data of one TU into the 2D buffer |
| | * pi2_tu_coeff (trans_size x trans_size, trans_size = 1 << log2_trans_size). |
| | * Levels are inverse quantized unless trans_quant_bypass is set. The bit |
| | * depth used for the inverse quant shift is fixed to 8 here. |
| | * |
| | * Packed layout read from pu1_tu_coeff_data: |
| | * byte 0 - number of coded sub-blocks |
| | * byte 1 - bit 0 is the transform skip flag, the bits above it the scan |
| | * type (only bits 1-3 when ENABLE_MAIN_REXT_PROFILE is defined) |
| | * then one tu_sblk_coeff_data_t per coded sub-block: u2_subblk_pos (x in |
| | * the low byte, y in the high byte, in MIN_TU_SIZE units), u2_sig_coeff_map |
| | * and one ai2_level[] entry per set bit of the map. |
| | * |
| | * Outputs: |
| | * *pu4_zero_cols, *pu4_zero_rows - start as 0xFFFFFFFF; bit n is cleared |
| | * when column / row n receives a coefficient |
| | * *pu4_coeff_type - 1 on the DC-only path, 0 otherwise |
| | * *pi2_coeff_value - written on the DC-only path only |
| | * |
| | * Returns a pointer to the first byte after the data consumed for this TU |
| | * (a pointer, not a count of levels). |
| | */ |
221 | | UWORD8* ihevcd_unpack_coeffs(WORD16 *pi2_tu_coeff, |
222 | | WORD32 log2_trans_size, |
223 | | UWORD8 *pu1_tu_coeff_data, |
224 | | WORD16 *pi2_dequant_matrix, |
225 | | WORD32 qp_rem, |
226 | | WORD32 qp_div, |
227 | | TRANSFORM_TYPE e_trans_type, |
228 | | WORD32 trans_quant_bypass, |
229 | | UWORD32 *pu4_zero_cols, |
230 | | UWORD32 *pu4_zero_rows, |
231 | | UWORD32 *pu4_coeff_type, |
232 | | WORD16 *pi2_coeff_value) |
233 | 2.46M | { |
234 | | /* Generating coeffs from coeff-map */ |
235 | 2.46M | WORD32 i; |
236 | 2.46M | WORD16 *pi2_sblk_ptr; |
237 | 2.46M | WORD32 subblk_pos_x, subblk_pos_y; |
238 | 2.46M | WORD32 sblk_scan_idx, coeff_raster_idx; |
239 | 2.46M | WORD32 sblk_non_zero_coeff_idx; |
240 | 2.46M | tu_sblk_coeff_data_t *ps_tu_sblk_coeff_data; |
241 | 2.46M | UWORD8 u1_num_coded_sblks, u1_scan_type; |
242 | 2.46M | UWORD8 *pu1_new_tu_coeff_data; |
243 | 2.46M | WORD32 trans_size; |
244 | 2.46M | WORD32 xs, ys; |
245 | 2.46M | WORD32 trans_skip; |
246 | 2.46M | WORD16 iquant_out; |
247 | 2.46M | WORD32 shift_iq; |
248 | 2.46M | { |
249 | 2.46M | WORD32 bit_depth; |
250 | | |
251 | 2.46M | bit_depth = 8 + 0; |
252 | 2.46M | shift_iq = bit_depth + log2_trans_size - 5; |
253 | 2.46M | } |
254 | 2.46M | trans_size = (1 << log2_trans_size); |
255 | | |
256 | | /* First byte points to number of coded blocks */ |
257 | 2.46M | u1_num_coded_sblks = *pu1_tu_coeff_data++; |
258 | | |
259 | | /* Next byte points to scan type */ |
260 | 2.46M | u1_scan_type = *pu1_tu_coeff_data++; |
261 | | /* 0th bit has trans_skip */ |
262 | 2.46M | trans_skip = u1_scan_type & 1; |
263 | 2.46M | #ifdef ENABLE_MAIN_REXT_PROFILE |
264 | 2.46M | u1_scan_type = (u1_scan_type & 0xF) >> 1; |
265 | | #else |
266 | | u1_scan_type >>= 1; |
267 | | #endif |
268 | | |
269 | 2.46M | pi2_sblk_ptr = pi2_tu_coeff; |
270 | | |
271 | | /* Initially all columns are assumed to be zero */ |
272 | 2.46M | *pu4_zero_cols = 0xFFFFFFFF; |
273 | | /* Initially all rows are assumed to be zero */ |
274 | 2.46M | *pu4_zero_rows = 0xFFFFFFFF; |
275 | | |
276 | 2.46M | ps_tu_sblk_coeff_data = (tu_sblk_coeff_data_t *)(pu1_tu_coeff_data); |
277 | | |
278 | 2.46M | if(trans_skip) |
279 | 373k | memset(pi2_tu_coeff, 0, trans_size * trans_size * sizeof(WORD16)); |
280 | | |
281 | 2.46M | STATS_INIT_SBLK_AND_COEFF_POS(); |
282 | | |
283 | | /* DC only case: a single coded sub-block at position 0 whose map has only bit 0 set */ |
284 | 2.46M | if((e_trans_type != DST_4x4) && (1 == u1_num_coded_sblks) |
285 | 1.34M | && (0 == ps_tu_sblk_coeff_data->u2_subblk_pos) |
286 | 1.27M | && (1 == ps_tu_sblk_coeff_data->u2_sig_coeff_map)) |
287 | 490k | { |
288 | 490k | *pu4_coeff_type = 1; |
289 | | |
290 | 490k | if(!trans_quant_bypass) |
291 | 475k | { |
292 | 475k | if(4 == trans_size) |
293 | 198k | { |
294 | 198k | IQUANT_4x4(iquant_out, |
295 | 198k | ps_tu_sblk_coeff_data->ai2_level[0], |
296 | 198k | pi2_dequant_matrix[0] |
297 | 198k | * g_ihevc_iquant_scales[qp_rem], |
298 | 198k | shift_iq, qp_div); |
299 | 198k | } |
300 | 276k | else |
301 | 276k | { |
302 | 276k | IQUANT(iquant_out, ps_tu_sblk_coeff_data->ai2_level[0], |
303 | 276k | pi2_dequant_matrix[0] * g_ihevc_iquant_scales[qp_rem], |
304 | 276k | shift_iq, qp_div); |
305 | 276k | } |
306 | 475k | if(trans_skip) |
307 | 53.3k | iquant_out = (iquant_out + 16) >> 5; |
308 | 475k | } |
309 | 15.1k | else |
310 | 15.1k | { |
311 | | /* setting the first column to zero (the buffer is not memset on this path unless trans_skip is set) */ |
312 | 174k | for(i = 0; i < trans_size; i++) |
313 | 159k | *(pi2_tu_coeff + i * trans_size) = 0; |
314 | | |
315 | 15.1k | iquant_out = ps_tu_sblk_coeff_data->ai2_level[0]; |
316 | 15.1k | } |
317 | 490k | *pi2_coeff_value = iquant_out; |
318 | 490k | *pi2_tu_coeff = iquant_out; |
319 | 490k | *pu4_zero_cols &= ~0x1; |
320 | 490k | *pu4_zero_rows &= ~0x1; |
321 | 490k | ps_tu_sblk_coeff_data = |
322 | 490k | (void *)&ps_tu_sblk_coeff_data->ai2_level[1]; |
323 | | |
324 | 490k | STATS_UPDATE_COEFF_COUNT(); |
325 | 490k | STATS_LAST_SBLK_POS_UPDATE(e_trans_type, (trans_skip || trans_quant_bypass), 0, 0); |
326 | 490k | STATS_UPDATE_SBLK_AND_COEFF_HISTOGRAM(e_trans_type, (trans_quant_bypass || trans_skip)); |
327 | 490k | return ((UWORD8 *)ps_tu_sblk_coeff_data); |
328 | 490k | } |
329 | 1.97M | else |
330 | 1.97M | { |
331 | 1.97M | *pu4_coeff_type = 0; |
332 | | /* In case of trans skip, memset has already happened */ |
333 | 1.97M | if(!trans_skip) |
334 | 1.65M | memset(pi2_tu_coeff, 0, trans_size * trans_size * sizeof(WORD16)); |
335 | 1.97M | } |
336 | | |
337 | 4.58M | for(i = 0; i < u1_num_coded_sblks; i++) |
338 | 2.60M | { |
339 | 2.60M | UWORD32 u4_sig_coeff_map; |
340 | 2.60M | subblk_pos_x = ps_tu_sblk_coeff_data->u2_subblk_pos & 0x00FF; |
341 | 2.60M | subblk_pos_y = (ps_tu_sblk_coeff_data->u2_subblk_pos & 0xFF00) >> 8; |
342 | | |
343 | 2.60M | STATS_LAST_SBLK_POS_UPDATE(e_trans_type, (trans_skip || trans_quant_bypass), subblk_pos_x, subblk_pos_y); |
344 | | |
345 | 2.60M | subblk_pos_x = subblk_pos_x * MIN_TU_SIZE; |
346 | 2.60M | subblk_pos_y = subblk_pos_y * MIN_TU_SIZE; |
347 | | |
348 | 2.60M | pi2_sblk_ptr = pi2_tu_coeff + subblk_pos_y * trans_size |
349 | 2.60M | + subblk_pos_x; |
350 | | |
351 | | //*pu4_zero_cols &= ~(0xF << subblk_pos_x); |
352 | | |
353 | 2.60M | sblk_non_zero_coeff_idx = 0; |
354 | 2.60M | u4_sig_coeff_map = ps_tu_sblk_coeff_data->u2_sig_coeff_map; |
355 | | //for(sblk_scan_idx = (31 - CLZ(u4_sig_coeff_map)); sblk_scan_idx >= 0; sblk_scan_idx--) |
356 | 2.60M | sblk_scan_idx = 31; |
357 | 2.60M | do |
358 | 10.5M | { |
359 | 10.5M | WORD32 clz = CLZ(u4_sig_coeff_map); /* NOTE(review): assumes the map is non-zero on entry - confirm against the parser */ |
360 | | |
361 | 10.5M | sblk_scan_idx -= clz; |
362 | | /* when clz is 31, u4_sig_coeff_map << (clz+1) might result in unknown behaviour in some cases */ |
363 | | /* Hence either use SHL which takes care of handling these issues based on platform or shift in two stages */ |
364 | 10.5M | u4_sig_coeff_map = u4_sig_coeff_map << clz; |
365 | | /* Copying coeffs and storing in reverse order */ |
366 | 10.5M | { |
367 | 10.5M | STATS_UPDATE_COEFF_COUNT(); |
368 | 10.5M | coeff_raster_idx = |
369 | 10.5M | gau1_ihevc_invscan4x4[u1_scan_type][sblk_scan_idx]; |
370 | | |
371 | 10.5M | xs = coeff_raster_idx & 0x3; |
372 | 10.5M | ys = coeff_raster_idx >> 2; |
373 | | |
374 | 10.5M | if(!trans_quant_bypass) |
375 | 10.3M | { |
376 | 10.3M | if(4 == trans_size) |
377 | 4.74M | { |
378 | 4.74M | IQUANT_4x4(iquant_out, |
379 | 4.74M | ps_tu_sblk_coeff_data->ai2_level[sblk_non_zero_coeff_idx], |
380 | 4.74M | pi2_dequant_matrix[(subblk_pos_x + xs) |
381 | 4.74M | + (subblk_pos_y + ys) |
382 | 4.74M | * trans_size] |
383 | 4.74M | * g_ihevc_iquant_scales[qp_rem], |
384 | 4.74M | shift_iq, qp_div); |
385 | 4.74M | sblk_non_zero_coeff_idx++; |
386 | 4.74M | } |
387 | 5.63M | else |
388 | 5.63M | { |
389 | 5.63M | IQUANT(iquant_out, |
390 | 5.63M | ps_tu_sblk_coeff_data->ai2_level[sblk_non_zero_coeff_idx], |
391 | 5.63M | pi2_dequant_matrix[(subblk_pos_x + xs) |
392 | 5.63M | + (subblk_pos_y + ys) |
393 | 5.63M | * trans_size] |
394 | 5.63M | * g_ihevc_iquant_scales[qp_rem], |
395 | 5.63M | shift_iq, qp_div); |
396 | 5.63M | sblk_non_zero_coeff_idx++; |
397 | 5.63M | } |
398 | | |
399 | 10.3M | if(trans_skip) |
400 | 1.12M | iquant_out = (iquant_out + 16) >> 5; |
401 | 10.3M | } |
402 | 150k | else |
403 | 150k | { |
404 | 150k | iquant_out = ps_tu_sblk_coeff_data->ai2_level[sblk_non_zero_coeff_idx++]; |
405 | 150k | } |
406 | 10.5M | *pu4_zero_cols &= ~(0x1u << (subblk_pos_x + xs)); /* unsigned shift: the index reaches 31 for a 32x32 TU, where a signed 1 << 31 is undefined */ |
407 | 10.5M | *pu4_zero_rows &= ~(0x1u << (subblk_pos_y + ys)); /* same reason as for the column mask */ |
408 | 10.5M | *(pi2_sblk_ptr + xs + ys * trans_size) = iquant_out; |
409 | 10.5M | } |
410 | 10.5M | sblk_scan_idx--; |
411 | 10.5M | u4_sig_coeff_map <<= 1; |
412 | | |
413 | 10.5M | }while(u4_sig_coeff_map); |
414 | | /* Updating the sblk pointer */ |
415 | 2.60M | ps_tu_sblk_coeff_data = |
416 | 2.60M | (void *)&ps_tu_sblk_coeff_data->ai2_level[sblk_non_zero_coeff_idx]; |
417 | 2.60M | } |
418 | | |
419 | 1.97M | STATS_UPDATE_SBLK_AND_COEFF_HISTOGRAM(e_trans_type, (trans_quant_bypass || trans_skip)); |
420 | | |
421 | 1.97M | pu1_new_tu_coeff_data = (UWORD8 *)ps_tu_sblk_coeff_data; |
422 | | |
423 | 1.97M | return pu1_new_tu_coeff_data; |
424 | 2.46M | } |
425 | | /** |
| | * Builds the neighbour flags of one TU for intra prediction. |
| | * |
| | * Availability of the bottom-left, left, top, top-right and top-left |
| | * neighbours is read from pu4_intra_nbr_avail: word index 1 + y and bit |
| | * 31 - (1 + x), with x / y taken from ps_tu->b4_pos_x / b4_pos_y. |
| | * |
| | * With i1_constrained_intra_pred_flag set, every available neighbour is |
| | * further qualified with the bits of ps_proc->pu1_pic_intra_flag, a bitmap |
| | * addressed with one bit per 8x8 block and (width + 63) / 64 bytes per row |
| | * of 8x8 blocks. Without it, every available neighbour gets all of its bits |
| | * set (0xF, or 0x1 for top-left). |
| | * NOTE(review): a set bitmap bit presumably marks an intra coded 8x8 block - |
| | * the writer of the bitmap is outside this chunk, confirm. |
| | * |
| | * top_right and bot_left are finally masked with the number of samples left |
| | * in the picture to the right of / below the TU. |
| | * |
| | * Returns: bit 16 top-left, bits 15..12 top-right, bits 11..8 top, |
| | * bits 7..4 left and bits 3..0 bottom-left; left and bottom-left are passed |
| | * through gau4_ihevcd_4_bit_reverse before packing. |
| | */ |
426 | | WORD32 ihevcd_get_intra_nbr_flag(process_ctxt_t *ps_proc, |
427 | | tu_t *ps_tu, |
428 | | UWORD32 *pu4_intra_nbr_avail, |
429 | | WORD16 i2_pic_width_in_luma_samples, |
430 | | UWORD8 i1_constrained_intra_pred_flag, |
431 | | WORD32 trans_size, |
432 | | WORD32 ctb_size) |
433 | 2.15M | { |
434 | 2.15M | sps_t *ps_sps; |
435 | 2.15M | UWORD8 u1_bot_lt_avail, u1_left_avail, u1_top_avail, u1_top_rt_avail, |
436 | 2.15M | u1_top_lt_avail; |
437 | 2.15M | WORD32 x_cur, y_cur, x_nbr, y_nbr; |
438 | 2.15M | UWORD8 *pu1_nbr_intra_flag; |
439 | 2.15M | UWORD8 *pu1_pic_intra_flag; |
440 | 2.15M | UWORD8 top_right, top, top_left, left, bot_left; |
441 | 2.15M | WORD32 intra_pos; |
442 | 2.15M | WORD32 num_8_blks, num_8_blks_in_bits; |
443 | 2.15M | WORD32 numbytes_row = (i2_pic_width_in_luma_samples + 63) / 64; |
444 | 2.15M | WORD32 cur_x, cur_y; |
445 | 2.15M | WORD32 i; |
446 | 2.15M | WORD32 nbr_flags; |
447 | | |
448 | 2.15M | ps_sps = ps_proc->ps_sps; |
449 | 2.15M | cur_x = ps_tu->b4_pos_x; |
450 | 2.15M | cur_y = ps_tu->b4_pos_y; |
451 | | |
452 | 2.15M | u1_bot_lt_avail = (pu4_intra_nbr_avail[1 + cur_y + trans_size / MIN_TU_SIZE] |
453 | 2.15M | >> (31 - (1 + cur_x - 1))) & 1; |
454 | 2.15M | u1_left_avail = (pu4_intra_nbr_avail[1 + cur_y] >> (31 - (1 + cur_x - 1))) |
455 | 2.15M | & 1; |
456 | 2.15M | u1_top_avail = (pu4_intra_nbr_avail[1 + cur_y - 1] >> (31 - (1 + cur_x))) |
457 | 2.15M | & 1; |
458 | 2.15M | u1_top_rt_avail = (pu4_intra_nbr_avail[1 + cur_y - 1] |
459 | 2.15M | >> (31 - (1 + cur_x + trans_size / MIN_TU_SIZE))) & 1; |
460 | 2.15M | u1_top_lt_avail = (pu4_intra_nbr_avail[1 + cur_y - 1] |
461 | 2.15M | >> (31 - (1 + cur_x - 1))) & 1; |
462 | | |
463 | 2.15M | x_cur = ps_proc->i4_ctb_x * ctb_size + cur_x * MIN_TU_SIZE; |
464 | 2.15M | y_cur = ps_proc->i4_ctb_y * ctb_size + cur_y * MIN_TU_SIZE; |
465 | | |
466 | 2.15M | pu1_pic_intra_flag = ps_proc->pu1_pic_intra_flag; |
467 | | |
468 | | /* WORD32 nbr_flags as below MSB --> LSB */ |
469 | | /* Top-Left | Top-Right | Top | Left | Bottom-Left |
470 | | * 1 4 4 4 4 |
471 | | */ |
472 | 2.15M | bot_left = 0; |
473 | 2.15M | left = 0; |
474 | 2.15M | top_right = 0; |
475 | 2.15M | top = 0; |
476 | 2.15M | top_left = 0; |
477 | | |
478 | 2.15M | num_8_blks = trans_size > 4 ? trans_size / 8 : 1; |
479 | 2.15M | num_8_blks_in_bits = ((1 << num_8_blks) - 1); |
480 | | |
481 | 2.15M | if(i1_constrained_intra_pred_flag) |
482 | 420k | { |
483 | | /* TODO: constrained intra pred not tested */ |
484 | 420k | if(u1_bot_lt_avail) |
485 | 117k | { |
486 | 117k | x_nbr = x_cur - 1; |
487 | 117k | y_nbr = y_cur + trans_size; |
488 | | |
489 | 117k | pu1_nbr_intra_flag = pu1_pic_intra_flag + y_nbr / 8 * numbytes_row |
490 | 117k | + x_nbr / 64; |
491 | 117k | intra_pos = ((x_nbr / 8) % 8); |
492 | 403k | for(i = 0; i < num_8_blks; i++) |
493 | 285k | { |
494 | 285k | bot_left |= ((*(pu1_nbr_intra_flag + i * numbytes_row) |
495 | 285k | >> intra_pos) & 1) << i; |
496 | 285k | } |
497 | 117k | bot_left &= num_8_blks_in_bits; |
498 | 117k | } |
499 | 420k | if(u1_left_avail) |
500 | 413k | { |
501 | 413k | x_nbr = x_cur - 1; |
502 | 413k | y_nbr = y_cur; |
503 | | |
504 | 413k | pu1_nbr_intra_flag = pu1_pic_intra_flag + y_nbr / 8 * numbytes_row |
505 | 413k | + x_nbr / 64; |
506 | 413k | intra_pos = ((x_nbr / 8) % 8); |
507 | | |
508 | 1.49M | for(i = 0; i < num_8_blks; i++) |
509 | 1.07M | { |
510 | 1.07M | left |= ((*(pu1_nbr_intra_flag + i * numbytes_row) >> intra_pos) |
511 | 1.07M | & 1) << i; |
512 | 1.07M | } |
513 | 413k | left &= num_8_blks_in_bits; |
514 | 413k | } |
515 | 420k | if(u1_top_avail) |
516 | 383k | { |
517 | 383k | x_nbr = x_cur; |
518 | 383k | y_nbr = y_cur - 1; |
519 | | |
520 | 383k | pu1_nbr_intra_flag = pu1_pic_intra_flag + y_nbr / 8 * numbytes_row |
521 | 383k | + x_nbr / 64; |
522 | 383k | intra_pos = ((x_nbr / 8) % 8); |
523 | | |
524 | 383k | top = (*pu1_nbr_intra_flag >> intra_pos); |
525 | 383k | top &= num_8_blks_in_bits; |
526 | | /* |
527 | | for(i=0;i<num_8_blks;i++) |
528 | | { |
529 | | top |= ( (*pu1_nbr_intra_flag >> (intra_pos+i)) & 1) << i; |
530 | | } |
531 | | */ |
532 | 383k | } |
533 | 420k | if(u1_top_rt_avail) |
534 | 259k | { |
535 | 259k | x_nbr = x_cur + trans_size; |
536 | 259k | y_nbr = y_cur - 1; |
537 | | |
538 | 259k | pu1_nbr_intra_flag = pu1_pic_intra_flag + y_nbr / 8 * numbytes_row |
539 | 259k | + x_nbr / 64; |
540 | 259k | intra_pos = ((x_nbr / 8) % 8); |
541 | | |
542 | 259k | top_right = (*pu1_nbr_intra_flag >> intra_pos); |
543 | 259k | top_right &= num_8_blks_in_bits; |
544 | | /* |
545 | | for(i=0;i<num_8_blks;i++) |
546 | | { |
547 | | top_right |= ( (*pu1_nbr_intra_flag >> (intra_pos+i)) & 1) << i; |
548 | | } |
549 | | */ |
550 | 259k | } |
551 | 420k | if(u1_top_lt_avail) |
552 | 376k | { |
553 | 376k | x_nbr = x_cur - 1; |
554 | 376k | y_nbr = y_cur - 1; |
555 | | |
556 | 376k | pu1_nbr_intra_flag = pu1_pic_intra_flag + y_nbr / 8 * numbytes_row |
557 | 376k | + x_nbr / 64; |
558 | 376k | intra_pos = ((x_nbr / 8) % 8); |
559 | | |
560 | 376k | top_left = (*pu1_nbr_intra_flag >> intra_pos) & 1; |
561 | 376k | } |
562 | 420k | } |
563 | 1.73M | else |
564 | 1.73M | { |
565 | 1.73M | if(u1_top_avail) |
566 | 1.66M | top = 0xF; |
567 | 1.73M | if(u1_top_rt_avail) |
568 | 1.09M | top_right = 0xF; |
569 | 1.73M | if(u1_bot_lt_avail) |
570 | 544k | bot_left = 0xF; |
571 | 1.73M | if(u1_left_avail) |
572 | 1.70M | left = 0xF; |
573 | 1.73M | if(u1_top_lt_avail) |
574 | 1.64M | top_left = 0x1; |
575 | 1.73M | } |
576 | | |
577 | | /* Handling incomplete CTBs. NOTE(review): the shifts below assume cols_remaining and rows_remaining are never negative - confirm */ |
578 | 2.15M | { |
579 | 2.15M | WORD32 pu_size_limit = MIN(trans_size, 8); |
580 | 2.15M | WORD32 cols_remaining = ps_sps->i2_pic_width_in_luma_samples |
581 | 2.15M | - (ps_proc->i4_ctb_x << ps_sps->i1_log2_ctb_size) |
582 | 2.15M | - (ps_tu->b4_pos_x * MIN_TU_SIZE) |
583 | 2.15M | - (1 << (ps_tu->b3_size + 2)); |
584 | | /* ctb_size_top gives number of valid pixels remaining in the current row */ |
585 | 2.15M | WORD32 ctb_size_top = MIN(ctb_size, cols_remaining); |
586 | 2.15M | WORD32 ctb_size_top_bits = (1 << (ctb_size_top / pu_size_limit)) - 1; |
587 | | |
588 | 2.15M | WORD32 rows_remaining = ps_sps->i2_pic_height_in_luma_samples |
589 | 2.15M | - (ps_proc->i4_ctb_y << ps_sps->i1_log2_ctb_size) |
590 | 2.15M | - (ps_tu->b4_pos_y * MIN_TU_SIZE) |
591 | 2.15M | - (1 << (ps_tu->b3_size + 2)); |
592 | | /* ctb_size_bot gives number of valid pixels remaining in the current column */ |
593 | 2.15M | WORD32 ctb_size_bot = MIN(ctb_size, rows_remaining); |
594 | 2.15M | WORD32 ctb_size_bot_bits = (1 << (ctb_size_bot / pu_size_limit)) - 1; |
595 | | |
596 | 2.15M | top_right &= ctb_size_top_bits; |
597 | 2.15M | bot_left &= ctb_size_bot_bits; |
598 | 2.15M | } |
599 | | |
600 | | /* Top-Left | Top-Right | Top | Left | Bottom-Left |
601 | | * 1 4 4 4 4 |
602 | | */ |
603 | | |
604 | | /* |
605 | | nbr_flags = (top_left << 16) | (gau4_ihevcd_4_bit_reverse[top_right] << 12) | (gau4_ihevcd_4_bit_reverse[top] << 8) | (gau4_ihevcd_4_bit_reverse[left] << 4) |
606 | | | gau4_ihevcd_4_bit_reverse[bot_left]; |
607 | | */ |
608 | 2.15M | nbr_flags = (top_left << 16) | (top_right << 12) | (top << 8) | (gau4_ihevcd_4_bit_reverse[left] << 4) |
609 | 2.15M | | gau4_ihevcd_4_bit_reverse[bot_left]; |
610 | | |
611 | | |
612 | 2.15M | return nbr_flags; |
613 | | |
614 | 2.15M | } |
615 | | /** |
| | * Default PF_IQITRECON_PLANE handler: reconstructs one plane of a TU. |
| | * |
| | * Nothing is done unless ps_pl_tu_ctxt->cbf is 1. Otherwise: |
| | * - transquant bypass or transform skip: apf_recon[func_idx] adds the |
| | * coefficient buffer to the prediction; |
| | * - coeff_type == 0: apf_itrans_recon[func_idx] runs the inverse transform |
| | * and recon, using ps_proc->pi2_itrans_intrmd_buf as scratch; |
| | * - otherwise (DC only): apf_itrans_recon_dc[] is called with coeff_value, |
| | * indexed by whether chroma_plane differs from NULL_PLANE. |
| | * |
| | * ps_sps, ps_pps, intra_flag and intra_pred_mode are not used in this handler. |
| | */ |
616 | | static void ihevcd_iquant_itrans_recon_tu_plane(process_ctxt_t *ps_proc, |
617 | | tu_t *ps_tu, |
618 | | tu_plane_iq_it_recon_ctxt_t *ps_pl_tu_ctxt, |
619 | | WORD32 func_idx, |
620 | | WORD32 log2_trans_size, |
621 | | CHROMA_PLANE_ID_T chroma_plane, |
622 | | WORD8 intra_flag, |
623 | | WORD8 intra_pred_mode) |
624 | 46.6M | { |
625 | 46.6M | sps_t *ps_sps = ps_proc->ps_sps; |
626 | 46.6M | pps_t *ps_pps = ps_proc->ps_pps; |
627 | 46.6M | codec_t *ps_codec = ps_proc->ps_codec; |
628 | | |
629 | 46.6M | if(1 == ps_pl_tu_ctxt->cbf) |
630 | 2.46M | { |
631 | 2.46M | if(ps_tu->b1_transquant_bypass || ps_pl_tu_ctxt->transform_skip_flag) |
632 | 414k | { |
633 | | /* Recon only: the coefficient buffer is used without an inverse transform */ |
634 | 414k | ps_codec->apf_recon[func_idx](ps_pl_tu_ctxt->pi2_tu_coeff, ps_pl_tu_ctxt->pu1_pred, |
635 | 414k | ps_pl_tu_ctxt->pu1_dst, ps_pl_tu_ctxt->tu_coeff_stride, |
636 | 414k | ps_pl_tu_ctxt->pred_strd, ps_pl_tu_ctxt->dst_strd, |
637 | 414k | ps_pl_tu_ctxt->zero_cols); |
638 | 414k | } |
639 | 2.04M | else |
640 | 2.04M | { |
641 | | /* iTrans and Recon (full transform when coeff_type is 0) */ |
642 | 2.04M | if((0 == ps_pl_tu_ctxt->coeff_type)) |
643 | 1.62M | { |
644 | 1.62M | ps_codec->apf_itrans_recon[func_idx](ps_pl_tu_ctxt->pi2_tu_coeff, |
645 | 1.62M | ps_proc->pi2_itrans_intrmd_buf, |
646 | 1.62M | ps_pl_tu_ctxt->pu1_pred, |
647 | 1.62M | ps_pl_tu_ctxt->pu1_dst, |
648 | 1.62M | ps_pl_tu_ctxt->tu_coeff_stride, |
649 | 1.62M | ps_pl_tu_ctxt->pred_strd, |
650 | 1.62M | ps_pl_tu_ctxt->dst_strd, |
651 | 1.62M | ps_pl_tu_ctxt->zero_cols, |
652 | 1.62M | ps_pl_tu_ctxt->zero_rows); |
653 | 1.62M | } |
654 | 420k | else /* DC only */ |
655 | 420k | { |
656 | 420k | ps_codec->apf_itrans_recon_dc[chroma_plane != NULL_PLANE]( |
657 | 420k | ps_pl_tu_ctxt->pu1_pred, ps_pl_tu_ctxt->pu1_dst, |
658 | 420k | ps_pl_tu_ctxt->pred_strd, ps_pl_tu_ctxt->dst_strd, log2_trans_size, |
659 | 420k | ps_pl_tu_ctxt->coeff_value); |
660 | 420k | } |
661 | 2.04M | } |
662 | 2.46M | } |
663 | 46.6M | } |
664 | | /** |
| | * PF_IQITRECON_PLANE handler (ENABLE_MAIN_REXT_PROFILE builds) that first |
| | * produces the residue in a separate buffer and then reconstructs from it. |
| | * |
| | * The residue goes to ps_proc->pi2_res_luma_buf when chroma_plane is |
| | * NULL_PLANE and to ps_proc->pi2_res_chroma_buf otherwise. |
| | * |
| | * With cbf == 1: |
| | * - transquant bypass or transform skip: the coefficients are optionally |
| | * rotated (4x4 intra TU with the SPS rotation flag) and / or passed through |
| | * rdpcm (implicit for intra modes 10 and 26, or explicit); when neither |
| | * applies they are copied as they are; |
| | * - otherwise the inverse transform (or its DC-only variant) writes the |
| | * residue and zero_cols is reset to 0. |
| | * |
| | * alpha is the cross component scale of the U / V plane: |
| | * +/- (1 << (log2_res_scale_abs_plus1 - 1)), negative when the sign bit is |
| | * set, and 0 when the scale field is 0 or the plane is luma. |
| | * With alpha == 0 apf_recon[func_idx] is called (cbf == 1 only); with |
| | * alpha != 0 ihevc_chroma_recon_nxn_ccp() is called for any cbf, the chroma |
| | * residue being cleared first when cbf is 0. |
| | */ |
665 | | #ifdef ENABLE_MAIN_REXT_PROFILE |
666 | | static void ihevcd_iquant_itrans_resi_recon_tu_plane(process_ctxt_t *ps_proc, |
667 | | tu_t *ps_tu, |
668 | | tu_plane_iq_it_recon_ctxt_t *ps_pl_tu_ctxt, |
669 | | WORD32 func_idx, |
670 | | WORD32 log2_trans_size, |
671 | | CHROMA_PLANE_ID_T chroma_plane, |
672 | | WORD8 intra_flag, |
673 | | WORD8 intra_pred_mode) |
674 | 0 | { |
675 | 0 | sps_t *ps_sps = ps_proc->ps_sps; |
676 | 0 | pps_t *ps_pps = ps_proc->ps_pps; |
677 | 0 | codec_t *ps_codec = ps_proc->ps_codec; |
678 | 0 | WORD8 trans_size = 1 << log2_trans_size; |
679 | 0 | WORD16 *pi2_res = ps_proc->pi2_res_luma_buf; |
680 | 0 | WORD16 *pi2_res_uv = ps_proc->pi2_res_chroma_buf; |
681 | 0 | WORD32 alpha = 0; |
682 | 0 | WORD16 *residue_out_base = chroma_plane == NULL_PLANE ? pi2_res : pi2_res_uv; |
683 | 0 | WORD16 *residue_out = residue_out_base; |
684 | | // if both rdpcm and rotate are to be applied, share the output residue buffer between the |
685 | | // two transforms: the rotate output is placed one 4x4 block past the base |
686 | 0 | WORD16 *residue_out_intrmdt = residue_out_base + (TRANS_SIZE_4 * TRANS_SIZE_4); |
687 | 

688 | 0 | if(chroma_plane == U_PLANE && ps_tu->b3_cb_log2_res_scale_abs_plus1 != 0) |
689 | 0 | { |
690 | 0 | alpha = (1 << (ps_tu->b3_cb_log2_res_scale_abs_plus1 - 1)) |
691 | 0 | * (1 - 2 * ps_tu->b1_cb_log2_res_sign); |
692 | 0 | } |
693 | 0 | else if(chroma_plane == V_PLANE && ps_tu->b3_cr_log2_res_scale_abs_plus1 != 0) |
694 | 0 | { |
695 | 0 | alpha = (1 << (ps_tu->b3_cr_log2_res_scale_abs_plus1 - 1)) |
696 | 0 | * (1 - 2 * ps_tu->b1_cr_log2_res_sign); |
697 | 0 | } |
698 | 0 | if(1 == ps_pl_tu_ctxt->cbf) |
699 | 0 | { |
700 | 0 | if(ps_tu->b1_transquant_bypass || ps_pl_tu_ctxt->transform_skip_flag) |
701 | 0 | { |
702 | 0 | WORD8 rotate = ps_sps->i1_transform_skip_rotation_enabled_flag && trans_size == 4 |
703 | 0 | && intra_flag; |
704 | 0 | WORD8 rdpcm = (ps_sps->i1_implicit_rdpcm_enabled_flag && intra_flag |
705 | 0 | && (intra_pred_mode == 10 || intra_pred_mode == 26)) |
706 | 0 | || ps_pl_tu_ctxt->explicit_rdpcm_flag; |
707 | 0 | WORD16 *src_residue = ps_pl_tu_ctxt->pi2_tu_coeff; |
708 | 0 | WORD16 src_residue_strd = ps_pl_tu_ctxt->tu_coeff_stride; |
709 | 

710 | 0 | if(rotate) |
711 | 0 | { |
712 | 0 | ihevc_res_4x4_rotate(src_residue, rdpcm ? residue_out_intrmdt : residue_out, |
713 | 0 | src_residue_strd, trans_size, ps_pl_tu_ctxt->zero_cols); |
714 | 0 | ps_pl_tu_ctxt->zero_cols = |
715 | 0 | gau4_ihevcd_4_bit_reverse[ps_pl_tu_ctxt->zero_cols & 0xF]; |
716 | 0 | src_residue = residue_out_intrmdt; /* only read again when rdpcm follows */ |
717 | 0 | src_residue_strd = trans_size; |
718 | 0 | } |
719 | 

720 | 0 | if(rdpcm) |
721 | 0 | { |
722 | 0 | WORD8 rdpcm_dir = |
723 | 0 | ps_pl_tu_ctxt->explicit_rdpcm_flag ? |
724 | 0 | ps_pl_tu_ctxt->explicit_rdpcm_dir : |
725 | 0 | intra_pred_mode != 10; /* implicit: mode 10 gives 0 (horizontal path), mode 26 gives 1 */ |
726 | 0 | if(rdpcm_dir == 0) |
727 | 0 | { |
728 | 0 | ihevc_res_nxn_rdpcm_horz(src_residue, residue_out, src_residue_strd, trans_size, |
729 | 0 | trans_size, ps_pl_tu_ctxt->zero_cols); |
730 | 0 | ps_pl_tu_ctxt->zero_cols = (1 << CTZ(~ps_pl_tu_ctxt->zero_cols)) - 1; /* keeps only the set bits below the lowest clear bit */ |
731 | 0 | } |
732 | 0 | else |
733 | 0 | { |
734 | 0 | ihevc_res_nxn_rdpcm_vert(src_residue, residue_out, src_residue_strd, trans_size, |
735 | 0 | trans_size, ps_pl_tu_ctxt->zero_cols); |
736 | 0 | } |
737 | 0 | } |
738 | 

739 | 0 | if(!rdpcm && !rotate) |
740 | 0 | { |
741 | 0 | ihevc_res_nxn_copy(src_residue, residue_out, src_residue_strd, trans_size, |
742 | 0 | trans_size, ps_pl_tu_ctxt->zero_cols); |
743 | 0 | } |
744 | 0 | } |
745 | 0 | else |
746 | 0 | { |
747 | | /* iTrans into the residue buffer. NOTE(review): func_idx - 4 for chroma presumably maps the chroma recon index to the matching luma-sized transform - confirm */ |
748 | 0 | if(0 == ps_pl_tu_ctxt->coeff_type) |
749 | 0 | { |
750 | 0 | WORD32 func_tmp_idx = chroma_plane != NULL_PLANE ? func_idx - 4 : func_idx; |
751 | 0 | ps_codec->apf_itrans_res[func_tmp_idx](ps_pl_tu_ctxt->pi2_tu_coeff, |
752 | 0 | ps_proc->pi2_itrans_intrmd_buf, residue_out, |
753 | 0 | ps_pl_tu_ctxt->tu_coeff_stride, trans_size, |
754 | 0 | ps_pl_tu_ctxt->zero_cols, |
755 | 0 | ps_pl_tu_ctxt->zero_rows); |
756 | 0 | } |
757 | 0 | else /* DC only */ |
758 | 0 | { |
759 | 0 | ps_codec->apf_itrans_res_dc(residue_out, trans_size, log2_trans_size, |
760 | 0 | ps_pl_tu_ctxt->coeff_value); |
761 | 0 | } |
762 | 0 | ps_pl_tu_ctxt->zero_cols = 0; |
763 | 0 | } |
764 | 0 | if(!alpha) |
765 | 0 | { |
766 | 0 | ps_codec->apf_recon[func_idx](residue_out, ps_pl_tu_ctxt->pu1_pred, |
767 | 0 | ps_pl_tu_ctxt->pu1_dst, trans_size, |
768 | 0 | ps_pl_tu_ctxt->pred_strd, ps_pl_tu_ctxt->dst_strd, |
769 | 0 | ps_pl_tu_ctxt->zero_cols); |
770 | 0 | } |
771 | 0 | } |
772 | 0 | if(alpha) |
773 | 0 | { |
774 | 0 | if(0 == ps_pl_tu_ctxt->cbf) |
775 | 0 | { |
776 | 0 | memset(residue_out, 0, trans_size * trans_size * sizeof(WORD16)); |
777 | 0 | } |
778 | 0 | ihevc_chroma_recon_nxn_ccp(pi2_res, pi2_res_uv, ps_pl_tu_ctxt->pu1_pred, |
779 | 0 | ps_pl_tu_ctxt->pu1_dst, alpha, trans_size, trans_size, |
780 | 0 | trans_size, ps_pl_tu_ctxt->pred_strd, ps_pl_tu_ctxt->dst_strd); |
781 | 0 | } |
782 | 0 | } |
783 | | /** |
| | * Selects the PF_IQITRECON_PLANE handler for one plane of a TU. |
| | * |
| | * ihevcd_iquant_itrans_resi_recon_tu_plane is returned when |
| | * - cbf is 1, the TU plane is transquant bypass or transform skip, and one |
| | * of these holds: rotation applies (SPS flag, 4x4, intra), implicit rdpcm |
| | * applies (SPS flag, intra, mode 10 or 26), or explicit_rdpcm_flag is set; |
| | * - or the PPS enables cross component prediction and the residual scale is |
| | * non-zero: either scale for luma (NULL_PLANE), the cb scale for U_PLANE, |
| | * the cr scale for V_PLANE. |
| | * In every other case ihevcd_iquant_itrans_recon_tu_plane is returned. |
| | */ |
784 | | PF_IQITRECON_PLANE get_iqitrec_func(process_ctxt_t *ps_proc, |
785 | | tu_t *ps_tu, |
786 | | tu_plane_iq_it_recon_ctxt_t *ps_pl_tu_ctxt, |
787 | | WORD32 log2_trans_size, |
788 | | CHROMA_PLANE_ID_T chroma_plane, |
789 | | WORD8 intra_flag, |
790 | | WORD8 intra_pred_mode) |
791 | 46.6M | { |
792 | 46.6M | sps_t *ps_sps = ps_proc->ps_sps; |
793 | 46.6M | pps_t *ps_pps = ps_proc->ps_pps; |
794 | 46.6M | WORD8 trans_size = 1 << log2_trans_size; |
795 | | |
796 | 46.6M | if(1 == ps_pl_tu_ctxt->cbf |
797 | 2.46M | && (ps_tu->b1_transquant_bypass || ps_pl_tu_ctxt->transform_skip_flag)) |
798 | 414k | { |
799 | 414k | if(ps_sps->i1_transform_skip_rotation_enabled_flag && trans_size == 4 && intra_flag) |
800 | 0 | return ihevcd_iquant_itrans_resi_recon_tu_plane; |
801 | 414k | if(ps_sps->i1_implicit_rdpcm_enabled_flag && intra_flag |
802 | 0 | && (intra_pred_mode == 10 || intra_pred_mode == 26)) |
803 | 0 | return ihevcd_iquant_itrans_resi_recon_tu_plane; |
804 | 414k | if(ps_pl_tu_ctxt->explicit_rdpcm_flag) |
805 | 0 | return ihevcd_iquant_itrans_resi_recon_tu_plane; |
806 | 414k | } |
807 | 46.6M | if(ps_pps->i1_cross_component_prediction_enabled_flag) |
808 | 0 | { |
809 | 0 | if((chroma_plane == NULL_PLANE |
810 | 0 | && (ps_tu->b3_cb_log2_res_scale_abs_plus1 != 0 |
811 | 0 | || ps_tu->b3_cr_log2_res_scale_abs_plus1 != 0)) |
812 | 0 | || (chroma_plane == V_PLANE && ps_tu->b3_cr_log2_res_scale_abs_plus1 != 0) |
813 | 0 | || (chroma_plane == U_PLANE && ps_tu->b3_cb_log2_res_scale_abs_plus1 != 0)) |
814 | 0 | return ihevcd_iquant_itrans_resi_recon_tu_plane; |
815 | 0 | } |
816 | 46.6M | return ihevcd_iquant_itrans_recon_tu_plane; |
817 | 46.6M | } |
818 | | #endif |
819 | | |
820 | | WORD32 ihevcd_iquant_itrans_recon_ctb(process_ctxt_t *ps_proc) |
821 | 2.61M | { |
822 | 2.61M | WORD16 *pi2_scaling_mat; |
823 | 2.61M | UWORD8 *pu1_y_dst_ctb; |
824 | 2.61M | UWORD8 *pu1_uv_dst_ctb; |
825 | 2.61M | WORD32 ctb_size; |
826 | 2.61M | codec_t *ps_codec; |
827 | 2.61M | slice_header_t *ps_slice_hdr; |
828 | 2.61M | tu_t *ps_tu; |
829 | 2.61M | WORD16 *pi2_ctb_coeff; |
830 | 2.61M | WORD32 tu_cnt; |
831 | 2.61M | WORD16 *pi2_tu_coeff; |
832 | 2.61M | WORD32 pic_strd; |
833 | 2.61M | WORD32 luma_nbr_flags; |
834 | 2.61M | WORD32 luma_nbr_flags_4x4[4] = { 0 }; |
835 | 2.61M | WORD32 chroma_nbr_flags = 0; |
836 | 2.61M | WORD32 chroma_nbr_flags_subtu = 0; |
837 | 2.61M | #ifdef ENABLE_MAIN_REXT_PROFILE |
838 | 2.61M | WORD32 disable_boundary_filter = 0; |
839 | 2.61M | #endif |
840 | 2.61M | UWORD8 u1_luma_pred_mode_first_tu = 0; |
841 | | /* Pointers for generating 2d coeffs from coeff-map */ |
842 | 2.61M | UWORD8 *pu1_tu_coeff_data; |
843 | | /* nbr avail map for CTB */ |
844 | | /* 1st bit points to neighbor (left/top_left/bot_left) */ |
845 | | /* 1Tb starts at 2nd bit from msb of 2nd value in array, followed by number of min_tu's in that ctb */ |
846 | 2.61M | UWORD32 au4_intra_nbr_avail[MAX_CTB_SIZE / MIN_TU_SIZE + 2 /* Top nbr + bot nbr */]; |
847 | 2.61M | UWORD32 top_avail_bits; |
848 | 2.61M | sps_t *ps_sps; |
849 | 2.61M | pps_t *ps_pps; |
850 | 2.61M | WORD32 intra_flag; |
851 | 2.61M | UWORD8 *pu1_pic_intra_flag; |
852 | 2.61M | WORD32 h_samp_factor, v_samp_factor; |
853 | 2.61M | WORD32 chroma_pixel_strd = 2; |
854 | 2.61M | PF_IQITRECON_PLANE iqitrecon_fptr = ihevcd_iquant_itrans_recon_tu_plane; |
855 | | |
856 | | /*************************************************************************/ |
857 | | /* Contains scaling matrix offset in the following order in a 1D buffer */ |
858 | | /* Entries that are listed as UNUSED are invalid combinations where */ |
859 | | /* scaling matrix is not used. eg: 64x64 SKIP CU, 64x64 PCM CU */ |
860 | | /* Intra 4 x 4 Y, 4 x 4 U, 4 x 4 V */ |
861 | | /* Inter 4 x 4 Y, 4 x 4 U, 4 x 4 V */ |
862 | | /* Intra 8 x 8 Y, 8 x 8 U, 8 x 8 V */ |
863 | | /* Inter 8 x 8 Y, 8 x 8 U, 8 x 8 V */ |
864 | | /* Intra 16x16 Y, 16x16 U, 16x16 V */ |
865 | | /* Inter 16x16 Y, 16x16 U, 16x16 V */ |
866 | | /* Intra 32x32 Y, 32x32 U, 32x32 V */ |
867 | | /* Inter 32x32 Y, 32x32 U, 32x32 V */ |
868 | | /* UNUSED, UNUSED, UNUSED */ |
869 | | /* UNUSED, UNUSED, UNUSED */ |
870 | | /*************************************************************************/ |
871 | 2.61M | static const WORD32 scaling_mat_offset[] = |
872 | 2.61M | { 0, 16, 32, 48, 64, 80, 96, 160, 224, 288, 352, 416, 480, 736, 992, |
873 | 2.61M | 1248, 1504, 1760, 2016, 3040, 4064, 5088, 6112, 7136, 0, 0, 0, 0, 0, 0}; |
874 | | |
875 | 2.61M | PROFILE_DISABLE_IQ_IT_RECON_INTRA_PRED(); |
876 | | |
877 | 2.61M | ps_sps = ps_proc->ps_sps; |
878 | 2.61M | ps_pps = ps_proc->ps_pps; |
879 | 2.61M | ps_slice_hdr = ps_proc->ps_slice_hdr; |
880 | 2.61M | ps_codec = ps_proc->ps_codec; |
881 | | |
882 | 2.61M | pu1_y_dst_ctb = ps_proc->pu1_cur_ctb_luma; |
883 | 2.61M | pu1_uv_dst_ctb = ps_proc->pu1_cur_ctb_chroma; |
884 | | |
885 | 2.61M | pi2_ctb_coeff = ps_proc->pi2_invscan_out; |
886 | | |
887 | 2.61M | ctb_size = (1 << ps_sps->i1_log2_ctb_size); |
888 | 2.61M | pu1_tu_coeff_data = (UWORD8 *)ps_proc->pv_tu_coeff_data; |
889 | | |
890 | 2.61M | pic_strd = ps_codec->i4_strd; |
891 | | |
892 | 2.61M | pi2_tu_coeff = pi2_ctb_coeff; |
893 | | |
894 | 2.61M | ps_tu = ps_proc->ps_tu; |
895 | | |
896 | 2.61M | if((1 == ps_sps->i1_scaling_list_enable_flag) && (1 == ps_pps->i1_pps_scaling_list_data_present_flag)) |
897 | 24.4k | { |
898 | 24.4k | pi2_scaling_mat = ps_pps->pi2_scaling_mat; |
899 | 24.4k | } |
900 | 2.58M | else |
901 | 2.58M | { |
902 | 2.58M | pi2_scaling_mat = ps_sps->pi2_scaling_mat; |
903 | 2.58M | } |
904 | | |
905 | 2.61M | { |
906 | | /* Updating the initial availability map */ |
907 | 2.61M | WORD32 i; |
908 | 2.61M | UWORD8 u1_left_ctb_avail, u1_top_lt_ctb_avail, u1_top_rt_ctb_avail, |
909 | 2.61M | u1_top_ctb_avail; |
910 | | |
911 | 2.61M | u1_left_ctb_avail = ps_proc->u1_left_ctb_avail; |
912 | 2.61M | u1_top_lt_ctb_avail = ps_proc->u1_top_lt_ctb_avail; |
913 | 2.61M | u1_top_ctb_avail = ps_proc->u1_top_ctb_avail; |
914 | 2.61M | u1_top_rt_ctb_avail = ps_proc->u1_top_rt_ctb_avail; |
915 | | |
916 | | /* Initializing the availability array */ |
917 | 2.61M | memset(au4_intra_nbr_avail, 0, |
918 | 2.61M | (MAX_CTB_SIZE / MIN_TU_SIZE + 2) * sizeof(UWORD32)); |
919 | | /* Initializing the availability array with CTB level availability flags */ |
920 | 2.61M | { |
921 | 2.61M | WORD32 rows_remaining = ps_sps->i2_pic_height_in_luma_samples - (ps_proc->i4_ctb_y << ps_sps->i1_log2_ctb_size); |
922 | 2.61M | WORD32 ctb_size_left = MIN(ctb_size, rows_remaining); |
923 | 24.6M | for(i = 0; i < ctb_size_left / MIN_TU_SIZE; i++) |
924 | 22.0M | { |
925 | 22.0M | au4_intra_nbr_avail[i + 1] = ((UWORD32)u1_left_ctb_avail << 31); |
926 | 22.0M | } |
927 | 2.61M | } |
928 | 2.61M | au4_intra_nbr_avail[0] |= (((UWORD32)u1_top_rt_ctb_avail << 31) |
929 | 2.61M | >> (1 + ctb_size / MIN_TU_SIZE)); /* 1+ctb_size/4 position bit pos from msb */ |
930 | | |
931 | 2.61M | au4_intra_nbr_avail[0] |= ((UWORD32)u1_top_lt_ctb_avail << 31); |
932 | | |
933 | 2.61M | { |
934 | 2.61M | WORD32 cols_remaining = ps_sps->i2_pic_width_in_luma_samples - (ps_proc->i4_ctb_x << ps_sps->i1_log2_ctb_size); |
935 | 2.61M | WORD32 ctb_size_top = MIN(ctb_size, cols_remaining); |
936 | 2.61M | WORD32 shift = (31 - (ctb_size / MIN_TU_SIZE)); |
937 | | |
938 | | /* ctb_size_top gives number of valid pixels remaining in the current row */ |
939 | | /* Since we need pattern of 1's starting from the MSB, an additional shift */ |
940 | | /* is needed */ |
941 | 2.61M | shift += ((ctb_size - ctb_size_top) / MIN_TU_SIZE); |
942 | | |
943 | 2.61M | top_avail_bits = ((1 << (ctb_size_top / MIN_TU_SIZE)) - 1) |
944 | 2.61M | << shift; |
945 | 2.61M | } |
946 | 2.61M | au4_intra_nbr_avail[0] |= ( |
947 | 2.61M | (u1_top_ctb_avail == 1) ? top_avail_bits : 0x0); |
948 | | /* Starting from msb 2nd bit to (1+ctb_size/4) bit, set 1 if top avail,or 0 */ |
949 | | |
950 | 2.61M | } |
951 | | |
952 | 2.61M | h_samp_factor = (CHROMA_FMT_IDC_YUV444 == ps_sps->i1_chroma_format_idc) ? 1 : 2; |
953 | 2.61M | v_samp_factor = (CHROMA_FMT_IDC_YUV420 == ps_sps->i1_chroma_format_idc) ? 2 : 1; |
954 | | |
955 | | /* Applying Inverse transform on all the TU's in CTB */ |
956 | 45.6M | for(tu_cnt = 0; tu_cnt < ps_proc->i4_ctb_tu_cnt; tu_cnt++, ps_tu++) |
957 | 43.0M | { |
958 | 43.0M | tu_plane_iq_it_recon_ctxt_t y_cb_tu = { 0 }; |
959 | 43.0M | tu_plane_iq_it_recon_ctxt_t cr_tu = { 0 }; |
960 | 43.0M | tu_plane_iq_it_recon_ctxt_t *ps_cb_tu = &y_cb_tu; |
961 | 43.0M | tu_plane_iq_it_recon_ctxt_t *ps_cr_tu = &cr_tu; |
962 | 43.0M | #ifdef ENABLE_MAIN_REXT_PROFILE |
963 | 43.0M | tu_plane_iq_it_recon_ctxt_t cb_sub_tu = { 0 }; |
964 | 43.0M | tu_plane_iq_it_recon_ctxt_t cr_sub_tu = { 0 }; |
965 | 43.0M | #endif |
966 | | |
967 | 43.0M | WORD32 num_comp, c_idx, func_idx; |
968 | | |
969 | 43.0M | WORD32 qp_div = 0, qp_rem = 0; |
970 | 43.0M | WORD32 qp_div_v = 0, qp_rem_v = 0; |
971 | 43.0M | WORD32 chroma_qp_idx; |
972 | 43.0M | WORD8 i1_chroma_pic_qp_offset, i1_chroma_slice_qp_offset; |
973 | 43.0M | WORD16 *pi2_dequant_matrix = NULL, *pi2_dequant_matrix_v = NULL; |
974 | | |
975 | 43.0M | WORD32 trans_size = 0; |
976 | 43.0M | TRANSFORM_TYPE e_trans_type; |
977 | 43.0M | WORD32 log2_y_trans_size_minus_2, log2_uv_trans_size_minus_2; |
978 | 43.0M | WORD32 log2_trans_size; |
979 | | |
980 | 43.0M | WORD32 tu_x, tu_y; |
981 | 43.0M | WORD32 tu_y_offset, tu_uv_offset; |
982 | 43.0M | UWORD8 u1_luma_pred_mode, u1_chroma_pred_mode; |
983 | 43.0M | WORD32 offset; |
984 | 43.0M | WORD32 pcm_flag; |
985 | 43.0M | WORD32 chroma_yuv420sp_vu = (ps_codec->e_ref_chroma_fmt == IV_YUV_420SP_VU); |
986 | | /* If 420SP_VU is chroma format, pred and dst pointer */ |
987 | | /* will be added +1 to point to U */ |
988 | 43.0M | WORD32 chroma_yuv420sp_vu_u_offset = 1 * chroma_yuv420sp_vu; |
989 | | /* If 420SP_VU is chroma format, pred and dst pointer */ |
990 | | /* will be added U offset of +1 and subtracted 2 */ |
991 | | /* to point to V */ |
992 | 43.0M | WORD32 chroma_yuv420sp_vu_v_offset = -2 * chroma_yuv420sp_vu; |
993 | | |
994 | 43.0M | tu_x = ps_tu->b4_pos_x * 4; /* Converting minTU unit to pixel unit */ |
995 | 43.0M | tu_y = ps_tu->b4_pos_y * 4; /* Converting minTU unit to pixel unit */ |
996 | 43.0M | { |
997 | 43.0M | WORD32 tu_abs_x = (ps_proc->i4_ctb_x << ps_sps->i1_log2_ctb_size) + (tu_x); |
998 | 43.0M | WORD32 tu_abs_y = (ps_proc->i4_ctb_y << ps_sps->i1_log2_ctb_size) + (tu_y); |
999 | | |
1000 | 43.0M | WORD32 numbytes_row = (ps_sps->i2_pic_width_in_luma_samples + 63) / 64; |
1001 | | |
1002 | 43.0M | pu1_pic_intra_flag = ps_proc->pu1_pic_intra_flag; |
1003 | 43.0M | pu1_pic_intra_flag += (tu_abs_y >> 3) * numbytes_row; |
1004 | 43.0M | pu1_pic_intra_flag += (tu_abs_x >> 6); |
1005 | | |
1006 | 43.0M | intra_flag = *pu1_pic_intra_flag; |
1007 | 43.0M | intra_flag &= (1 << ((tu_abs_x >> 3) % 8)); |
1008 | 43.0M | } |
1009 | | |
1010 | 43.0M | u1_luma_pred_mode = ps_tu->b6_luma_intra_mode; |
1011 | 43.0M | u1_chroma_pred_mode = ps_tu->b3_chroma_intra_mode_idx; |
1012 | | |
1013 | 43.0M | if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc && u1_chroma_pred_mode != 7) |
1014 | 1.81M | num_comp = 2; /* Y and UV */ |
1015 | 41.2M | else |
1016 | 41.2M | num_comp = 1; /* Y */ |
1017 | | |
1018 | 43.0M | pcm_flag = 0; |
1019 | | |
1020 | 43.0M | if((intra_flag) && (u1_luma_pred_mode == INTRA_PRED_NONE)) |
1021 | 86 | { |
1022 | 86 | UWORD8 *pu1_buf; |
1023 | 86 | UWORD8 *pu1_y_dst = pu1_y_dst_ctb; |
1024 | 86 | UWORD8 *pu1_uv_dst = pu1_uv_dst_ctb; |
1025 | 86 | WORD32 i, j; |
1026 | 86 | tu_sblk_coeff_data_t *ps_tu_sblk_coeff_data; |
1027 | 86 | WORD32 cb_size = 1 << (ps_tu->b3_size + 2); |
1028 | | |
1029 | | /* trans_size is used to update availability after reconstruction */ |
1030 | 86 | trans_size = cb_size; |
1031 | | |
1032 | 86 | pcm_flag = 1; |
1033 | | |
1034 | 86 | tu_y_offset = tu_x + tu_y * pic_strd; |
1035 | 86 | pu1_y_dst += tu_x + tu_y * pic_strd; |
1036 | | |
1037 | | /* First byte points to number of coded blocks */ |
1038 | 86 | pu1_tu_coeff_data++; |
1039 | | |
1040 | | /* Next byte points to scan type */ |
1041 | 86 | pu1_tu_coeff_data++; |
1042 | | |
1043 | 86 | ps_tu_sblk_coeff_data = (tu_sblk_coeff_data_t *)pu1_tu_coeff_data; |
1044 | | |
1045 | 86 | pu1_buf = (UWORD8 *)&ps_tu_sblk_coeff_data->ai2_level[0]; |
1046 | 86 | { |
1047 | | |
1048 | 966 | for(i = 0; i < cb_size; i++) |
1049 | 880 | { |
1050 | | //pu1_y_dst[i * pic_strd + j] = *pu1_buf++; |
1051 | 880 | memcpy(&pu1_y_dst[i * pic_strd], pu1_buf, cb_size); |
1052 | 880 | pu1_buf += cb_size; |
1053 | 880 | } |
1054 | | |
1055 | 86 | if(ps_sps->i1_chroma_format_idc != CHROMA_FMT_IDC_MONOCHROME) |
1056 | 86 | { |
1057 | 86 | WORD32 chroma_strd = (pic_strd * chroma_pixel_strd) / h_samp_factor; |
1058 | | |
1059 | 86 | pu1_uv_dst += (tu_x * chroma_pixel_strd / h_samp_factor) |
1060 | 86 | + (tu_y * chroma_pixel_strd * pic_strd / (h_samp_factor * v_samp_factor)); |
1061 | 86 | pu1_uv_dst = pu1_uv_dst + chroma_yuv420sp_vu_u_offset; |
1062 | | |
1063 | | /* U */ |
1064 | 526 | for(i = 0; i < cb_size / v_samp_factor; i++) |
1065 | 440 | { |
1066 | 3.16k | for(j = 0; j < cb_size / h_samp_factor; j++) |
1067 | 2.72k | { |
1068 | 2.72k | pu1_uv_dst[i * chroma_strd + chroma_pixel_strd * j] = *pu1_buf++; |
1069 | 2.72k | } |
1070 | 440 | } |
1071 | | |
1072 | 86 | pu1_uv_dst = pu1_uv_dst + 1 + chroma_yuv420sp_vu_v_offset; |
1073 | | |
1074 | | /* V */ |
1075 | 526 | for(i = 0; i < cb_size / v_samp_factor; i++) |
1076 | 440 | { |
1077 | 3.16k | for(j = 0; j < cb_size / h_samp_factor; j++) |
1078 | 2.72k | { |
1079 | 2.72k | pu1_uv_dst[i * chroma_strd + chroma_pixel_strd * j] = *pu1_buf++; |
1080 | 2.72k | } |
1081 | 440 | } |
1082 | 86 | } |
1083 | 86 | } |
1084 | | |
1085 | 86 | pu1_tu_coeff_data = pu1_buf; |
1086 | | |
1087 | 86 | } |
1088 | | |
1089 | | |
1090 | | |
1091 | | |
1092 | | |
1093 | 87.7M | for(c_idx = 0; c_idx < num_comp; c_idx++) |
1094 | 44.7M | { |
1095 | 44.7M | if(0 == pcm_flag) |
1096 | 44.7M | { |
1097 | | |
1098 | 44.7M | if(c_idx == 0) /* Y */ |
1099 | 42.9M | { |
1100 | | /* Initializing variables */ |
1101 | | |
1102 | 42.9M | log2_y_trans_size_minus_2 = ps_tu->b3_size; |
1103 | 42.9M | trans_size = 1 << (log2_y_trans_size_minus_2 + 2); |
1104 | 42.9M | log2_trans_size = log2_y_trans_size_minus_2 + 2; |
1105 | | |
1106 | 42.9M | tu_y_offset = tu_x + tu_y * pic_strd; |
1107 | | |
1108 | | /* Calculating scaling matrix offset */ |
1109 | 42.9M | offset = log2_y_trans_size_minus_2 * 6 + (!intra_flag) * 3 + c_idx; |
1110 | 42.9M | pi2_dequant_matrix = pi2_scaling_mat + scaling_mat_offset[offset]; |
1111 | | |
1112 | | /* 4x4 transform Luma in INTRA mode is DST */ |
1113 | 42.9M | if(log2_y_trans_size_minus_2 == 0 && intra_flag) |
1114 | 1.39M | { |
1115 | 1.39M | func_idx = log2_y_trans_size_minus_2; |
1116 | 1.39M | e_trans_type = DST_4x4; |
1117 | 1.39M | } |
1118 | 41.5M | else |
1119 | 41.5M | { |
1120 | 41.5M | func_idx = log2_y_trans_size_minus_2 + 1; |
1121 | 41.5M | e_trans_type = (TRANSFORM_TYPE)(log2_y_trans_size_minus_2 + 1); |
1122 | 41.5M | } |
1123 | | |
1124 | 42.9M | qp_div = ps_tu->b7_qp / 6; |
1125 | 42.9M | qp_rem = ps_tu->b7_qp % 6; |
1126 | | |
1127 | 42.9M | y_cb_tu.pi2_tu_coeff = pi2_tu_coeff; |
1128 | 42.9M | y_cb_tu.pu1_pred = pu1_y_dst_ctb + tu_y_offset; |
1129 | 42.9M | y_cb_tu.pu1_dst = pu1_y_dst_ctb + tu_y_offset; |
1130 | 42.9M | y_cb_tu.tu_coeff_stride = trans_size; |
1131 | 42.9M | y_cb_tu.pred_strd = pic_strd; |
1132 | 42.9M | y_cb_tu.dst_strd = pic_strd; |
1133 | 42.9M | y_cb_tu.cbf = ps_tu->b1_y_cbf; |
1134 | 42.9M | y_cb_tu.transform_skip_flag = pu1_tu_coeff_data[1] & 1; |
1135 | 42.9M | #ifdef ENABLE_MAIN_REXT_PROFILE |
1136 | 42.9M | y_cb_tu.explicit_rdpcm_flag = (pu1_tu_coeff_data[1] >> 4) & 1; |
1137 | 42.9M | y_cb_tu.explicit_rdpcm_dir = (pu1_tu_coeff_data[1] >> 5) & 1; |
1138 | 42.9M | #endif |
1139 | | /* Unpacking coeffs */ |
1140 | 42.9M | if(1 == y_cb_tu.cbf) |
1141 | 1.92M | { |
1142 | 1.92M | pu1_tu_coeff_data = ihevcd_unpack_coeffs( |
1143 | 1.92M | y_cb_tu.pi2_tu_coeff, log2_y_trans_size_minus_2 + 2, |
1144 | 1.92M | pu1_tu_coeff_data, pi2_dequant_matrix, |
1145 | 1.92M | qp_rem, qp_div, e_trans_type, |
1146 | 1.92M | ps_tu->b1_transquant_bypass, &y_cb_tu.zero_cols, |
1147 | 1.92M | &y_cb_tu.zero_rows, &y_cb_tu.coeff_type, |
1148 | 1.92M | &y_cb_tu.coeff_value); |
1149 | 1.92M | } |
1150 | 42.9M | } |
1151 | 1.81M | else /* UV interleaved */ |
1152 | 1.81M | { |
1153 | | /* Initializing variables */ |
1154 | 1.81M | const WORD16 *pi2_ihevcd_chroma_qp = |
1155 | 1.81M | CHROMA_FMT_IDC_YUV420 != ps_sps->i1_chroma_format_idc ? |
1156 | 0 | gai2_ihevcd_chroma_qp_clip : |
1157 | 1.81M | gai2_ihevcd_chroma_qp_420; |
1158 | | |
1159 | | /* Chroma :If Transform size is 4x4, keep 4x4 else do transform on (trans_size/2 x trans_size/2) */ |
1160 | 1.81M | if(ps_tu->b3_size == 0) |
1161 | 516k | { |
1162 | 516k | log2_uv_trans_size_minus_2 = ps_tu->b3_size; |
1163 | 516k | if(CHROMA_FMT_IDC_YUV444 == ps_sps->i1_chroma_format_idc) |
1164 | 0 | { |
1165 | 0 | tu_uv_offset = (tu_x * chroma_pixel_strd) |
1166 | 0 | + (tu_y * chroma_pixel_strd * pic_strd); |
1167 | 0 | } |
1168 | 516k | else |
1169 | 516k | { |
1170 | | /* Chroma 4x4 is present with 4th luma 4x4 block. For this case chroma position has to be (luma pos x - 4, luma pos y - 4) */ |
1171 | 516k | tu_uv_offset = (tu_x - 4) + ((tu_y - 4) / v_samp_factor) * pic_strd; |
1172 | 516k | } |
1173 | 516k | } |
1174 | 1.29M | else |
1175 | 1.29M | { |
1176 | 1.29M | if(CHROMA_FMT_IDC_YUV444 == ps_sps->i1_chroma_format_idc) |
1177 | 0 | { |
1178 | 0 | log2_uv_trans_size_minus_2 = ps_tu->b3_size; |
1179 | 0 | } |
1180 | 1.29M | else |
1181 | 1.29M | { |
1182 | 1.29M | log2_uv_trans_size_minus_2 = ps_tu->b3_size - 1; |
1183 | 1.29M | } |
1184 | 1.29M | tu_uv_offset = (tu_x * chroma_pixel_strd / h_samp_factor) |
1185 | 1.29M | + (tu_y * chroma_pixel_strd * pic_strd / (h_samp_factor * v_samp_factor)); |
1186 | 1.29M | } |
1187 | 1.81M | trans_size = 1 << (log2_uv_trans_size_minus_2 + 2); |
1188 | 1.81M | log2_trans_size = log2_uv_trans_size_minus_2 + 2; |
1189 | | |
1190 | | /*TODO: Add support for choosing different tables for U and V, |
1191 | | * change this to a single array to handle flat/default/custom, intra/inter, luma/chroma and various sizes |
1192 | | */ |
1193 | | /* Calculating scaling matrix offset */ |
1194 | | /* ((log2_uv_trans_size_minus_2 == 3) ? 1:3) condition check is not needed, since |
1195 | | * max uv trans size is 16x16 |
1196 | | */ |
1197 | 1.81M | offset = log2_uv_trans_size_minus_2 * 6 + (!intra_flag) * 3 + c_idx; |
1198 | 1.81M | pi2_dequant_matrix = pi2_scaling_mat + scaling_mat_offset[offset]; |
1199 | 1.81M | pi2_dequant_matrix_v = pi2_scaling_mat + scaling_mat_offset[offset + 1]; |
1200 | | |
1201 | 1.81M | func_idx = 1 + 4 + log2_uv_trans_size_minus_2; /* DST func + Y funcs + cur func index*/ |
1202 | | |
1203 | | /* Handle error cases where 64x64 TU is signalled which results in 32x32 chroma. |
1204 | | * Limit func_idx based on allowed max chroma tu size */ |
1205 | 1.81M | func_idx = MIN(func_idx, (CHROMA_FMT_IDC_YUV444 == ps_sps->i1_chroma_format_idc) ? 8 : 7); |
1206 | | |
1207 | 1.81M | e_trans_type = (TRANSFORM_TYPE)(log2_uv_trans_size_minus_2 + 1); |
1208 | | /* QP for U */ |
1209 | 1.81M | i1_chroma_pic_qp_offset = ps_pps->i1_pic_cb_qp_offset; |
1210 | 1.81M | i1_chroma_slice_qp_offset = ps_slice_hdr->i1_slice_cb_qp_offset; |
1211 | | |
1212 | 1.81M | chroma_qp_idx = ps_tu->b7_qp + i1_chroma_pic_qp_offset + i1_chroma_slice_qp_offset; |
1213 | 1.81M | chroma_qp_idx = CLIP3(chroma_qp_idx, 0, 57); |
1214 | 1.81M | qp_div = pi2_ihevcd_chroma_qp[chroma_qp_idx] / 6; |
1215 | 1.81M | qp_rem = pi2_ihevcd_chroma_qp[chroma_qp_idx] % 6; |
1216 | | |
1217 | | /* QP for V */ |
1218 | 1.81M | i1_chroma_pic_qp_offset = ps_pps->i1_pic_cr_qp_offset; |
1219 | 1.81M | i1_chroma_slice_qp_offset = ps_slice_hdr->i1_slice_cr_qp_offset; |
1220 | | |
1221 | 1.81M | chroma_qp_idx = ps_tu->b7_qp + i1_chroma_pic_qp_offset + i1_chroma_slice_qp_offset; |
1222 | 1.81M | chroma_qp_idx = CLIP3(chroma_qp_idx, 0, 57); |
1223 | 1.81M | qp_div_v = pi2_ihevcd_chroma_qp[chroma_qp_idx] / 6; |
1224 | 1.81M | qp_rem_v = pi2_ihevcd_chroma_qp[chroma_qp_idx] % 6; |
1225 | | |
1226 | 1.81M | y_cb_tu.pi2_tu_coeff = pi2_tu_coeff; |
1227 | 1.81M | y_cb_tu.pu1_pred = pu1_uv_dst_ctb + tu_uv_offset + chroma_yuv420sp_vu_u_offset; /* Pointing to start byte of U*/ |
1228 | 1.81M | y_cb_tu.pu1_dst = pu1_uv_dst_ctb + tu_uv_offset + chroma_yuv420sp_vu_u_offset; /* Pointing to start byte of U*/ |
1229 | 1.81M | y_cb_tu.tu_coeff_stride = trans_size; |
1230 | 1.81M | y_cb_tu.pred_strd = pic_strd * chroma_pixel_strd / h_samp_factor; |
1231 | 1.81M | y_cb_tu.dst_strd = pic_strd * chroma_pixel_strd / h_samp_factor; |
1232 | 1.81M | y_cb_tu.cbf = ps_tu->b1_cb_cbf; |
1233 | | |
1234 | 1.81M | cr_tu.pi2_tu_coeff = pi2_tu_coeff + trans_size * trans_size; |
1235 | 1.81M | cr_tu.pu1_pred = y_cb_tu.pu1_pred + 1 + chroma_yuv420sp_vu_v_offset; /* Pointing to start byte of V*/ |
1236 | 1.81M | cr_tu.pu1_dst = y_cb_tu.pu1_dst + 1 + chroma_yuv420sp_vu_v_offset; /* Pointing to start byte of V*/ |
1237 | 1.81M | cr_tu.tu_coeff_stride = trans_size; |
1238 | 1.81M | cr_tu.pred_strd = pic_strd * chroma_pixel_strd / h_samp_factor; |
1239 | 1.81M | cr_tu.dst_strd = pic_strd * chroma_pixel_strd / h_samp_factor; |
1240 | 1.81M | cr_tu.cbf = ps_tu->b1_cr_cbf; |
1241 | | |
1242 | | /* Unpacking coeffs */ |
1243 | 1.81M | y_cb_tu.transform_skip_flag = pu1_tu_coeff_data[1] & 1; |
1244 | 1.81M | #ifdef ENABLE_MAIN_REXT_PROFILE |
1245 | 1.81M | y_cb_tu.explicit_rdpcm_flag = (pu1_tu_coeff_data[1] >> 4) & 1; |
1246 | 1.81M | y_cb_tu.explicit_rdpcm_dir = (pu1_tu_coeff_data[1] >> 5) & 1; |
1247 | 1.81M | #endif |
1248 | 1.81M | if(1 == y_cb_tu.cbf) |
1249 | 273k | { |
1250 | 273k | pu1_tu_coeff_data = ihevcd_unpack_coeffs( |
1251 | 273k | y_cb_tu.pi2_tu_coeff, log2_uv_trans_size_minus_2 + 2, |
1252 | 273k | pu1_tu_coeff_data, pi2_dequant_matrix, |
1253 | 273k | qp_rem, qp_div, e_trans_type, |
1254 | 273k | ps_tu->b1_transquant_bypass, &y_cb_tu.zero_cols, |
1255 | 273k | &y_cb_tu.zero_rows, &y_cb_tu.coeff_type, |
1256 | 273k | &y_cb_tu.coeff_value); |
1257 | 273k | } |
1258 | 1.81M | #ifdef ENABLE_MAIN_REXT_PROFILE |
1259 | 1.81M | if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV422) |
1260 | 0 | { |
1261 | 0 | cb_sub_tu.pi2_tu_coeff = ps_proc->pi2_invscan_out_subtu; |
1262 | 0 | cb_sub_tu.pu1_pred = y_cb_tu.pu1_pred + trans_size * y_cb_tu.pred_strd; |
1263 | 0 | cb_sub_tu.pu1_dst = y_cb_tu.pu1_dst + trans_size * y_cb_tu.dst_strd; |
1264 | 0 | cb_sub_tu.tu_coeff_stride = trans_size; |
1265 | 0 | cb_sub_tu.pred_strd = pic_strd * chroma_pixel_strd / h_samp_factor; |
1266 | 0 | cb_sub_tu.dst_strd = pic_strd * chroma_pixel_strd / h_samp_factor; |
1267 | 0 | cb_sub_tu.cbf = ps_tu->b1_cb_cbf_subtu1; |
1268 | 0 | cb_sub_tu.transform_skip_flag = pu1_tu_coeff_data[1] & 1; |
1269 | 0 | cb_sub_tu.explicit_rdpcm_flag = (pu1_tu_coeff_data[1] >> 4) & 1; |
1270 | 0 | cb_sub_tu.explicit_rdpcm_dir = (pu1_tu_coeff_data[1] >> 5) & 1; |
1271 | 0 | if(1 == cb_sub_tu.cbf) |
1272 | 0 | { |
1273 | 0 | pu1_tu_coeff_data = ihevcd_unpack_coeffs( |
1274 | 0 | cb_sub_tu.pi2_tu_coeff, log2_uv_trans_size_minus_2 + 2, |
1275 | 0 | pu1_tu_coeff_data, pi2_dequant_matrix, |
1276 | 0 | qp_rem, qp_div, e_trans_type, |
1277 | 0 | ps_tu->b1_transquant_bypass, &cb_sub_tu.zero_cols, |
1278 | 0 | &cb_sub_tu.zero_rows, &cb_sub_tu.coeff_type, |
1279 | 0 | &cb_sub_tu.coeff_value); |
1280 | 0 | } |
1281 | 0 | } |
1282 | 1.81M | #endif |
1283 | | |
1284 | 1.81M | cr_tu.transform_skip_flag = pu1_tu_coeff_data[1] & 1; |
1285 | 1.81M | #ifdef ENABLE_MAIN_REXT_PROFILE |
1286 | 1.81M | cr_tu.explicit_rdpcm_flag = (pu1_tu_coeff_data[1] >> 4) & 1; |
1287 | 1.81M | cr_tu.explicit_rdpcm_dir = (pu1_tu_coeff_data[1] >> 5) & 1; |
1288 | 1.81M | #endif |
1289 | 1.81M | if(1 == cr_tu.cbf) |
1290 | 269k | { |
1291 | 269k | pu1_tu_coeff_data = ihevcd_unpack_coeffs( |
1292 | 269k | cr_tu.pi2_tu_coeff, log2_uv_trans_size_minus_2 + 2, |
1293 | 269k | pu1_tu_coeff_data, pi2_dequant_matrix_v, |
1294 | 269k | qp_rem_v, qp_div_v, e_trans_type, |
1295 | 269k | ps_tu->b1_transquant_bypass, &cr_tu.zero_cols, |
1296 | 269k | &cr_tu.zero_rows, &cr_tu.coeff_type, &cr_tu.coeff_value); |
1297 | 269k | } |
1298 | 1.81M | #ifdef ENABLE_MAIN_REXT_PROFILE |
1299 | 1.81M | if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV422) |
1300 | 0 | { |
1301 | 0 | cr_sub_tu.pi2_tu_coeff = ps_proc->pi2_invscan_out_subtu + trans_size * trans_size; |
1302 | 0 | cr_sub_tu.pu1_pred = cr_tu.pu1_pred + trans_size * cr_tu.pred_strd; |
1303 | 0 | cr_sub_tu.pu1_dst = cr_tu.pu1_dst + trans_size * cr_tu.dst_strd; |
1304 | 0 | cr_sub_tu.tu_coeff_stride = trans_size; |
1305 | 0 | cr_sub_tu.pred_strd = pic_strd * chroma_pixel_strd / h_samp_factor; |
1306 | 0 | cr_sub_tu.dst_strd = pic_strd * chroma_pixel_strd / h_samp_factor; |
1307 | 0 | cr_sub_tu.cbf = ps_tu->b1_cr_cbf_subtu1; |
1308 | 0 | cr_sub_tu.transform_skip_flag = pu1_tu_coeff_data[1] & 1; |
1309 | 0 | cr_sub_tu.explicit_rdpcm_flag = (pu1_tu_coeff_data[1] >> 4) & 1; |
1310 | 0 | cr_sub_tu.explicit_rdpcm_dir = (pu1_tu_coeff_data[1] >> 5) & 1; |
1311 | 0 | if(1 == cr_sub_tu.cbf) |
1312 | 0 | { |
1313 | 0 | pu1_tu_coeff_data = ihevcd_unpack_coeffs( |
1314 | 0 | cr_sub_tu.pi2_tu_coeff, log2_uv_trans_size_minus_2 + 2, |
1315 | 0 | pu1_tu_coeff_data, pi2_dequant_matrix_v, |
1316 | 0 | qp_rem_v, qp_div_v, e_trans_type, |
1317 | 0 | ps_tu->b1_transquant_bypass, &cr_sub_tu.zero_cols, |
1318 | 0 | &cr_sub_tu.zero_rows, &cr_sub_tu.coeff_type, |
1319 | 0 | &cr_sub_tu.coeff_value); |
1320 | 0 | } |
1321 | 0 | } |
1322 | 1.81M | #endif |
1323 | 1.81M | } |
1324 | 44.7M | WORD8 subtu_idx = 0; |
1325 | 44.7M | do |
1326 | 44.7M | { |
1327 | | /***************************************************************/ |
1328 | | /****************** Intra Prediction **************************/ |
1329 | | /***************************************************************/ |
1330 | 44.7M | if(intra_flag) /* Intra */ |
1331 | 3.26M | { |
1332 | | /* While (MAX_TU_SIZE * 2 * 2) + 1 is the actual size needed, |
1333 | | au1_ref_sub_out size is kept as multiple of 8, |
1334 | | so that SIMD functions can load 64 bits. Also some SIMD |
1335 | | modules read few bytes before the start of the array, so |
1336 | | allocate 16 extra bytes at the start */ |
1337 | 3.26M | UWORD8 au1_ref_sub_out[16 + (MAX_TU_SIZE * 2 * 2 * 2) + 8] = {0}; |
1338 | 3.26M | UWORD8 *pu1_ref_sub_out = &au1_ref_sub_out[16]; |
1339 | 3.26M | UWORD8 *pu1_top_left, *pu1_top, *pu1_left; |
1340 | 3.26M | WORD32 luma_pred_func_idx, chroma_pred_func_idx; |
1341 | | |
1342 | | /* Get the neighbour availability flags */ |
1343 | | /* Done for only Y */ |
1344 | 3.26M | if(c_idx == 0) |
1345 | 2.15M | { |
1346 | | /* Get neighbor availability for Y only */ |
1347 | 2.15M | luma_nbr_flags = ihevcd_get_intra_nbr_flag(ps_proc, |
1348 | 2.15M | ps_tu, |
1349 | 2.15M | au4_intra_nbr_avail, |
1350 | 2.15M | ps_sps->i2_pic_width_in_luma_samples, |
1351 | 2.15M | ps_pps->i1_constrained_intra_pred_flag, |
1352 | 2.15M | trans_size, |
1353 | 2.15M | ctb_size); |
1354 | | |
1355 | 2.15M | if(trans_size == 4) |
1356 | 1.39M | luma_nbr_flags_4x4[(ps_tu->b4_pos_x % 2) + (ps_tu->b4_pos_y % 2) * 2] = luma_nbr_flags; |
1357 | | |
1358 | 2.15M | if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV444) |
1359 | 0 | { |
1360 | 0 | chroma_nbr_flags = luma_nbr_flags; |
1361 | 0 | } |
1362 | 2.15M | else if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV422) |
1363 | 0 | { |
1364 | 0 | WORD32 bot_left, left, top, tp_right, tp_left; |
1365 | 0 | tp_left = (luma_nbr_flags & 0x10000); |
1366 | 0 | tp_right = (luma_nbr_flags & 0x0f000); |
1367 | 0 | top = (luma_nbr_flags & 0x00f00); |
1368 | 0 | left = (luma_nbr_flags & 0x000f0); |
1369 | 0 | bot_left = (luma_nbr_flags & 0x0000f); |
1370 | 0 | chroma_nbr_flags = tp_left | tp_right | top | left | (left >> 4); |
1371 | 0 | chroma_nbr_flags_subtu = ((left != 0 ? 1 : 0) << 16) | (0xf << 8) |
1372 | 0 | | left | bot_left; |
1373 | 0 | } |
1374 | 2.15M | else if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV420) |
1375 | 2.15M | { |
1376 | 2.15M | if(((ps_tu->b4_pos_x % 2 == 0) && (ps_tu->b4_pos_y % 2 == 0))) |
1377 | 1.11M | chroma_nbr_flags = luma_nbr_flags; |
1378 | 2.15M | } |
1379 | | |
1380 | | /* Initializing nbr pointers */ |
1381 | 2.15M | pu1_top = y_cb_tu.pu1_pred - pic_strd; |
1382 | 2.15M | pu1_left = y_cb_tu.pu1_pred - 1; |
1383 | 2.15M | pu1_top_left = y_cb_tu.pu1_pred - pic_strd - 1; |
1384 | | |
1385 | | /* call reference array substitution */ |
1386 | 2.15M | if(luma_nbr_flags == 0x1ffff) |
1387 | 518k | ps_codec->s_func_selector.ihevc_intra_pred_luma_ref_subst_all_avlble_fptr( |
1388 | 518k | pu1_top_left, |
1389 | 518k | pu1_top, pu1_left, y_cb_tu.pred_strd, trans_size, luma_nbr_flags, pu1_ref_sub_out, 1); |
1390 | 1.63M | else |
1391 | 1.63M | ps_codec->s_func_selector.ihevc_intra_pred_luma_ref_substitution_fptr( |
1392 | 1.63M | pu1_top_left, |
1393 | 1.63M | pu1_top, pu1_left, y_cb_tu.pred_strd, trans_size, luma_nbr_flags, pu1_ref_sub_out, 1); |
1394 | | |
1395 | | /* call reference filtering */ |
1396 | 2.15M | ps_codec->s_func_selector.ihevc_intra_pred_ref_filtering_fptr( |
1397 | 2.15M | pu1_ref_sub_out, |
1398 | 2.15M | trans_size, |
1399 | 2.15M | pu1_ref_sub_out, |
1400 | 2.15M | u1_luma_pred_mode, |
1401 | 2.15M | #ifdef ENABLE_MAIN_REXT_PROFILE |
1402 | 2.15M | (ps_sps->i1_intra_smoothing_disabled_flag << 3 |
1403 | 2.15M | | ps_sps->i1_strong_intra_smoothing_enable_flag) |
1404 | | #else |
1405 | | ps_sps->i1_strong_intra_smoothing_enable_flag |
1406 | | #endif |
1407 | 2.15M | ); |
1408 | | |
1409 | | /* use the look up to get the function idx */ |
1410 | 2.15M | luma_pred_func_idx = g_i4_ip_funcs[u1_luma_pred_mode]; |
1411 | | |
1412 | 2.15M | #ifdef ENABLE_MAIN_REXT_PROFILE |
1413 | 2.15M | if(ps_sps->i1_implicit_rdpcm_enabled_flag && ps_tu->b1_transquant_bypass |
1414 | 0 | && (u1_luma_pred_mode == 10 || u1_luma_pred_mode == 26)) |
1415 | 0 | disable_boundary_filter = 1; |
1416 | 2.15M | #endif |
1417 | | /* call the intra prediction function */ |
1418 | 2.15M | ps_codec->apf_intra_pred_luma[luma_pred_func_idx]( |
1419 | 2.15M | pu1_ref_sub_out, 1, |
1420 | 2.15M | y_cb_tu.pu1_pred, |
1421 | 2.15M | y_cb_tu.pred_strd, |
1422 | 2.15M | trans_size, |
1423 | 2.15M | #ifdef ENABLE_MAIN_REXT_PROFILE |
1424 | 2.15M | (u1_luma_pred_mode == 10 || u1_luma_pred_mode == 26) ? |
1425 | 263k | disable_boundary_filter : |
1426 | 2.15M | u1_luma_pred_mode |
1427 | | #else |
1428 | | u1_luma_pred_mode |
1429 | | #endif |
1430 | 2.15M | ); |
1431 | 2.15M | } |
1432 | 1.11M | else |
1433 | 1.11M | { |
1434 | | |
1435 | 1.11M | #ifdef ENABLE_MAIN_REXT_PROFILE |
1436 | 1.11M | if(subtu_idx != 0) |
1437 | 0 | { |
1438 | 0 | ps_cb_tu = &cb_sub_tu; |
1439 | 0 | ps_cr_tu = &cr_sub_tu; |
1440 | 0 | chroma_nbr_flags = chroma_nbr_flags_subtu; |
1441 | 0 | } |
1442 | 1.11M | #endif |
1443 | | |
1444 | | /* In case of yuv420sp_vu, prediction happens as usual. */ |
1445 | | /* So point the pu1_pred pointer to original prediction pointer */ |
1446 | 1.11M | UWORD8 *pu1_pred_orig = ps_cb_tu->pu1_pred - chroma_yuv420sp_vu_u_offset; |
1447 | | |
1448 | | /* Top-Left | Top-Right | Top | Left | Bottom-Left |
1449 | | * 1 4 4 4 4 |
1450 | | * |
1451 | | * Generating chroma_nbr_flags depending upon the transform size */ |
1452 | 1.11M | if(ps_tu->b3_size == 0) |
1453 | 348k | { |
1454 | 348k | if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV420) |
1455 | 348k | { |
1456 | | /* Take TL,T,L flags of First luma 4x4 block */ |
1457 | 348k | chroma_nbr_flags = (luma_nbr_flags_4x4[0] & 0x10FF0); |
1458 | | /* Take TR flags of Second luma 4x4 block */ |
1459 | 348k | chroma_nbr_flags |= (luma_nbr_flags_4x4[1] & 0x0F000); |
1460 | | /* Take BL flags of Third luma 4x4 block */ |
1461 | 348k | chroma_nbr_flags |= (luma_nbr_flags_4x4[2] & 0x0000F); |
1462 | 348k | } |
1463 | 8 | else if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV422) |
1464 | 0 | { |
1465 | 0 | if(subtu_idx == 0) |
1466 | 0 | { |
1467 | | /* Take TL,T,L flags of First luma 4x4 block */ |
1468 | 0 | chroma_nbr_flags = (luma_nbr_flags_4x4[0] & 0x10FF0); |
1469 | | /* Take TR flags of Second luma 4x4 block */ |
1470 | 0 | chroma_nbr_flags |= (luma_nbr_flags_4x4[1] & 0x0F000); |
1471 | | /* Take BL flags of first luma 4x4 block */ |
1472 | 0 | chroma_nbr_flags |= (luma_nbr_flags_4x4[0] & 0x0000F); |
1473 | 0 | } |
1474 | 0 | else |
1475 | 0 | { |
1476 | | /* Take TL,T,L flags of Third luma 4x4 block */ |
1477 | 0 | chroma_nbr_flags = (luma_nbr_flags_4x4[2] & 0x10FF0); |
1478 | | /* Take BL flags of Third luma 4x4 block */ |
1479 | 0 | chroma_nbr_flags |= (luma_nbr_flags_4x4[2] & 0x0000F); |
1480 | 0 | } |
1481 | 0 | } |
1482 | 348k | } |
1483 | | |
1484 | | /* Initializing nbr pointers */ |
1485 | 1.11M | pu1_top = pu1_pred_orig - (pic_strd * chroma_pixel_strd / h_samp_factor); |
1486 | 1.11M | pu1_left = pu1_pred_orig - 2; |
1487 | 1.11M | pu1_top_left = pu1_pred_orig - (pic_strd * chroma_pixel_strd / h_samp_factor) - 2; |
1488 | | |
1489 | 1.11M | if(subtu_idx == 0) |
1490 | 1.11M | { |
1491 | | /* Chroma pred mode derivation from luma pred mode */ |
1492 | 1.11M | { |
1493 | 1.11M | tu_t *ps_tu_tmp = ps_tu; |
1494 | 1.11M | if(ps_sps->i1_chroma_format_idc != CHROMA_FMT_IDC_YUV444) |
1495 | 1.11M | { |
1496 | 3.25M | while(!ps_tu_tmp->b1_first_tu_in_cu) |
1497 | 2.13M | { |
1498 | 2.13M | ps_tu_tmp--; |
1499 | 2.13M | } |
1500 | 1.11M | } |
1501 | 1.11M | u1_luma_pred_mode_first_tu = ps_tu_tmp->b6_luma_intra_mode; |
1502 | 1.11M | } |
1503 | 1.11M | if(4 == u1_chroma_pred_mode) |
1504 | 845k | u1_chroma_pred_mode = u1_luma_pred_mode_first_tu; |
1505 | 266k | else |
1506 | 266k | { |
1507 | 266k | u1_chroma_pred_mode = gau1_intra_pred_chroma_modes[u1_chroma_pred_mode]; |
1508 | | |
1509 | 266k | if(u1_chroma_pred_mode == u1_luma_pred_mode_first_tu) |
1510 | 29.0k | { |
1511 | 29.0k | u1_chroma_pred_mode = INTRA_ANGULAR(34); |
1512 | 29.0k | } |
1513 | 266k | } |
1514 | 1.11M | if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV422) |
1515 | 0 | { |
1516 | 0 | u1_chroma_pred_mode = gau1_intra_pred_chroma_modes_422[u1_chroma_pred_mode]; |
1517 | 0 | } |
1518 | 1.11M | } |
1519 | | |
1520 | | /* call the chroma reference array substitution */ |
1521 | 1.11M | ps_codec->s_func_selector.ihevc_intra_pred_chroma_ref_substitution_fptr( |
1522 | 1.11M | pu1_top_left, |
1523 | 1.11M | pu1_top, pu1_left, |
1524 | 1.11M | ps_cb_tu->pred_strd, |
1525 | 1.11M | trans_size, chroma_nbr_flags, pu1_ref_sub_out, 1, |
1526 | 1.11M | ps_sps->i1_chroma_format_idc); |
1527 | | |
1528 | 1.11M | #ifdef ENABLE_MAIN_REXT_PROFILE |
1529 | | /* call reference filtering */ |
1530 | 1.11M | if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV444) |
1531 | 0 | { |
1532 | 0 | ps_codec->s_func_selector.ihevc_intra_pred_chroma_ref_filtering_fptr( |
1533 | 0 | pu1_ref_sub_out, |
1534 | 0 | trans_size, |
1535 | 0 | pu1_ref_sub_out, |
1536 | 0 | u1_chroma_pred_mode, |
1537 | 0 | (ps_sps->i1_intra_smoothing_disabled_flag << 3 |
1538 | 0 | | ps_sps->i1_strong_intra_smoothing_enable_flag)); |
1539 | 0 | } |
1540 | 1.11M | #endif |
1541 | | |
1542 | | /* use the look up to get the function idx */ |
1543 | 1.11M | chroma_pred_func_idx = g_i4_ip_funcs[u1_chroma_pred_mode]; |
1544 | | |
1545 | | /* call the intra prediction function */ |
1546 | 1.11M | ps_codec->apf_intra_pred_chroma[chroma_pred_func_idx](pu1_ref_sub_out, 1, pu1_pred_orig, ps_cb_tu->pred_strd, trans_size, u1_chroma_pred_mode); |
1547 | 1.11M | } |
1548 | 3.26M | } |
1549 | | |
1550 | | /* Updating number of transform types */ |
1551 | 44.7M | STATS_UPDATE_ALL_TRANS(e_trans_type, c_idx); |
1552 | | |
1553 | 44.7M | #ifdef ENABLE_MAIN_REXT_PROFILE |
1554 | 44.7M | iqitrecon_fptr = get_iqitrec_func( |
1555 | 44.7M | ps_proc, ps_tu, ps_cb_tu, log2_trans_size, |
1556 | 44.7M | c_idx != 0 ? U_PLANE : NULL_PLANE, intra_flag, |
1557 | 44.7M | c_idx == 0 ? u1_luma_pred_mode : u1_chroma_pred_mode); |
1558 | 44.7M | #endif |
1559 | | /* IQ, IT and Recon for Y if c_idx == 0, and U if c_idx !=0 */ |
1560 | 44.7M | iqitrecon_fptr(ps_proc, ps_tu, ps_cb_tu, func_idx, log2_trans_size, |
1561 | 44.7M | c_idx != 0 ? U_PLANE : NULL_PLANE, intra_flag, |
1562 | 44.7M | c_idx == 0 ? u1_luma_pred_mode : u1_chroma_pred_mode); |
1563 | | /* IQ, IT and Recon for V */ |
1564 | 44.7M | if(c_idx != 0) |
1565 | 1.81M | { |
1566 | 1.81M | #ifdef ENABLE_MAIN_REXT_PROFILE |
1567 | 1.81M | iqitrecon_fptr = get_iqitrec_func(ps_proc, ps_tu, ps_cr_tu, log2_trans_size, |
1568 | 1.81M | V_PLANE, intra_flag, u1_chroma_pred_mode); |
1569 | 1.81M | #endif |
1570 | 1.81M | iqitrecon_fptr(ps_proc, ps_tu, ps_cr_tu, func_idx, log2_trans_size, V_PLANE, |
1571 | 1.81M | intra_flag, u1_chroma_pred_mode); |
1572 | 1.81M | } |
1573 | 44.7M | } |
1574 | 44.7M | while(c_idx != 0 && ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV422 |
1575 | 0 | && ++subtu_idx < 2); |
1576 | 44.7M | } |
1577 | | |
1578 | | /* Neighbor availability inside CTB */ |
1579 | | /* 1 bit per 4x4. Indicates whether that 4x4 block has been reconstructed (available) */ |
1580 | | /* Used for neighbor availability in intra pred */ |
1581 | 44.7M | if(c_idx == 0) |
1582 | 43.0M | { |
1583 | 43.0M | WORD32 i; |
1584 | 43.0M | WORD32 trans_in_min_tu; |
1585 | 43.0M | UWORD32 cur_tu_in_bits; |
1586 | 43.0M | UWORD32 cur_tu_avail_flag; |
1587 | | |
1588 | 43.0M | trans_in_min_tu = trans_size / MIN_TU_SIZE; |
1589 | 43.0M | cur_tu_in_bits = (1 << trans_in_min_tu) - 1; |
1590 | 43.0M | cur_tu_in_bits = cur_tu_in_bits << (32 - trans_in_min_tu); |
1591 | | |
1592 | 43.0M | cur_tu_avail_flag = cur_tu_in_bits >> (ps_tu->b4_pos_x + 1); |
1593 | | |
1594 | 135M | for(i = 0; i < trans_in_min_tu; i++) |
1595 | 92.1M | au4_intra_nbr_avail[1 + ps_tu->b4_pos_y + i] |= |
1596 | 92.1M | cur_tu_avail_flag; |
1597 | 43.0M | } |
1598 | 44.7M | } |
1599 | 43.0M | } |
1600 | 2.61M | ps_proc->pv_tu_coeff_data = pu1_tu_coeff_data; |
1601 | | |
1602 | 2.61M | return ps_proc->i4_ctb_tu_cnt; |
1603 | 2.61M | } |
1604 | | |