/src/libhevc/decoder/ihevcd_process_slice.c
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ******************************************************************************/ |
18 | | /** |
19 | | ******************************************************************************* |
20 | | * @file |
21 | | * ihevcd_process_slice.c |
22 | | * |
23 | | * @brief |
24 | | * Contains functions for processing slice data |
25 | | * |
26 | | * @author |
27 | | * Harish |
28 | | * |
29 | | * @par List of Functions: |
30 | | * |
31 | | * @remarks |
32 | | * None |
33 | | * |
34 | | ******************************************************************************* |
35 | | */ |
36 | | /*****************************************************************************/ |
37 | | /* File Includes */ |
38 | | /*****************************************************************************/ |
39 | | #include <stdio.h> |
40 | | #include <stddef.h> |
41 | | #include <stdlib.h> |
42 | | #include <string.h> |
43 | | #include <assert.h> |
44 | | |
45 | | #include "ihevc_typedefs.h" |
46 | | #include "iv.h" |
47 | | #include "ivd.h" |
48 | | #include "ihevcd_cxa.h" |
49 | | #include "ithread.h" |
50 | | |
51 | | #include "ihevc_defs.h" |
52 | | #include "ihevc_debug.h" |
53 | | #include "ihevc_defs.h" |
54 | | #include "ihevc_structs.h" |
55 | | #include "ihevc_macros.h" |
56 | | #include "ihevc_platform_macros.h" |
57 | | #include "ihevc_cabac_tables.h" |
58 | | #include "ihevc_padding.h" |
59 | | #include "ihevc_iquant_itrans_recon.h" |
60 | | #include "ihevc_chroma_iquant_itrans_recon.h" |
61 | | #include "ihevc_recon.h" |
62 | | #include "ihevc_chroma_recon.h" |
63 | | #include "ihevc_iquant_recon.h" |
64 | | #include "ihevc_chroma_iquant_recon.h" |
65 | | #include "ihevc_intra_pred.h" |
66 | | |
67 | | #include "ihevc_error.h" |
68 | | #include "ihevc_common_tables.h" |
69 | | #include "ihevc_quant_tables.h" |
70 | | #include "ihevcd_common_tables.h" |
71 | | |
72 | | #include "ihevcd_profile.h" |
73 | | #include "ihevcd_trace.h" |
74 | | #include "ihevcd_defs.h" |
75 | | #include "ihevcd_function_selector.h" |
76 | | #include "ihevcd_structs.h" |
77 | | #include "ihevcd_error.h" |
78 | | #include "ihevcd_nal.h" |
79 | | #include "ihevcd_bitstream.h" |
80 | | #include "ihevcd_job_queue.h" |
81 | | #include "ihevcd_utils.h" |
82 | | #include "ihevcd_debug.h" |
83 | | #include "ihevcd_get_mv.h" |
84 | | #include "ihevcd_inter_pred.h" |
85 | | #include "ihevcd_iquant_itrans_recon_ctb.h" |
86 | | #include "ihevcd_boundary_strength.h" |
87 | | #include "ihevcd_deblk.h" |
88 | | #include "ihevcd_fmt_conv.h" |
89 | | #include "ihevcd_sao.h" |
90 | | #include "ihevcd_profile.h" |
91 | | |
92 | | IHEVCD_ERROR_T ihevcd_fmt_conv(codec_t *ps_codec, |
93 | | process_ctxt_t *ps_proc, |
94 | | UWORD8 *pu1_y_dst, |
95 | | UWORD8 *pu1_u_dst, |
96 | | UWORD8 *pu1_v_dst, |
97 | | WORD32 cur_row, |
98 | | WORD32 num_rows); |
99 | | |
/**
 * Processing stages tracked per CTB in the proc map.
 *
 * ihevcd_proc_map_check() and ihevcd_proc_map_update() use the enumerator
 * value as a bit position (mask = 1 << value) inside the CTB's map byte, so
 * the numeric values below must not change.
 */
typedef enum
{
    PROC_ALL        = 0,
    PROC_INTER_PRED = 1,
    PROC_RECON      = 2,
    PROC_DEBLK      = 3,
    PROC_SAO        = 4
}proc_type_t;
108 | | |
109 | | void ihevcd_proc_map_check(process_ctxt_t *ps_proc, proc_type_t proc_type, WORD32 nctb) |
110 | 18.4M | { |
111 | 18.4M | tile_t *ps_tile = ps_proc->ps_tile; |
112 | 18.4M | sps_t *ps_sps = ps_proc->ps_sps; |
113 | 18.4M | pps_t *ps_pps = ps_proc->ps_pps; |
114 | 18.4M | codec_t *ps_codec = ps_proc->ps_codec; |
115 | 18.4M | WORD32 idx; |
116 | 18.4M | WORD32 nop_cnt; |
117 | 18.4M | WORD32 bit_pos = proc_type; |
118 | 18.4M | WORD32 bit_mask = (1 << bit_pos); |
119 | | |
120 | 18.4M | if(ps_proc->i4_check_proc_status) |
121 | 14.2M | { |
122 | 14.2M | nop_cnt = PROC_NOP_CNT; |
123 | 200M | while(1) |
124 | 200M | { |
125 | 200M | volatile UWORD8 *pu1_buf; |
126 | 200M | volatile WORD32 status; |
127 | 200M | status = 1; |
128 | | /* Check if all dependencies for the next nCTBs are met */ |
129 | 200M | { |
130 | 200M | WORD32 x_pos; |
131 | | |
132 | 200M | { |
133 | | /* Check if the top right of next nCTBs are processed */ |
134 | 200M | if(ps_proc->i4_ctb_y > 0) |
135 | 198M | { |
136 | 198M | x_pos = (ps_proc->i4_ctb_tile_x + nctb); |
137 | 198M | idx = MIN(x_pos, (ps_tile->u2_wd - 1)); |
138 | | |
139 | | /* Check if top-right CTB for the last CTB in nCTB is within the tile */ |
140 | 198M | { |
141 | 198M | idx += ps_tile->u1_pos_x; |
142 | 198M | idx += ((ps_proc->i4_ctb_y - 1) |
143 | 198M | * ps_sps->i2_pic_wd_in_ctb); |
144 | 198M | pu1_buf = (ps_codec->pu1_proc_map + idx); |
145 | 198M | status = *pu1_buf & bit_mask; |
146 | 198M | } |
147 | 198M | } |
148 | 200M | } |
149 | | |
150 | | /* If tiles are enabled, then test left and top-left as well */ |
151 | 200M | ps_pps = ps_proc->ps_pps; |
152 | 200M | if(ps_pps->i1_tiles_enabled_flag) |
153 | 5.12M | { |
154 | | /*Check if left ctb is processed*/ |
155 | 5.12M | if((ps_proc->i4_ctb_x > 0) && ((0 != status))) |
156 | 464k | { |
157 | 464k | x_pos = ps_tile->u1_pos_x + ps_proc->i4_ctb_tile_x - 1; |
158 | 464k | idx = x_pos + (ps_proc->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb); |
159 | 464k | pu1_buf = (ps_codec->pu1_proc_map + idx); |
160 | 464k | status = *pu1_buf & bit_mask; |
161 | 464k | } |
162 | | |
163 | | /*Check if top left ctb is processed*/ |
164 | 5.12M | if((ps_proc->i4_ctb_x > 0) && (0 != status) && (ps_proc->i4_ctb_y > 0)) |
165 | 329k | { |
166 | 329k | x_pos = ps_tile->u1_pos_x + ps_proc->i4_ctb_tile_x - 1; |
167 | 329k | idx = x_pos + ((ps_proc->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb); |
168 | 329k | pu1_buf = (ps_codec->pu1_proc_map + idx); |
169 | 329k | status = *pu1_buf & bit_mask; |
170 | 329k | } |
171 | 5.12M | } |
172 | 200M | } |
173 | | |
174 | 200M | if(status) |
175 | 14.2M | break; |
176 | | |
177 | | /* if dependencies are not met, then wait for few cycles. |
178 | | * Even after few iterations, if the dependencies are not met then yield |
179 | | */ |
180 | 186M | if(nop_cnt > 0) |
181 | 165M | { |
182 | 165M | NOP(128); |
183 | 165M | nop_cnt -= 128; |
184 | 165M | } |
185 | 20.4M | else |
186 | 20.4M | { |
187 | 20.4M | nop_cnt = PROC_NOP_CNT; |
188 | 20.4M | ithread_yield(); |
189 | | //NOP(128 * 16); |
190 | 20.4M | } |
191 | 186M | } |
192 | 14.2M | DATA_SYNC(); |
193 | 14.2M | } |
194 | 18.4M | } |
195 | | |
196 | | void ihevcd_proc_map_update(process_ctxt_t *ps_proc, proc_type_t proc_type, WORD32 nctb) |
197 | 23.0M | { |
198 | 23.0M | codec_t *ps_codec = ps_proc->ps_codec; |
199 | 23.0M | WORD32 i, idx; |
200 | 23.0M | WORD32 bit_pos = proc_type; |
201 | 23.0M | WORD32 bit_mask = (1 << bit_pos); |
202 | | |
203 | | /* Update the current CTBs processing status */ |
204 | 23.0M | if(ps_proc->i4_check_proc_status) |
205 | 17.7M | { |
206 | 17.7M | DATA_SYNC(); |
207 | 35.6M | for(i = 0; i < nctb; i++) |
208 | 17.8M | { |
209 | 17.8M | sps_t *ps_sps = ps_proc->ps_sps; |
210 | 17.8M | UWORD8 *pu1_buf; |
211 | 17.8M | idx = (ps_proc->i4_ctb_x + i); |
212 | 17.8M | idx += ((ps_proc->i4_ctb_y) * ps_sps->i2_pic_wd_in_ctb); |
213 | 17.8M | pu1_buf = (ps_codec->pu1_proc_map + idx); |
214 | 17.8M | *pu1_buf = *pu1_buf | bit_mask; |
215 | 17.8M | } |
216 | 17.7M | } |
217 | 23.0M | } |
218 | | |
219 | | |
220 | | void ihevcd_slice_hdr_update(process_ctxt_t *ps_proc) |
221 | 18.4M | { |
222 | | |
223 | | /* Slice x and y are initialized in proc_init. But initialize slice x and y count here |
224 | | * if a new slice begins at the middle of a row since proc_init is invoked only at the beginning of each row */ |
225 | 18.4M | if(!((ps_proc->i4_ctb_x == 0) && (ps_proc->i4_ctb_y == 0))) |
226 | 18.4M | { |
227 | 18.4M | slice_header_t *ps_slice_hdr_next = ps_proc->ps_codec->ps_slice_hdr_base + ((ps_proc->i4_cur_slice_idx + 1) & (MAX_SLICE_HDR_CNT - 1)); |
228 | | |
229 | 18.4M | if((ps_slice_hdr_next->i2_ctb_x == ps_proc->i4_ctb_x) |
230 | 405k | && (ps_slice_hdr_next->i2_ctb_y == ps_proc->i4_ctb_y)) |
231 | 35.8k | { |
232 | 35.8k | if(0 == ps_slice_hdr_next->i1_dependent_slice_flag) |
233 | 28.0k | { |
234 | 28.0k | ps_proc->i4_ctb_slice_x = 0; |
235 | 28.0k | ps_proc->i4_ctb_slice_y = 0; |
236 | 28.0k | } |
237 | | |
238 | 35.8k | ps_proc->i4_cur_slice_idx++; |
239 | 35.8k | ps_proc->ps_slice_hdr = ps_slice_hdr_next; |
240 | 35.8k | } |
241 | | |
242 | 18.4M | } |
243 | 18.4M | } |
244 | | |
245 | | void ihevcd_ctb_pos_update(process_ctxt_t *ps_proc, WORD32 nctb) |
246 | 13.8M | { |
247 | 13.8M | WORD32 tile_start_ctb_idx, slice_start_ctb_idx; |
248 | 13.8M | slice_header_t *ps_slice_hdr = ps_proc->ps_slice_hdr; |
249 | 13.8M | tile_t *ps_tile = ps_proc->ps_tile; |
250 | 13.8M | sps_t *ps_sps = ps_proc->ps_sps; |
251 | | |
252 | | /* Update x and y positions */ |
253 | 13.8M | ps_proc->i4_ctb_tile_x += nctb; |
254 | 13.8M | ps_proc->i4_ctb_x += nctb; |
255 | | |
256 | 13.8M | ps_proc->i4_ctb_slice_x += nctb; |
257 | | /*If tile are enabled, then handle the tile & slice counters differently*/ |
258 | 13.8M | if(ps_proc->ps_pps->i1_tiles_enabled_flag) |
259 | 2.35M | { |
260 | | /* Update slice counters*/ |
261 | 2.35M | slice_start_ctb_idx = ps_slice_hdr->i2_ctb_x + (ps_slice_hdr->i2_ctb_y * ps_sps->i2_pic_wd_in_ctb); |
262 | 2.35M | tile_start_ctb_idx = ps_tile->u1_pos_x + (ps_tile->u1_pos_y * ps_sps->i2_pic_wd_in_ctb); |
263 | | /* |
264 | | * There can be 2 cases where slice counters must be handled differently. |
265 | | * 1 - Multiple tiles span across a single/one of the many slice. |
266 | | * 2 - Multiple slices span across a single/one of the many tiles. |
267 | | */ |
268 | | |
269 | | /*Case 1 */ |
270 | 2.35M | if(slice_start_ctb_idx < tile_start_ctb_idx) |
271 | 1.78M | { |
272 | | /*End of tile row*/ |
273 | 1.78M | if(ps_proc->i4_ctb_x > ps_slice_hdr->i2_ctb_x) |
274 | 1.68M | { |
275 | 1.68M | if(ps_proc->i4_ctb_slice_x >= (ps_tile->u2_wd + ps_tile->u1_pos_x)) |
276 | 117k | { |
277 | 117k | ps_proc->i4_ctb_slice_y++; |
278 | 117k | ps_proc->i4_ctb_slice_x = ps_proc->i4_ctb_slice_x |
279 | 117k | - ps_tile->u2_wd; |
280 | 117k | } |
281 | 1.68M | } |
282 | 96.1k | else |
283 | 96.1k | { |
284 | 96.1k | WORD32 temp_stride = (ps_sps->i2_pic_wd_in_ctb - ps_slice_hdr->i2_ctb_x); |
285 | 96.1k | if(ps_proc->i4_ctb_slice_x >= (temp_stride + ps_tile->u2_wd + ps_tile->u1_pos_x)) |
286 | 5.67k | { |
287 | 5.67k | ps_proc->i4_ctb_slice_y++; |
288 | 5.67k | ps_proc->i4_ctb_slice_x = ps_proc->i4_ctb_slice_x |
289 | 5.67k | - ps_tile->u2_wd; |
290 | 5.67k | } |
291 | 96.1k | } |
292 | 1.78M | } |
293 | | /*Case 2*/ |
294 | 568k | else if(ps_proc->i4_ctb_slice_x >= (ps_tile->u2_wd)) |
295 | 55.0k | { |
296 | | /*End of tile row*/ |
297 | 55.0k | ps_proc->i4_ctb_slice_y++; |
298 | 55.0k | ps_proc->i4_ctb_slice_x = 0; |
299 | 55.0k | } |
300 | 2.35M | } |
301 | 11.5M | else |
302 | 11.5M | { |
303 | 11.5M | if(ps_proc->i4_ctb_slice_x >= ps_tile->u2_wd) |
304 | 410k | { |
305 | 410k | ps_proc->i4_ctb_slice_y++; |
306 | 410k | ps_proc->i4_ctb_slice_x = ps_proc->i4_ctb_slice_x |
307 | 410k | - ps_tile->u2_wd; |
308 | 410k | } |
309 | 11.5M | } |
310 | 13.8M | } |
311 | | |
312 | | void ihevcd_ctb_avail_update(process_ctxt_t *ps_proc) |
313 | 9.24M | { |
314 | 9.24M | slice_header_t *ps_slice_hdr = ps_proc->ps_slice_hdr; |
315 | 9.24M | sps_t *ps_sps = ps_proc->ps_sps; |
316 | 9.24M | tile_t *ps_tile_prev; |
317 | 9.24M | tile_t *ps_tile = ps_proc->ps_tile; |
318 | 9.24M | WORD32 cur_pu_idx; |
319 | 9.24M | WORD32 tile_start_ctb_idx, slice_start_ctb_idx; |
320 | 9.24M | WORD16 i2_wd_in_ctb; |
321 | 9.24M | WORD32 continuous_tiles = 0; |
322 | 9.24M | WORD32 cur_ctb_idx; |
323 | 9.24M | WORD32 check_tile_wd; |
324 | | |
325 | 9.24M | if((0 != ps_tile->u1_pos_x) && (0 != ps_tile->u1_pos_y)) |
326 | 809k | { |
327 | 809k | ps_tile_prev = ps_tile - 1; |
328 | 809k | } |
329 | 8.43M | else |
330 | 8.43M | { |
331 | 8.43M | ps_tile_prev = ps_tile; |
332 | 8.43M | } |
333 | | |
334 | | |
335 | 9.24M | check_tile_wd = ps_slice_hdr->i2_ctb_x + ps_tile_prev->u2_wd; |
336 | 9.24M | if(!(((check_tile_wd >= ps_sps->i2_pic_wd_in_ctb) && (check_tile_wd % ps_sps->i2_pic_wd_in_ctb == ps_tile->u1_pos_x)) |
337 | 1.60M | || ((ps_slice_hdr->i2_ctb_x == ps_tile->u1_pos_x)))) |
338 | 1.24M | { |
339 | 1.24M | continuous_tiles = 1; |
340 | 1.24M | } |
341 | | |
342 | 9.24M | slice_start_ctb_idx = ps_slice_hdr->i2_ctb_x + (ps_slice_hdr->i2_ctb_y * ps_sps->i2_pic_wd_in_ctb); |
343 | 9.24M | tile_start_ctb_idx = ps_tile->u1_pos_x + (ps_tile->u1_pos_y * ps_sps->i2_pic_wd_in_ctb); |
344 | | |
345 | 9.24M | if((slice_start_ctb_idx < tile_start_ctb_idx) && (continuous_tiles)) |
346 | 967k | { |
347 | | //Slices span across multiple tiles. |
348 | 967k | i2_wd_in_ctb = ps_sps->i2_pic_wd_in_ctb; |
349 | 967k | } |
350 | 8.27M | else |
351 | 8.27M | { |
352 | 8.27M | i2_wd_in_ctb = ps_tile->u2_wd; |
353 | 8.27M | } |
354 | 9.24M | cur_ctb_idx = ps_proc->i4_ctb_x |
355 | 9.24M | + ps_proc->i4_ctb_y * (ps_sps->i2_pic_wd_in_ctb); |
356 | | |
357 | | /* Ctb level availability */ |
358 | | /* Bottom left will not be available at a CTB level, no need to pass this */ |
359 | 9.24M | ps_proc->u1_top_ctb_avail = 1; |
360 | 9.24M | ps_proc->u1_left_ctb_avail = 1; |
361 | 9.24M | ps_proc->u1_top_lt_ctb_avail = 1; |
362 | 9.24M | ps_proc->u1_top_rt_ctb_avail = 1; |
363 | | /* slice and tile boundaries */ |
364 | | |
365 | 9.24M | if((0 == ps_proc->i4_ctb_y) || (0 == ps_proc->i4_ctb_tile_y)) |
366 | 990k | { |
367 | 990k | ps_proc->u1_top_ctb_avail = 0; |
368 | 990k | ps_proc->u1_top_lt_ctb_avail = 0; |
369 | 990k | ps_proc->u1_top_rt_ctb_avail = 0; |
370 | 990k | } |
371 | | |
372 | 9.24M | if((0 == ps_proc->i4_ctb_x) || (0 == ps_proc->i4_ctb_tile_x)) |
373 | 410k | { |
374 | 410k | ps_proc->u1_left_ctb_avail = 0; |
375 | 410k | ps_proc->u1_top_lt_ctb_avail = 0; |
376 | 410k | if((0 == ps_proc->i4_ctb_slice_y) || (0 == ps_proc->i4_ctb_tile_y)) |
377 | 65.6k | { |
378 | 65.6k | ps_proc->u1_top_ctb_avail = 0; |
379 | 65.6k | if((i2_wd_in_ctb - 1) != ps_proc->i4_ctb_slice_x) |
380 | 61.5k | { |
381 | 61.5k | ps_proc->u1_top_rt_ctb_avail = 0; |
382 | 61.5k | } |
383 | 65.6k | } |
384 | 410k | } |
385 | | /*For slices not beginning at start of a ctb row*/ |
386 | 8.83M | else if(ps_proc->i4_ctb_x > 0) |
387 | 8.83M | { |
388 | 8.83M | if((0 == ps_proc->i4_ctb_slice_y) || (0 == ps_proc->i4_ctb_tile_y)) |
389 | 1.06M | { |
390 | 1.06M | ps_proc->u1_top_ctb_avail = 0; |
391 | 1.06M | ps_proc->u1_top_lt_ctb_avail = 0; |
392 | 1.06M | if(0 == ps_proc->i4_ctb_slice_x) |
393 | 15.8k | { |
394 | 15.8k | ps_proc->u1_left_ctb_avail = 0; |
395 | 15.8k | } |
396 | 1.06M | if((i2_wd_in_ctb - 1) != ps_proc->i4_ctb_slice_x) |
397 | 1.02M | { |
398 | 1.02M | ps_proc->u1_top_rt_ctb_avail = 0; |
399 | 1.02M | } |
400 | 1.06M | } |
401 | 7.76M | else if((1 == ps_proc->i4_ctb_slice_y) && (0 == ps_proc->i4_ctb_slice_x)) |
402 | 1.44k | { |
403 | 1.44k | ps_proc->u1_top_lt_ctb_avail = 0; |
404 | 1.44k | } |
405 | 8.83M | } |
406 | | |
407 | 9.24M | if((ps_proc->i4_ctb_x == (ps_sps->i2_pic_wd_in_ctb - 1)) || ((ps_tile->u2_wd - 1) == ps_proc->i4_ctb_tile_x)) |
408 | 410k | { |
409 | 410k | ps_proc->u1_top_rt_ctb_avail = 0; |
410 | 410k | } |
411 | | |
412 | | |
413 | 9.24M | { |
414 | 9.24M | WORD32 next_ctb_idx; |
415 | 9.24M | next_ctb_idx = cur_ctb_idx + 1; |
416 | | |
417 | 9.24M | if(ps_tile->u2_wd == (ps_proc->i4_ctb_tile_x + 1)) |
418 | 410k | { |
419 | 410k | if((ps_proc->i4_ctb_tile_y + 1) == ps_tile->u2_ht) |
420 | 54.5k | { |
421 | | //Last tile |
422 | 54.5k | if(((ps_proc->i4_ctb_tile_y + 1 + ps_tile->u1_pos_y) == ps_sps->i2_pic_ht_in_ctb) && ((ps_proc->i4_ctb_tile_x + 1 + ps_tile->u1_pos_x) == ps_sps->i2_pic_wd_in_ctb)) |
423 | 26.8k | { |
424 | 26.8k | next_ctb_idx = cur_ctb_idx + 1; |
425 | 26.8k | } |
426 | 27.6k | else //Not last tile, but new tile |
427 | 27.6k | { |
428 | 27.6k | tile_t *ps_tile_next = ps_proc->ps_tile + 1; |
429 | 27.6k | next_ctb_idx = ps_tile_next->u1_pos_x + (ps_tile_next->u1_pos_y * ps_sps->i2_pic_wd_in_ctb); |
430 | 27.6k | } |
431 | 54.5k | } |
432 | 356k | else //End of each tile row |
433 | 356k | { |
434 | 356k | next_ctb_idx = ((ps_tile->u1_pos_y + ps_proc->i4_ctb_tile_y + 1) * ps_sps->i2_pic_wd_in_ctb) + ps_tile->u1_pos_x; |
435 | 356k | } |
436 | 410k | } |
437 | 9.24M | ps_proc->i4_next_pu_ctb_cnt = next_ctb_idx; |
438 | 9.24M | ps_proc->i4_ctb_pu_cnt = |
439 | 9.24M | ps_proc->pu4_pic_pu_idx[next_ctb_idx] |
440 | 9.24M | - ps_proc->pu4_pic_pu_idx[cur_ctb_idx]; |
441 | 9.24M | cur_pu_idx = ps_proc->pu4_pic_pu_idx[cur_ctb_idx]; |
442 | 9.24M | ps_proc->i4_ctb_start_pu_idx = cur_pu_idx; |
443 | 9.24M | ps_proc->ps_pu = &ps_proc->ps_pic_pu[cur_pu_idx]; |
444 | 9.24M | } |
445 | 9.24M | } |
446 | | |
447 | | void ihevcd_update_ctb_tu_cnt(process_ctxt_t *ps_proc) |
448 | 6.72M | { |
449 | 6.72M | sps_t *ps_sps = ps_proc->ps_sps; |
450 | 6.72M | codec_t *ps_codec = ps_proc->ps_codec; |
451 | 6.72M | WORD32 cur_ctb_idx; |
452 | | |
453 | 6.72M | cur_ctb_idx = ps_proc->i4_ctb_x |
454 | 6.72M | + ps_proc->i4_ctb_y * (ps_sps->i2_pic_wd_in_ctb); |
455 | | |
456 | 6.72M | { |
457 | 6.72M | tile_t *ps_tile; |
458 | 6.72M | WORD32 next_ctb_tu_idx; |
459 | 6.72M | ps_tile = ps_proc->ps_tile; |
460 | | |
461 | | |
462 | 6.72M | if(1 == ps_codec->i4_num_cores) |
463 | 1.05M | { |
464 | 1.05M | next_ctb_tu_idx = cur_ctb_idx % RESET_TU_BUF_NCTB + 1; |
465 | 1.05M | if(ps_tile->u2_wd == (ps_proc->i4_ctb_tile_x + 1)) |
466 | 92.4k | { |
467 | 92.4k | if((ps_proc->i4_ctb_tile_y + 1) == ps_tile->u2_ht) |
468 | 10.0k | { |
469 | | //Last tile |
470 | 10.0k | if(((ps_proc->i4_ctb_tile_y + 1 + ps_tile->u1_pos_y) == ps_sps->i2_pic_ht_in_ctb) && ((ps_proc->i4_ctb_tile_x + 1 + ps_tile->u1_pos_x) == ps_sps->i2_pic_wd_in_ctb)) |
471 | 3.37k | { |
472 | 3.37k | next_ctb_tu_idx = (cur_ctb_idx % RESET_TU_BUF_NCTB) + 1; |
473 | 3.37k | } |
474 | 6.66k | else //Not last tile, but new tile |
475 | 6.66k | { |
476 | 6.66k | tile_t *ps_tile_next = ps_proc->ps_tile + 1; |
477 | 6.66k | next_ctb_tu_idx = ps_tile_next->u1_pos_x + (ps_tile_next->u1_pos_y * ps_sps->i2_pic_wd_in_ctb); |
478 | 6.66k | } |
479 | 10.0k | } |
480 | 82.4k | else //End of each tile row |
481 | 82.4k | { |
482 | 82.4k | next_ctb_tu_idx = ((ps_tile->u1_pos_y + ps_proc->i4_ctb_tile_y + 1) * ps_sps->i2_pic_wd_in_ctb) + ps_tile->u1_pos_x; |
483 | 82.4k | } |
484 | 92.4k | } |
485 | 1.05M | ps_proc->i4_next_tu_ctb_cnt = next_ctb_tu_idx; |
486 | 1.05M | ps_proc->i4_ctb_tu_cnt = ps_proc->pu4_pic_tu_idx[next_ctb_tu_idx] - ps_proc->pu4_pic_tu_idx[cur_ctb_idx % RESET_TU_BUF_NCTB]; |
487 | 1.05M | } |
488 | 5.66M | else |
489 | 5.66M | { |
490 | 5.66M | next_ctb_tu_idx = cur_ctb_idx + 1; |
491 | 5.66M | if(ps_tile->u2_wd == (ps_proc->i4_ctb_tile_x + 1)) |
492 | 184k | { |
493 | 184k | if((ps_proc->i4_ctb_tile_y + 1) == ps_tile->u2_ht) |
494 | 26.5k | { |
495 | | //Last tile |
496 | 26.5k | if(((ps_proc->i4_ctb_tile_y + 1 + ps_tile->u1_pos_y) == ps_sps->i2_pic_ht_in_ctb) && ((ps_proc->i4_ctb_tile_x + 1 + ps_tile->u1_pos_x) == ps_sps->i2_pic_wd_in_ctb)) |
497 | 16.5k | { |
498 | 16.5k | next_ctb_tu_idx = (cur_ctb_idx % RESET_TU_BUF_NCTB) + 1; |
499 | 16.5k | } |
500 | 10.0k | else //Not last tile, but new tile |
501 | 10.0k | { |
502 | 10.0k | tile_t *ps_tile_next = ps_proc->ps_tile + 1; |
503 | 10.0k | next_ctb_tu_idx = ps_tile_next->u1_pos_x + (ps_tile_next->u1_pos_y * ps_sps->i2_pic_wd_in_ctb); |
504 | 10.0k | } |
505 | 26.5k | } |
506 | 158k | else //End of each tile row |
507 | 158k | { |
508 | 158k | next_ctb_tu_idx = ((ps_tile->u1_pos_y + ps_proc->i4_ctb_tile_y + 1) * ps_sps->i2_pic_wd_in_ctb) + ps_tile->u1_pos_x; |
509 | 158k | } |
510 | 184k | } |
511 | 5.66M | ps_proc->i4_next_tu_ctb_cnt = next_ctb_tu_idx; |
512 | 5.66M | ps_proc->i4_ctb_tu_cnt = ps_proc->pu4_pic_tu_idx[next_ctb_tu_idx] - |
513 | 5.66M | ps_proc->pu4_pic_tu_idx[cur_ctb_idx]; |
514 | 5.66M | } |
515 | 6.72M | } |
516 | 6.72M | } |
517 | | |
518 | | IHEVCD_ERROR_T ihevcd_process(process_ctxt_t *ps_proc) |
519 | 205k | { |
520 | 205k | IHEVCD_ERROR_T ret = (IHEVCD_ERROR_T)IHEVCD_SUCCESS; |
521 | 205k | codec_t *ps_codec; |
522 | 205k | sps_t *ps_sps = ps_proc->ps_sps; |
523 | | |
524 | 205k | WORD32 nctb; |
525 | 205k | WORD32 i; |
526 | 205k | WORD32 idx; |
527 | 205k | WORD32 nop_cnt; |
528 | 205k | WORD32 num_minpu_in_ctb; |
529 | 205k | WORD32 cur_slice_idx, cur_ctb_tile_x, cur_ctb_slice_x, cur_ctb_tile_y, cur_ctb_slice_y; |
530 | 205k | WORD32 nxt_ctb_slice_y, nxt_ctb_slice_x; |
531 | 205k | tu_t *ps_tu_cur, *ps_tu_nxt; |
532 | 205k | UWORD8 *pu1_pu_map_cur, *pu1_pu_map_nxt; |
533 | 205k | WORD32 num_ctb, num_ctb_tmp; |
534 | 205k | proc_type_t proc_type; |
535 | | |
536 | | |
537 | 205k | WORD32 ctb_size = 1 << ps_sps->i1_log2_ctb_size; |
538 | | |
539 | 205k | PROFILE_DISABLE_PROCESS_CTB(); |
540 | | |
541 | 205k | ps_codec = ps_proc->ps_codec; |
542 | 205k | num_minpu_in_ctb = (ctb_size / MIN_PU_SIZE) * (ctb_size / MIN_PU_SIZE); |
543 | | |
544 | 205k | nctb = MIN(ps_codec->i4_proc_nctb, ps_proc->i4_ctb_cnt); |
545 | 205k | nctb = MIN(nctb, (ps_proc->ps_tile->u2_wd - ps_proc->i4_ctb_tile_x)); |
546 | | |
547 | 205k | if(ps_proc->i4_cur_slice_idx > (MAX_SLICE_HDR_CNT - 2 * ps_sps->i2_pic_wd_in_ctb)) |
548 | 3.00k | { |
549 | 3.00k | num_ctb = 1; |
550 | 3.00k | } |
551 | 202k | else |
552 | 202k | { |
553 | 202k | num_ctb = ps_proc->i4_nctb; |
554 | 202k | } |
555 | 205k | nxt_ctb_slice_y = ps_proc->i4_ctb_slice_y; |
556 | 205k | nxt_ctb_slice_x = ps_proc->i4_ctb_slice_x; |
557 | 205k | pu1_pu_map_nxt = ps_proc->pu1_pu_map; |
558 | 205k | ps_tu_nxt = ps_proc->ps_tu; |
559 | | |
560 | 752k | while(ps_proc->i4_ctb_cnt) |
561 | 546k | { |
562 | 546k | ps_proc->i4_ctb_slice_y = nxt_ctb_slice_y; |
563 | 546k | ps_proc->i4_ctb_slice_x = nxt_ctb_slice_x; |
564 | 546k | ps_proc->pu1_pu_map = pu1_pu_map_nxt; |
565 | 546k | ps_proc->ps_tu = ps_tu_nxt; |
566 | | |
567 | 546k | cur_ctb_tile_x = ps_proc->i4_ctb_tile_x; |
568 | 546k | cur_ctb_tile_y = ps_proc->i4_ctb_tile_y; |
569 | 546k | cur_ctb_slice_x = ps_proc->i4_ctb_slice_x; |
570 | 546k | cur_ctb_slice_y = ps_proc->i4_ctb_slice_y; |
571 | 546k | cur_slice_idx = ps_proc->i4_cur_slice_idx; |
572 | 546k | ps_tu_cur = ps_proc->ps_tu; |
573 | 546k | pu1_pu_map_cur = ps_proc->pu1_pu_map; |
574 | 546k | proc_type = PROC_INTER_PRED; |
575 | | |
576 | 546k | if(ps_proc->i4_ctb_cnt < num_ctb) |
577 | 0 | { |
578 | 0 | num_ctb = ps_proc->i4_ctb_cnt; |
579 | 0 | } |
580 | 546k | num_ctb_tmp = num_ctb; |
581 | | |
582 | 5.16M | while(num_ctb_tmp) |
583 | 4.62M | { |
584 | 4.62M | slice_header_t *ps_slice_hdr; |
585 | 4.62M | tile_t *ps_tile = ps_proc->ps_tile; |
586 | | |
587 | | /* Waiting for Parsing to be done*/ |
588 | 4.62M | { |
589 | | |
590 | | |
591 | 4.62M | nop_cnt = PROC_NOP_CNT; |
592 | 4.62M | if(ps_proc->i4_check_parse_status || ps_proc->i4_check_proc_status) |
593 | 3.56M | { |
594 | 10.8M | while(1) |
595 | 10.8M | { |
596 | 10.8M | volatile UWORD8 *pu1_buf; |
597 | 10.8M | volatile WORD32 status; |
598 | 10.8M | status = 1; |
599 | | /* Check if all dependencies for the next nCTBs are met */ |
600 | | /* Check if the next nCTBs are parsed */ |
601 | 10.8M | if(ps_proc->i4_check_parse_status) |
602 | 10.8M | { |
603 | 10.8M | idx = (ps_proc->i4_ctb_x + nctb - 1); |
604 | 10.8M | idx += (ps_proc->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb); |
605 | 10.8M | pu1_buf = (ps_codec->pu1_parse_map + idx); |
606 | 10.8M | status = *pu1_buf; |
607 | 10.8M | } |
608 | | |
609 | 10.8M | if(status) |
610 | 3.56M | break; |
611 | | |
612 | | /* if dependencies are not met, then wait for few cycles. |
613 | | * Even after few iterations, if the dependencies are not met then yield |
614 | | */ |
615 | 7.24M | if(nop_cnt > 0) |
616 | 6.49M | { |
617 | 6.49M | NOP(128); |
618 | 6.49M | nop_cnt -= 128; |
619 | 6.49M | } |
620 | 749k | else |
621 | 749k | { |
622 | 749k | nop_cnt = PROC_NOP_CNT; |
623 | 749k | ithread_yield(); |
624 | 749k | } |
625 | 7.24M | } |
626 | 3.56M | } |
627 | 4.62M | } |
628 | | |
629 | | /* Check proc map to ensure dependencies for recon are met */ |
630 | 4.62M | ihevcd_proc_map_check(ps_proc, proc_type, nctb); |
631 | | |
632 | 4.62M | ihevcd_slice_hdr_update(ps_proc); |
633 | 4.62M | ps_slice_hdr = ps_proc->ps_slice_hdr; |
634 | | |
635 | | //ihevcd_mv_prediction(); |
636 | | //ihevcd_lvl_unpack(); |
637 | | //ihevcd_inter_iq_it_recon(); |
638 | | //Following does prediction, iq, it and recon on a TU by TU basis for intra TUs |
639 | | //ihevcd_intra_process(); |
640 | | //ihevcd_ctb_boundary_strength_islice(ps_proc, ctb_size); |
641 | | //ihevcd_deblk_ctb(ps_proc); |
642 | | |
643 | | /* iq,it recon of Intra TU */ |
644 | 4.62M | { |
645 | 4.62M | UWORD32 *pu4_ctb_top_pu_idx, *pu4_ctb_left_pu_idx, *pu4_ctb_top_left_pu_idx; |
646 | 4.62M | WORD32 cur_ctb_idx; |
647 | | |
648 | 4.62M | ihevcd_ctb_avail_update(ps_proc); |
649 | | |
650 | | #if DEBUG_DUMP_FRAME_BUFFERS_INFO |
651 | | au1_pic_avail_ctb_flags[ps_proc->i4_ctb_x + ps_proc->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb] = |
652 | | ((ps_proc->u1_top_ctb_avail << 3) | (ps_proc->u1_left_ctb_avail << 2) | (ps_proc->u1_top_lt_ctb_avail << 1) | (ps_proc->u1_top_rt_ctb_avail)); |
653 | | au4_pic_ctb_slice_xy[ps_proc->i4_ctb_x + ps_proc->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb] = |
654 | | (((UWORD16)ps_proc->i4_ctb_slice_x << 16) | ((UWORD16)ps_proc->i4_ctb_slice_y << 16)); |
655 | | #endif |
656 | | |
657 | | /*************************************************/ |
658 | | /**************** MV pred **********************/ |
659 | | /*************************************************/ |
660 | 4.62M | if(PSLICE == ps_slice_hdr->i1_slice_type |
661 | 4.38M | || BSLICE == ps_slice_hdr->i1_slice_type) |
662 | 1.33M | { |
663 | 1.33M | mv_ctxt_t s_mv_ctxt; |
664 | | |
665 | 1.33M | pu4_ctb_top_pu_idx = ps_proc->pu4_pic_pu_idx_top |
666 | 1.33M | + (ps_proc->i4_ctb_x * ctb_size / MIN_PU_SIZE); |
667 | 1.33M | pu4_ctb_left_pu_idx = ps_proc->pu4_pic_pu_idx_left; |
668 | 1.33M | pu4_ctb_top_left_pu_idx = &ps_proc->u4_ctb_top_left_pu_idx; |
669 | | |
670 | | /* Initializing s_mv_ctxt */ |
671 | 1.33M | if(ps_codec->i4_num_cores > MV_PRED_NUM_CORES_THRESHOLD) |
672 | 503k | { |
673 | 503k | s_mv_ctxt.ps_pps = ps_proc->ps_pps; |
674 | 503k | s_mv_ctxt.ps_sps = ps_proc->ps_sps; |
675 | 503k | s_mv_ctxt.ps_slice_hdr = ps_proc->ps_slice_hdr; |
676 | 503k | s_mv_ctxt.i4_ctb_x = ps_proc->i4_ctb_x; |
677 | 503k | s_mv_ctxt.i4_ctb_y = ps_proc->i4_ctb_y; |
678 | 503k | s_mv_ctxt.ps_pu = ps_proc->ps_pu; |
679 | 503k | s_mv_ctxt.ps_pic_pu = ps_proc->ps_pic_pu; |
680 | 503k | s_mv_ctxt.ps_tile = ps_tile; |
681 | 503k | s_mv_ctxt.pu4_pic_pu_idx_map = ps_proc->pu4_pic_pu_idx_map; |
682 | 503k | s_mv_ctxt.pu4_pic_pu_idx = ps_proc->pu4_pic_pu_idx; |
683 | 503k | s_mv_ctxt.pu1_pic_pu_map = ps_proc->pu1_pic_pu_map; |
684 | 503k | s_mv_ctxt.i4_ctb_pu_cnt = ps_proc->i4_ctb_pu_cnt; |
685 | 503k | s_mv_ctxt.i4_ctb_start_pu_idx = ps_proc->i4_ctb_start_pu_idx; |
686 | 503k | s_mv_ctxt.u1_top_ctb_avail = ps_proc->u1_top_ctb_avail; |
687 | 503k | s_mv_ctxt.u1_top_rt_ctb_avail = ps_proc->u1_top_rt_ctb_avail; |
688 | 503k | s_mv_ctxt.u1_top_lt_ctb_avail = ps_proc->u1_top_lt_ctb_avail; |
689 | 503k | s_mv_ctxt.u1_left_ctb_avail = ps_proc->u1_left_ctb_avail; |
690 | | |
691 | 503k | ihevcd_get_mv_ctb(&s_mv_ctxt, pu4_ctb_top_pu_idx, |
692 | 503k | pu4_ctb_left_pu_idx, pu4_ctb_top_left_pu_idx); |
693 | 503k | } |
694 | | |
695 | 1.33M | ihevcd_inter_pred_ctb(ps_proc); |
696 | 1.33M | } |
697 | 3.28M | else if(ps_codec->i4_num_cores > MV_PRED_NUM_CORES_THRESHOLD) |
698 | 1.86M | { |
699 | 1.86M | WORD32 next_ctb_idx, num_pu_per_ctb, ctb_start_pu_idx, pu_cnt; |
700 | 1.86M | pu_t *ps_pu; |
701 | 1.86M | WORD32 num_minpu_in_ctb = (ctb_size / MIN_PU_SIZE) * (ctb_size / MIN_PU_SIZE); |
702 | 1.86M | UWORD8 *pu1_pic_pu_map_ctb = ps_proc->pu1_pic_pu_map + |
703 | 1.86M | (ps_proc->i4_ctb_x + ps_proc->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb) * num_minpu_in_ctb; |
704 | 1.86M | WORD32 row, col; |
705 | 1.86M | UWORD32 *pu4_nbr_pu_idx = ps_proc->pu4_pic_pu_idx_map; |
706 | 1.86M | WORD32 nbr_pu_idx_strd = MAX_CTB_SIZE / MIN_PU_SIZE + 2; |
707 | 1.86M | WORD32 ctb_size_in_min_pu = (ctb_size / MIN_PU_SIZE); |
708 | | |
709 | | /* Neighbor PU idx update inside CTB */ |
710 | | /* 1byte per 4x4. Indicates the PU idx that 4x4 block belongs to */ |
711 | | |
712 | 1.86M | cur_ctb_idx = ps_proc->i4_ctb_x |
713 | 1.86M | + ps_proc->i4_ctb_y * (ps_sps->i2_pic_wd_in_ctb); |
714 | 1.86M | next_ctb_idx = ps_proc->i4_next_pu_ctb_cnt; |
715 | 1.86M | num_pu_per_ctb = ps_proc->pu4_pic_pu_idx[next_ctb_idx] |
716 | 1.86M | - ps_proc->pu4_pic_pu_idx[cur_ctb_idx]; |
717 | 1.86M | ctb_start_pu_idx = ps_proc->pu4_pic_pu_idx[cur_ctb_idx]; |
718 | 1.86M | ps_pu = &ps_proc->ps_pic_pu[ctb_start_pu_idx]; |
719 | | |
720 | 95.0M | for(pu_cnt = 0; pu_cnt < num_pu_per_ctb; pu_cnt++, ps_pu++) |
721 | 93.1M | { |
722 | 93.1M | UWORD32 cur_pu_idx; |
723 | 93.1M | WORD32 pu_ht = (ps_pu->b4_ht + 1) << 2; |
724 | 93.1M | WORD32 pu_wd = (ps_pu->b4_wd + 1) << 2; |
725 | | |
726 | 93.1M | cur_pu_idx = ctb_start_pu_idx + pu_cnt; |
727 | | |
728 | 283M | for(row = 0; row < pu_ht / MIN_PU_SIZE; row++) |
729 | 616M | for(col = 0; col < pu_wd / MIN_PU_SIZE; col++) |
730 | 426M | pu4_nbr_pu_idx[(1 + ps_pu->b4_pos_x + col) |
731 | 426M | + (1 + ps_pu->b4_pos_y + row) |
732 | 426M | * nbr_pu_idx_strd] = |
733 | 426M | cur_pu_idx; |
734 | 93.1M | } |
735 | | |
736 | | /* Updating Top and Left pointers */ |
737 | 1.86M | { |
738 | 1.86M | WORD32 rows_remaining = ps_sps->i2_pic_height_in_luma_samples |
739 | 1.86M | - (ps_proc->i4_ctb_y << ps_sps->i1_log2_ctb_size); |
740 | 1.86M | WORD32 ctb_size_left = MIN(ctb_size, rows_remaining); |
741 | | |
742 | | /* Top Left */ |
743 | | /* saving top left before updating top ptr, as updating top ptr will overwrite the top left for the next ctb */ |
744 | 1.86M | ps_proc->u4_ctb_top_left_pu_idx = ps_proc->pu4_pic_pu_idx_top[((ps_proc->i4_ctb_x + 1) * ctb_size / MIN_PU_SIZE) - 1]; |
745 | 31.3M | for(i = 0; i < ctb_size / MIN_PU_SIZE; i++) |
746 | 29.5M | { |
747 | | /* Left */ |
748 | | /* Last column of au4_nbr_pu_idx */ |
749 | 29.5M | ps_proc->pu4_pic_pu_idx_left[i] = |
750 | 29.5M | pu4_nbr_pu_idx[(ctb_size / MIN_PU_SIZE) + (i + 1) * nbr_pu_idx_strd]; |
751 | | /* Top */ |
752 | | /* Last row of au4_nbr_pu_idx */ |
753 | 29.5M | ps_proc->pu4_pic_pu_idx_top[(ps_proc->i4_ctb_x * ctb_size / MIN_PU_SIZE) + i] = |
754 | 29.5M | pu4_nbr_pu_idx[(ctb_size_left / MIN_PU_SIZE) * nbr_pu_idx_strd + i + 1]; |
755 | | |
756 | 29.5M | } |
757 | | |
758 | | /* Updating the CTB level PU idx (Used for collocated MV pred)*/ |
759 | 1.86M | { |
760 | 1.86M | WORD32 ctb_row, ctb_col, index_pic_map, index_nbr_map; |
761 | 1.86M | WORD32 first_pu_of_ctb; |
762 | 1.86M | first_pu_of_ctb = pu4_nbr_pu_idx[1 + nbr_pu_idx_strd]; |
763 | 1.86M | UWORD32 cur_ctb_ht_in_min_pu = MIN(((ps_sps->i2_pic_height_in_luma_samples |
764 | 1.86M | - (ps_proc->i4_ctb_y << ps_sps->i1_log2_ctb_size)) / MIN_PU_SIZE), ctb_size_in_min_pu); |
765 | 1.86M | UWORD32 cur_ctb_wd_in_min_pu = MIN(((ps_sps->i2_pic_width_in_luma_samples |
766 | 1.86M | - (ps_proc->i4_ctb_x << ps_sps->i1_log2_ctb_size)) / MIN_PU_SIZE), ctb_size_in_min_pu); |
767 | | |
768 | 1.86M | index_pic_map = 0 * ctb_size_in_min_pu + 0; |
769 | 1.86M | index_nbr_map = (0 + 1) * nbr_pu_idx_strd + (0 + 1); |
770 | | |
771 | 29.2M | for(ctb_row = 0; ctb_row < cur_ctb_ht_in_min_pu; ctb_row++) |
772 | 27.4M | { |
773 | 454M | for(ctb_col = 0; ctb_col < cur_ctb_wd_in_min_pu; ctb_col++) |
774 | 426M | { |
775 | 426M | pu1_pic_pu_map_ctb[index_pic_map + ctb_col] = pu4_nbr_pu_idx[index_nbr_map + ctb_col] |
776 | 426M | - first_pu_of_ctb; |
777 | 426M | } |
778 | 27.4M | index_pic_map += ctb_size_in_min_pu; |
779 | 27.4M | index_nbr_map += nbr_pu_idx_strd; |
780 | 27.4M | } |
781 | 1.86M | } |
782 | 1.86M | } |
783 | 1.86M | } |
784 | 4.62M | } |
785 | | |
786 | 4.62M | if(ps_proc->ps_pps->i1_tiles_enabled_flag) |
787 | 784k | { |
788 | | /*Update the tile index buffer with tile information for the current ctb*/ |
789 | 784k | UWORD16 *pu1_tile_idx = ps_proc->pu1_tile_idx; |
790 | 784k | pu1_tile_idx[(ps_proc->i4_ctb_x + (ps_proc->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb))] |
791 | 784k | = ps_proc->i4_cur_tile_idx; |
792 | 784k | } |
793 | | |
794 | | /*************************************************/ |
795 | | /*********** BS, QP and Deblocking **************/ |
796 | | /*************************************************/ |
797 | | /* Boundary strength call has to be after IQ IT recon since QP population needs ps_proc->i4_qp_const_inc_ctb flag */ |
798 | | |
799 | 4.62M | { |
800 | 4.62M | slice_header_t *ps_slice_hdr; |
801 | 4.62M | ps_slice_hdr = ps_proc->ps_slice_hdr; |
802 | | |
803 | | |
804 | | /* Check if deblock is disabled for the current slice or if it is disabled for the current picture |
805 | | * because of disable deblock api |
806 | | */ |
807 | 4.62M | if(0 == ps_codec->i4_disable_deblk_pic) |
808 | 4.61M | { |
809 | 4.61M | if(ps_codec->i4_num_cores > MV_PRED_NUM_CORES_THRESHOLD) |
810 | 2.36M | { |
811 | | /* Boundary strength calculation is done irrespective of whether deblocking is disabled |
812 | | * in the slice or not, to handle deblocking slice boundaries */ |
813 | 2.36M | if((0 == ps_codec->i4_slice_error)) |
814 | 2.10M | { |
815 | 2.10M | ihevcd_update_ctb_tu_cnt(ps_proc); |
816 | 2.10M | ps_proc->s_bs_ctxt.ps_pps = ps_proc->ps_pps; |
817 | 2.10M | ps_proc->s_bs_ctxt.ps_sps = ps_proc->ps_sps; |
818 | 2.10M | ps_proc->s_bs_ctxt.ps_codec = ps_proc->ps_codec; |
819 | 2.10M | ps_proc->s_bs_ctxt.i4_ctb_tu_cnt = ps_proc->i4_ctb_tu_cnt; |
820 | 2.10M | ps_proc->s_bs_ctxt.i4_ctb_x = ps_proc->i4_ctb_x; |
821 | 2.10M | ps_proc->s_bs_ctxt.i4_ctb_y = ps_proc->i4_ctb_y; |
822 | 2.10M | ps_proc->s_bs_ctxt.i4_ctb_tile_x = ps_proc->i4_ctb_tile_x; |
823 | 2.10M | ps_proc->s_bs_ctxt.i4_ctb_tile_y = ps_proc->i4_ctb_tile_y; |
824 | 2.10M | ps_proc->s_bs_ctxt.i4_ctb_slice_x = ps_proc->i4_ctb_slice_x; |
825 | 2.10M | ps_proc->s_bs_ctxt.i4_ctb_slice_y = ps_proc->i4_ctb_slice_y; |
826 | 2.10M | ps_proc->s_bs_ctxt.ps_tu = ps_proc->ps_tu; |
827 | 2.10M | ps_proc->s_bs_ctxt.ps_pu = ps_proc->ps_pu; |
828 | 2.10M | ps_proc->s_bs_ctxt.pu4_pic_pu_idx_map = ps_proc->pu4_pic_pu_idx_map; |
829 | 2.10M | ps_proc->s_bs_ctxt.i4_next_pu_ctb_cnt = ps_proc->i4_next_pu_ctb_cnt; |
830 | 2.10M | ps_proc->s_bs_ctxt.i4_next_tu_ctb_cnt = ps_proc->i4_next_tu_ctb_cnt; |
831 | 2.10M | ps_proc->s_bs_ctxt.pu1_slice_idx = ps_proc->pu1_slice_idx; |
832 | 2.10M | ps_proc->s_bs_ctxt.ps_slice_hdr = ps_proc->ps_slice_hdr; |
833 | 2.10M | ps_proc->s_bs_ctxt.ps_tile = ps_proc->ps_tile; |
834 | | |
835 | 2.10M | if(ISLICE == ps_slice_hdr->i1_slice_type) |
836 | 1.60M | { |
837 | 1.60M | ihevcd_ctb_boundary_strength_islice(&ps_proc->s_bs_ctxt); |
838 | 1.60M | } |
839 | 499k | else |
840 | 499k | { |
841 | 499k | ihevcd_ctb_boundary_strength_pbslice(&ps_proc->s_bs_ctxt); |
842 | 499k | } |
843 | 2.10M | } |
844 | | |
845 | | /* Boundary strength is set to zero if deblocking is disabled for the current slice */ |
846 | 2.36M | if((0 != ps_slice_hdr->i1_slice_disable_deblocking_filter_flag)) |
847 | 305k | { |
848 | 305k | WORD32 bs_strd = (ps_sps->i2_pic_wd_in_ctb + 1) * (ctb_size * ctb_size / 8 / 16); |
849 | | |
850 | 305k | UWORD32 *pu4_vert_bs = (UWORD32 *)((UWORD8 *)ps_proc->s_bs_ctxt.pu4_pic_vert_bs + |
851 | 305k | ps_proc->i4_ctb_x * (ctb_size * ctb_size / 8 / 16) + |
852 | 305k | ps_proc->i4_ctb_y * bs_strd); |
853 | 305k | UWORD32 *pu4_horz_bs = (UWORD32 *)((UWORD8 *)ps_proc->s_bs_ctxt.pu4_pic_horz_bs + |
854 | 305k | ps_proc->i4_ctb_x * (ctb_size * ctb_size / 8 / 16) + |
855 | 305k | ps_proc->i4_ctb_y * bs_strd); |
856 | | |
857 | 305k | memset(pu4_vert_bs, 0, (ctb_size / 8) * (ctb_size / 4) / 8 * 2); |
858 | 305k | memset(pu4_horz_bs, 0, (ctb_size / 8) * (ctb_size / 4) / 8 * 2); |
859 | 305k | } |
860 | 2.36M | } |
861 | 4.61M | } |
862 | 4.62M | } |
863 | | |
864 | | /* Per CTB update the following */ |
865 | 4.62M | { |
866 | 4.62M | WORD32 cur_ctb_idx = ps_proc->i4_ctb_x |
867 | 4.62M | + ps_proc->i4_ctb_y * (ps_sps->i2_pic_wd_in_ctb); |
868 | 4.62M | cur_ctb_idx++; |
869 | | |
870 | 4.62M | ps_proc->pu1_pu_map += nctb * num_minpu_in_ctb; |
871 | 4.62M | ps_proc->ps_tu += ps_proc->i4_ctb_tu_cnt; |
872 | 4.62M | if((1 == ps_codec->i4_num_cores) && |
873 | 1.05M | (0 == cur_ctb_idx % RESET_TU_BUF_NCTB)) |
874 | 0 | { |
875 | 0 | ps_proc->ps_tu = ps_proc->ps_pic_tu; |
876 | 0 | } |
877 | 4.62M | ps_proc->ps_pu += ps_proc->i4_ctb_pu_cnt; |
878 | 4.62M | } |
879 | | |
880 | | /* Update proc map for recon*/ |
881 | 4.62M | ihevcd_proc_map_update(ps_proc, proc_type, nctb); |
882 | | |
883 | 4.62M | num_ctb_tmp -= nctb; |
884 | 4.62M | ihevcd_ctb_pos_update(ps_proc, nctb); |
885 | | |
886 | 4.62M | } |
887 | | |
888 | 546k | if(cur_slice_idx != ps_proc->i4_cur_slice_idx) |
889 | 6.88k | { |
890 | 6.88k | ps_proc->ps_slice_hdr = ps_codec->ps_slice_hdr_base + ((cur_slice_idx)&(MAX_SLICE_HDR_CNT - 1)); |
891 | 6.88k | ps_proc->i4_cur_slice_idx = cur_slice_idx; |
892 | 6.88k | } |
893 | | /* Restore the saved variables */ |
894 | 546k | num_ctb_tmp = num_ctb; |
895 | 546k | ps_proc->i4_ctb_x -= num_ctb; |
896 | 546k | ps_proc->i4_ctb_tile_x = cur_ctb_tile_x; |
897 | 546k | ps_proc->i4_ctb_slice_x = cur_ctb_slice_x; |
898 | 546k | ps_proc->i4_ctb_tile_y = cur_ctb_tile_y; |
899 | 546k | ps_proc->i4_ctb_slice_y = cur_ctb_slice_y; |
900 | 546k | ps_proc->pu1_pu_map = pu1_pu_map_cur; |
901 | 546k | ps_proc->ps_tu = ps_tu_cur; |
902 | 546k | proc_type = PROC_RECON; |
903 | | |
904 | 5.16M | while(num_ctb_tmp) |
905 | 4.62M | { |
906 | | |
907 | | /* Check proc map to ensure dependencies for recon are met */ |
908 | 4.62M | ihevcd_proc_map_check(ps_proc, proc_type, nctb); |
909 | | |
910 | 4.62M | ihevcd_slice_hdr_update(ps_proc); |
911 | | |
912 | 4.62M | { |
913 | | |
914 | 4.62M | ihevcd_ctb_avail_update(ps_proc); |
915 | | |
916 | | /*************************************************/ |
917 | | /**************** IQ IT RECON *******************/ |
918 | | /*************************************************/ |
919 | | |
920 | 4.62M | ihevcd_update_ctb_tu_cnt(ps_proc); |
921 | | |
922 | | /* When scaling matrix is not to be used(scaling_list_enable_flag is zero in SPS), |
923 | | * default value of 16 has to be used. Since the value is same for all sizes, |
924 | | * same table is used for all cases. |
925 | | */ |
926 | 4.62M | if(0 == ps_sps->i1_scaling_list_enable_flag) |
927 | 799k | { |
928 | 799k | ps_proc->api2_dequant_intra_matrix[0] = |
929 | 799k | (WORD16 *)gi2_flat_scale_mat_32x32; |
930 | 799k | ps_proc->api2_dequant_intra_matrix[1] = |
931 | 799k | (WORD16 *)gi2_flat_scale_mat_32x32; |
932 | 799k | ps_proc->api2_dequant_intra_matrix[2] = |
933 | 799k | (WORD16 *)gi2_flat_scale_mat_32x32; |
934 | 799k | ps_proc->api2_dequant_intra_matrix[3] = |
935 | 799k | (WORD16 *)gi2_flat_scale_mat_32x32; |
936 | | |
937 | 799k | ps_proc->api2_dequant_inter_matrix[0] = |
938 | 799k | (WORD16 *)gi2_flat_scale_mat_32x32; |
939 | 799k | ps_proc->api2_dequant_inter_matrix[1] = |
940 | 799k | (WORD16 *)gi2_flat_scale_mat_32x32; |
941 | 799k | ps_proc->api2_dequant_inter_matrix[2] = |
942 | 799k | (WORD16 *)gi2_flat_scale_mat_32x32; |
943 | 799k | ps_proc->api2_dequant_inter_matrix[3] = |
944 | 799k | (WORD16 *)gi2_flat_scale_mat_32x32; |
945 | 799k | } |
946 | 3.82M | else |
947 | 3.82M | { |
948 | 3.82M | if(0 == ps_sps->i1_sps_scaling_list_data_present_flag) |
949 | 3.69M | { |
950 | 3.69M | ps_proc->api2_dequant_intra_matrix[0] = |
951 | 3.69M | (WORD16 *)gi2_flat_scale_mat_32x32; |
952 | 3.69M | ps_proc->api2_dequant_intra_matrix[1] = |
953 | 3.69M | (WORD16 *)gi2_intra_default_scale_mat_8x8; |
954 | 3.69M | ps_proc->api2_dequant_intra_matrix[2] = |
955 | 3.69M | (WORD16 *)gi2_intra_default_scale_mat_16x16; |
956 | 3.69M | ps_proc->api2_dequant_intra_matrix[3] = |
957 | 3.69M | (WORD16 *)gi2_intra_default_scale_mat_32x32; |
958 | | |
959 | 3.69M | ps_proc->api2_dequant_inter_matrix[0] = |
960 | 3.69M | (WORD16 *)gi2_flat_scale_mat_32x32; |
961 | 3.69M | ps_proc->api2_dequant_inter_matrix[1] = |
962 | 3.69M | (WORD16 *)gi2_inter_default_scale_mat_8x8; |
963 | 3.69M | ps_proc->api2_dequant_inter_matrix[2] = |
964 | 3.69M | (WORD16 *)gi2_inter_default_scale_mat_16x16; |
965 | 3.69M | ps_proc->api2_dequant_inter_matrix[3] = |
966 | 3.69M | (WORD16 *)gi2_inter_default_scale_mat_32x32; |
967 | 3.69M | } |
968 | | /*TODO: Add support for custom scaling matrices */ |
969 | 3.82M | } |
970 | | |
971 | | |
972 | | /* CTB Level pointers */ |
973 | 4.62M | ps_proc->pu1_cur_ctb_luma = ps_proc->pu1_cur_pic_luma |
974 | 4.62M | + (ps_proc->i4_ctb_x * ctb_size |
975 | 4.62M | + ps_proc->i4_ctb_y * ctb_size |
976 | 4.62M | * ps_codec->i4_strd); |
977 | 4.62M | ps_proc->pu1_cur_ctb_chroma = ps_proc->pu1_cur_pic_chroma |
978 | 4.62M | + ps_proc->i4_ctb_x * ctb_size |
979 | 4.62M | + (ps_proc->i4_ctb_y * ctb_size * ps_codec->i4_strd / 2); |
980 | | |
981 | 4.62M | ihevcd_iquant_itrans_recon_ctb(ps_proc); |
982 | 4.62M | } |
983 | | |
984 | | /* Per CTB update the following */ |
985 | 4.62M | { |
986 | 4.62M | WORD32 cur_ctb_idx = ps_proc->i4_ctb_x |
987 | 4.62M | + ps_proc->i4_ctb_y * (ps_sps->i2_pic_wd_in_ctb); |
988 | 4.62M | cur_ctb_idx++; |
989 | | |
990 | 4.62M | ps_proc->pu1_pu_map += nctb * num_minpu_in_ctb; |
991 | 4.62M | ps_proc->ps_tu += ps_proc->i4_ctb_tu_cnt; |
992 | 4.62M | if((1 == ps_codec->i4_num_cores) && |
993 | 1.05M | (0 == cur_ctb_idx % RESET_TU_BUF_NCTB)) |
994 | 0 | { |
995 | 0 | ps_proc->ps_tu = ps_proc->ps_pic_tu; |
996 | 0 | } |
997 | 4.62M | ps_proc->ps_pu += ps_proc->i4_ctb_pu_cnt; |
998 | 4.62M | } |
999 | | |
1000 | | |
1001 | | /* Update proc map for recon*/ |
1002 | 4.62M | ihevcd_proc_map_update(ps_proc, proc_type, nctb); |
1003 | | |
1004 | 4.62M | num_ctb_tmp -= nctb; |
1005 | 4.62M | ihevcd_ctb_pos_update(ps_proc, nctb); |
1006 | 4.62M | } |
1007 | | |
1008 | 546k | if(cur_slice_idx != ps_proc->i4_cur_slice_idx) |
1009 | 6.88k | { |
1010 | 6.88k | ps_proc->ps_slice_hdr = ps_codec->ps_slice_hdr_base + ((cur_slice_idx)&(MAX_SLICE_HDR_CNT - 1)); |
1011 | 6.88k | ps_proc->i4_cur_slice_idx = cur_slice_idx; |
1012 | 6.88k | } |
1013 | | /* Restore the saved variables */ |
1014 | 546k | num_ctb_tmp = num_ctb; |
1015 | 546k | ps_proc->i4_ctb_x -= num_ctb; |
1016 | 546k | ps_proc->i4_ctb_tile_x = cur_ctb_tile_x; |
1017 | 546k | ps_proc->i4_ctb_slice_x = cur_ctb_slice_x; |
1018 | 546k | ps_proc->i4_ctb_tile_y = cur_ctb_tile_y; |
1019 | 546k | ps_proc->i4_ctb_slice_y = cur_ctb_slice_y; |
1020 | 546k | pu1_pu_map_nxt = ps_proc->pu1_pu_map; |
1021 | 546k | ps_tu_nxt = ps_proc->ps_tu; |
1022 | 546k | ps_proc->pu1_pu_map = pu1_pu_map_cur; |
1023 | 546k | ps_proc->ps_tu = ps_tu_cur; |
1024 | 546k | proc_type = PROC_DEBLK; |
1025 | | |
1026 | 5.16M | while(num_ctb_tmp) |
1027 | 4.61M | { |
1028 | | |
1029 | | |
1030 | | /* Check proc map to ensure dependencies for deblk are met */ |
1031 | 4.61M | ihevcd_proc_map_check(ps_proc, proc_type, nctb); |
1032 | | |
1033 | 4.61M | ihevcd_slice_hdr_update(ps_proc); |
1034 | | |
1035 | | |
1036 | 4.61M | if(((0 == FRAME_ILF_PAD || ps_codec->i4_num_cores != 1)) && |
1037 | 4.61M | (0 == ps_codec->i4_disable_deblk_pic)) |
1038 | 4.61M | { |
1039 | 4.61M | WORD32 i4_is_last_ctb_x = 0; |
1040 | 4.61M | WORD32 i4_is_last_ctb_y = 0; |
1041 | | |
1042 | | |
1043 | | /* Deblocking is done irrespective of whether it is disabled in the slice or not, |
1044 | | * to handle deblocking the slice boundaries */ |
1045 | 4.61M | { |
1046 | 4.61M | ps_proc->s_deblk_ctxt.ps_pps = ps_proc->ps_pps; |
1047 | 4.61M | ps_proc->s_deblk_ctxt.ps_sps = ps_proc->ps_sps; |
1048 | 4.61M | ps_proc->s_deblk_ctxt.ps_codec = ps_proc->ps_codec; |
1049 | 4.61M | ps_proc->s_deblk_ctxt.ps_slice_hdr = ps_proc->ps_slice_hdr; |
1050 | 4.61M | ps_proc->s_deblk_ctxt.i4_ctb_x = ps_proc->i4_ctb_x; |
1051 | 4.61M | ps_proc->s_deblk_ctxt.i4_ctb_y = ps_proc->i4_ctb_y; |
1052 | 4.61M | ps_proc->s_deblk_ctxt.pu1_slice_idx = ps_proc->pu1_slice_idx; |
1053 | 4.61M | ps_proc->s_deblk_ctxt.is_chroma_yuv420sp_vu = (ps_codec->e_ref_chroma_fmt == IV_YUV_420SP_VU); |
1054 | | |
1055 | | /* Populating Current CTB's no_loop_filter flags */ |
1056 | 4.61M | { |
1057 | 4.61M | WORD32 row; |
1058 | 4.61M | WORD32 log2_ctb_size = ps_sps->i1_log2_ctb_size; |
1059 | | |
1060 | | /* Loop filter strd in units of num bits */ |
1061 | 4.61M | WORD32 loop_filter_strd = ((ps_sps->i2_pic_width_in_luma_samples + 63) >> 6) << 3; |
1062 | | /* Bit position is the current 8x8 bit offset wrt pic_no_loop_filter |
1063 | | * bit_pos has to be a WORD32 so that when it is negative, the downshift still retains it to be a negative value */ |
1064 | 4.61M | WORD32 bit_pos = ((ps_proc->i4_ctb_y << (log2_ctb_size - 3)) - 1) * loop_filter_strd + (ps_proc->i4_ctb_x << (log2_ctb_size - 3)) - 1; |
1065 | | |
1066 | 40.8M | for(row = 0; row < (ctb_size >> 3) + 1; row++) |
1067 | 36.2M | { |
1068 | | /* Go to the corresponding byte - read 32 bits and downshift */ |
1069 | 36.2M | ps_proc->s_deblk_ctxt.au2_ctb_no_loop_filter_flag[row] = (*(UWORD32 *)(ps_proc->pu1_pic_no_loop_filter_flag + (bit_pos >> 3))) >> (bit_pos & 7); |
1070 | 36.2M | bit_pos += loop_filter_strd; |
1071 | 36.2M | } |
1072 | 4.61M | } |
1073 | | |
1074 | 4.61M | ihevcd_deblk_ctb(&ps_proc->s_deblk_ctxt, i4_is_last_ctb_x, i4_is_last_ctb_y); |
1075 | | |
1076 | | /* If the last CTB in the row was a complete CTB then deblocking has to be called from remaining pixels, since deblocking |
1077 | | * is applied on a shifted CTB structure |
1078 | | */ |
1079 | 4.61M | if(ps_proc->i4_ctb_x == ps_sps->i2_pic_wd_in_ctb - 1) |
1080 | 165k | { |
1081 | 165k | WORD32 i4_is_last_ctb_x = 1; |
1082 | 165k | WORD32 i4_is_last_ctb_y = 0; |
1083 | | |
1084 | 165k | WORD32 last_x_pos; |
1085 | 165k | last_x_pos = (ps_sps->i2_pic_wd_in_ctb << ps_sps->i1_log2_ctb_size); |
1086 | 165k | if(last_x_pos == ps_sps->i2_pic_width_in_luma_samples) |
1087 | 29.8k | { |
1088 | 29.8k | ihevcd_deblk_ctb(&ps_proc->s_deblk_ctxt, i4_is_last_ctb_x, i4_is_last_ctb_y); |
1089 | 29.8k | } |
1090 | 165k | } |
1091 | | |
1092 | | |
1093 | | /* If the last CTB in the column was a complete CTB then deblocking has to be called from remaining pixels, since deblocking |
1094 | | * is applied on a shifted CTB structure |
1095 | | */ |
1096 | 4.61M | if(ps_proc->i4_ctb_y == ps_sps->i2_pic_ht_in_ctb - 1) |
1097 | 433k | { |
1098 | 433k | WORD32 i4_is_last_ctb_x = 0; |
1099 | 433k | WORD32 i4_is_last_ctb_y = 1; |
1100 | 433k | WORD32 last_y_pos; |
1101 | 433k | last_y_pos = (ps_sps->i2_pic_ht_in_ctb << ps_sps->i1_log2_ctb_size); |
1102 | 433k | if(last_y_pos == ps_sps->i2_pic_height_in_luma_samples) |
1103 | 7.69k | { |
1104 | 7.69k | ihevcd_deblk_ctb(&ps_proc->s_deblk_ctxt, i4_is_last_ctb_x, i4_is_last_ctb_y); |
1105 | 7.69k | } |
1106 | 433k | } |
1107 | 4.61M | } |
1108 | 4.61M | } |
1109 | | |
1110 | | /* Update proc map for deblk*/ |
1111 | 4.61M | ihevcd_proc_map_update(ps_proc, proc_type, nctb); |
1112 | | |
1113 | 4.61M | num_ctb_tmp -= nctb; |
1114 | 4.61M | ihevcd_ctb_pos_update(ps_proc, nctb); |
1115 | 4.61M | } |
1116 | | |
1117 | 546k | if(cur_slice_idx != ps_proc->i4_cur_slice_idx) |
1118 | 6.88k | { |
1119 | 6.88k | ps_proc->ps_slice_hdr = ps_codec->ps_slice_hdr_base + ((cur_slice_idx)&(MAX_SLICE_HDR_CNT - 1)); |
1120 | 6.88k | ps_proc->i4_cur_slice_idx = cur_slice_idx; |
1121 | 6.88k | } |
1122 | | /* Restore the saved variables */ |
1123 | 546k | num_ctb_tmp = num_ctb; |
1124 | 546k | ps_proc->i4_ctb_x -= num_ctb; |
1125 | 546k | ps_proc->i4_ctb_tile_x = cur_ctb_tile_x; |
1126 | 546k | ps_proc->i4_ctb_tile_y = cur_ctb_tile_y; |
1127 | 546k | ps_proc->pu1_pu_map = pu1_pu_map_cur; |
1128 | 546k | ps_proc->ps_tu = ps_tu_cur; |
1129 | 546k | nxt_ctb_slice_y = ps_proc->i4_ctb_slice_y; |
1130 | 546k | nxt_ctb_slice_x = ps_proc->i4_ctb_slice_x; |
1131 | 546k | ps_proc->i4_ctb_slice_y = cur_ctb_slice_y; |
1132 | 546k | ps_proc->i4_ctb_slice_x = cur_ctb_slice_x; |
1133 | 546k | proc_type = PROC_SAO; |
1134 | | |
1135 | 5.16M | while(num_ctb_tmp) |
1136 | 4.62M | { |
1137 | | |
1138 | | |
1139 | | /* Check proc map to ensure dependencies for SAO are met */ |
1140 | 4.62M | ihevcd_proc_map_check(ps_proc, proc_type, nctb); |
1141 | | |
1142 | 4.62M | ihevcd_slice_hdr_update(ps_proc); |
1143 | | |
1144 | | |
1145 | 4.62M | if(0 == FRAME_ILF_PAD || ps_codec->i4_num_cores != 1) |
1146 | 4.62M | { |
1147 | | /* SAO is done even when it is disabled in the current slice, because |
1148 | | * it is performed on a shifted CTB and the neighbor CTBs can belong |
1149 | | * to different slices with SAO enabled */ |
1150 | 4.62M | if(0 == ps_codec->i4_disable_sao_pic) |
1151 | 4.62M | { |
1152 | 4.62M | ps_proc->s_sao_ctxt.ps_pps = ps_proc->ps_pps; |
1153 | 4.62M | ps_proc->s_sao_ctxt.ps_sps = ps_proc->ps_sps; |
1154 | 4.62M | ps_proc->s_sao_ctxt.ps_tile = ps_proc->ps_tile; |
1155 | 4.62M | ps_proc->s_sao_ctxt.ps_codec = ps_proc->ps_codec; |
1156 | 4.62M | ps_proc->s_sao_ctxt.ps_slice_hdr = ps_proc->ps_slice_hdr; |
1157 | 4.62M | ps_proc->s_sao_ctxt.i4_cur_slice_idx = ps_proc->i4_cur_slice_idx; |
1158 | | |
1159 | | |
1160 | 4.62M | #if SAO_PROCESS_SHIFT_CTB |
1161 | 4.62M | ps_proc->s_sao_ctxt.i4_ctb_x = ps_proc->i4_ctb_x; |
1162 | 4.62M | ps_proc->s_sao_ctxt.i4_ctb_y = ps_proc->i4_ctb_y; |
1163 | 4.62M | ps_proc->s_sao_ctxt.is_chroma_yuv420sp_vu = (ps_codec->e_ref_chroma_fmt == IV_YUV_420SP_VU); |
1164 | | |
1165 | 4.62M | ihevcd_sao_shift_ctb(&ps_proc->s_sao_ctxt); |
1166 | | #else |
1167 | | if(ps_proc->i4_ctb_x > 1 && ps_proc->i4_ctb_y > 0) |
1168 | | { |
1169 | | ps_proc->s_sao_ctxt.i4_ctb_x = ps_proc->i4_ctb_x - 2; |
1170 | | ps_proc->s_sao_ctxt.i4_ctb_y = ps_proc->i4_ctb_y - 1; |
1171 | | |
1172 | | ihevcd_sao_ctb(&ps_proc->s_sao_ctxt); |
1173 | | } |
1174 | | |
1175 | | if(ps_sps->i2_pic_wd_in_ctb - 1 == ps_proc->i4_ctb_x && ps_proc->i4_ctb_y > 0) |
1176 | | { |
1177 | | ps_proc->s_sao_ctxt.i4_ctb_x = ps_proc->i4_ctb_x - 1; |
1178 | | ps_proc->s_sao_ctxt.i4_ctb_y = ps_proc->i4_ctb_y - 1; |
1179 | | |
1180 | | ihevcd_sao_ctb(&ps_proc->s_sao_ctxt); |
1181 | | |
1182 | | ps_proc->s_sao_ctxt.i4_ctb_x = ps_proc->i4_ctb_x; |
1183 | | ps_proc->s_sao_ctxt.i4_ctb_y = ps_proc->i4_ctb_y - 1; |
1184 | | |
1185 | | ihevcd_sao_ctb(&ps_proc->s_sao_ctxt); |
1186 | | |
1187 | | if(ps_sps->i2_pic_ht_in_ctb - 1 == ps_proc->i4_ctb_y) |
1188 | | { |
1189 | | WORD32 i4_ctb_x; |
1190 | | ps_proc->s_sao_ctxt.i4_ctb_y = ps_proc->i4_ctb_y; |
1191 | | for(i4_ctb_x = 0; i4_ctb_x < ps_sps->i2_pic_wd_in_ctb; i4_ctb_x++) |
1192 | | { |
1193 | | ps_proc->s_sao_ctxt.i4_ctb_x = i4_ctb_x; |
1194 | | ihevcd_sao_ctb(&ps_proc->s_sao_ctxt); |
1195 | | } |
1196 | | } |
1197 | | } |
1198 | | #endif |
1199 | 4.62M | } |
1200 | | |
1201 | | |
1202 | | /* Call padding if required */ |
1203 | 4.62M | { |
1204 | 4.62M | #if SAO_PROCESS_SHIFT_CTB |
1205 | | |
1206 | 4.62M | if(0 == ps_proc->i4_ctb_x) |
1207 | 165k | { |
1208 | 165k | WORD32 pad_ht_luma; |
1209 | 165k | WORD32 pad_ht_chroma; |
1210 | | |
1211 | 165k | ps_proc->pu1_cur_ctb_luma = ps_proc->pu1_cur_pic_luma |
1212 | 165k | + (ps_proc->i4_ctb_x * ctb_size |
1213 | 165k | + ps_proc->i4_ctb_y * ctb_size |
1214 | 165k | * ps_codec->i4_strd); |
1215 | 165k | ps_proc->pu1_cur_ctb_chroma = ps_proc->pu1_cur_pic_chroma |
1216 | 165k | + ps_proc->i4_ctb_x * ctb_size |
1217 | 165k | + (ps_proc->i4_ctb_y * ctb_size * ps_codec->i4_strd / 2); |
1218 | | |
1219 | 165k | pad_ht_luma = ctb_size; |
1220 | 165k | pad_ht_luma += (ps_sps->i2_pic_ht_in_ctb - 1) == ps_proc->i4_ctb_y ? 8 : 0; |
1221 | 165k | pad_ht_chroma = ctb_size / 2; |
1222 | | /* Pad left after 1st CTB is processed */ |
1223 | 165k | ps_codec->s_func_selector.ihevc_pad_left_luma_fptr(ps_proc->pu1_cur_ctb_luma - 8 * ps_codec->i4_strd, ps_codec->i4_strd, pad_ht_luma, PAD_LEFT); |
1224 | 165k | ps_codec->s_func_selector.ihevc_pad_left_chroma_fptr(ps_proc->pu1_cur_ctb_chroma - 16 * ps_codec->i4_strd, ps_codec->i4_strd, pad_ht_chroma, PAD_LEFT); |
1225 | 165k | } |
1226 | | |
1227 | 4.62M | if((ps_sps->i2_pic_wd_in_ctb - 1) == ps_proc->i4_ctb_x) |
1228 | 165k | { |
1229 | 165k | WORD32 pad_ht_luma; |
1230 | 165k | WORD32 pad_ht_chroma; |
1231 | 165k | WORD32 cols_remaining = ps_sps->i2_pic_width_in_luma_samples - (ps_proc->i4_ctb_x << ps_sps->i1_log2_ctb_size); |
1232 | | |
1233 | 165k | ps_proc->pu1_cur_ctb_luma = ps_proc->pu1_cur_pic_luma |
1234 | 165k | + (ps_proc->i4_ctb_x * ctb_size |
1235 | 165k | + ps_proc->i4_ctb_y * ctb_size |
1236 | 165k | * ps_codec->i4_strd); |
1237 | 165k | ps_proc->pu1_cur_ctb_chroma = ps_proc->pu1_cur_pic_chroma |
1238 | 165k | + ps_proc->i4_ctb_x * ctb_size |
1239 | 165k | + (ps_proc->i4_ctb_y * ctb_size * ps_codec->i4_strd / 2); |
1240 | | |
1241 | 165k | pad_ht_luma = ctb_size; |
1242 | 165k | pad_ht_chroma = ctb_size / 2; |
1243 | 165k | if((ps_sps->i2_pic_ht_in_ctb - 1) == ps_proc->i4_ctb_y) |
1244 | 13.4k | { |
1245 | 13.4k | pad_ht_luma += 8; |
1246 | 13.4k | pad_ht_chroma += 16; |
1247 | 13.4k | ps_codec->s_func_selector.ihevc_pad_left_chroma_fptr(ps_proc->pu1_cur_pic_chroma + (ps_sps->i2_pic_height_in_luma_samples / 2 - 16) * ps_codec->i4_strd, |
1248 | 13.4k | ps_codec->i4_strd, 16, PAD_LEFT); |
1249 | 13.4k | } |
1250 | | /* Pad right after last CTB in the current row is processed */ |
1251 | 165k | ps_codec->s_func_selector.ihevc_pad_right_luma_fptr(ps_proc->pu1_cur_ctb_luma + cols_remaining - 8 * ps_codec->i4_strd, ps_codec->i4_strd, pad_ht_luma, PAD_RIGHT); |
1252 | 165k | ps_codec->s_func_selector.ihevc_pad_right_chroma_fptr(ps_proc->pu1_cur_ctb_chroma + cols_remaining - 16 * ps_codec->i4_strd, ps_codec->i4_strd, pad_ht_chroma, PAD_RIGHT); |
1253 | | |
1254 | 165k | if((ps_sps->i2_pic_ht_in_ctb - 1) == ps_proc->i4_ctb_y) |
1255 | 13.4k | { |
1256 | 13.4k | UWORD8 *pu1_buf; |
1257 | | /* Since SAO is shifted by 8x8, chroma padding can not be done till second row is processed */ |
1258 | | /* Hence moving top padding to the end of the frame; moving it to the second row also results in problems when there is only one row */ |
1259 | | /* Pad top after padding left and right for current rows after processing 1st CTB row */ |
1260 | 13.4k | ihevc_pad_top(ps_proc->pu1_cur_pic_luma - PAD_LEFT, ps_codec->i4_strd, ps_sps->i2_pic_width_in_luma_samples + PAD_WD, PAD_TOP); |
1261 | 13.4k | ihevc_pad_top(ps_proc->pu1_cur_pic_chroma - PAD_LEFT, ps_codec->i4_strd, ps_sps->i2_pic_width_in_luma_samples + PAD_WD, PAD_TOP / 2); |
1262 | | |
1263 | 13.4k | pu1_buf = ps_proc->pu1_cur_pic_luma + ps_codec->i4_strd * ps_sps->i2_pic_height_in_luma_samples - PAD_LEFT; |
1264 | | /* Pad bottom after padding left and right for the last CTB row */ |
1265 | 13.4k | ihevc_pad_bottom(pu1_buf, ps_codec->i4_strd, ps_sps->i2_pic_width_in_luma_samples + PAD_WD, PAD_BOT); |
1266 | | |
1267 | 13.4k | pu1_buf = ps_proc->pu1_cur_pic_chroma + ps_codec->i4_strd * (ps_sps->i2_pic_height_in_luma_samples / 2) - PAD_LEFT; |
1268 | 13.4k | ihevc_pad_bottom(pu1_buf, ps_codec->i4_strd, ps_sps->i2_pic_width_in_luma_samples + PAD_WD, PAD_BOT / 2); |
1269 | 13.4k | } |
1270 | 165k | } |
1271 | | #else |
1272 | | if(ps_proc->i4_ctb_y > 1) |
1273 | | { |
1274 | | if(0 == ps_proc->i4_ctb_x) |
1275 | | { |
1276 | | WORD32 pad_ht_luma; |
1277 | | WORD32 pad_ht_chroma; |
1278 | | |
1279 | | pad_ht_luma = ctb_size; |
1280 | | pad_ht_chroma = ctb_size / 2; |
1281 | | /* Pad left after 1st CTB is processed */ |
1282 | | ps_codec->s_func_selector.ihevc_pad_left_luma_fptr(ps_proc->pu1_cur_ctb_luma - 2 * ctb_size * ps_codec->i4_strd, ps_codec->i4_strd, pad_ht_luma, PAD_LEFT); |
1283 | | ps_codec->s_func_selector.ihevc_pad_left_chroma_fptr(ps_proc->pu1_cur_ctb_chroma - ctb_size * ps_codec->i4_strd, ps_codec->i4_strd, pad_ht_chroma, PAD_LEFT); |
1284 | | } |
1285 | | else if((ps_sps->i2_pic_wd_in_ctb - 1) == ps_proc->i4_ctb_x) |
1286 | | { |
1287 | | WORD32 pad_ht_luma; |
1288 | | WORD32 pad_ht_chroma; |
1289 | | WORD32 cols_remaining = ps_sps->i2_pic_width_in_luma_samples - (ps_proc->i4_ctb_x << ps_sps->i1_log2_ctb_size); |
1290 | | |
1291 | | pad_ht_luma = ((ps_sps->i2_pic_ht_in_ctb - 1) == ps_proc->i4_ctb_y) ? 3 * ctb_size : ctb_size; |
1292 | | pad_ht_chroma = ((ps_sps->i2_pic_ht_in_ctb - 1) == ps_proc->i4_ctb_y) ? 3 * ctb_size / 2 : ctb_size / 2; |
1293 | | /* Pad right after last CTB in the current row is processed */ |
1294 | | ps_codec->s_func_selector.ihevc_pad_right_luma_fptr(ps_proc->pu1_cur_ctb_luma + cols_remaining - 2 * ctb_size * ps_codec->i4_strd, ps_codec->i4_strd, pad_ht_luma, PAD_RIGHT); |
1295 | | ps_codec->s_func_selector.ihevc_pad_right_chroma_fptr(ps_proc->pu1_cur_ctb_chroma + cols_remaining - ctb_size * ps_codec->i4_strd, ps_codec->i4_strd, pad_ht_chroma, PAD_RIGHT); |
1296 | | |
1297 | | if((ps_sps->i2_pic_ht_in_ctb - 1) == ps_proc->i4_ctb_y) |
1298 | | { |
1299 | | UWORD8 *pu1_buf; |
1300 | | WORD32 pad_ht_luma; |
1301 | | WORD32 pad_ht_chroma; |
1302 | | |
1303 | | pad_ht_luma = 2 * ctb_size; |
1304 | | pad_ht_chroma = ctb_size; |
1305 | | |
1306 | | ps_codec->s_func_selector.ihevc_pad_left_luma_fptr(ps_proc->pu1_cur_pic_luma + ps_codec->i4_strd * (ps_sps->i2_pic_height_in_luma_samples - 2 * ctb_size), |
1307 | | ps_codec->i4_strd, pad_ht_luma, PAD_LEFT); |
1308 | | ps_codec->s_func_selector.ihevc_pad_left_chroma_fptr(ps_proc->pu1_cur_pic_chroma + ps_codec->i4_strd * (ps_sps->i2_pic_height_in_luma_samples / 2 - ctb_size), |
1309 | | ps_codec->i4_strd, pad_ht_chroma, PAD_LEFT); |
1310 | | |
1311 | | /* Since SAO is shifted by 8x8, chroma padding can not be done till second row is processed */ |
1312 | | /* Hence moving top padding to the end of the frame; moving it to the second row also results in problems when there is only one row */ |
1313 | | /* Pad top after padding left and right for current rows after processing 1st CTB row */ |
1314 | | ihevc_pad_top(ps_proc->pu1_cur_pic_luma - PAD_LEFT, ps_codec->i4_strd, ps_sps->i2_pic_width_in_luma_samples + PAD_WD, PAD_TOP); |
1315 | | ihevc_pad_top(ps_proc->pu1_cur_pic_chroma - PAD_LEFT, ps_codec->i4_strd, ps_sps->i2_pic_width_in_luma_samples + PAD_WD, PAD_TOP / 2); |
1316 | | |
1317 | | pu1_buf = ps_proc->pu1_cur_pic_luma + ps_codec->i4_strd * ps_sps->i2_pic_height_in_luma_samples - PAD_LEFT; |
1318 | | /* Pad bottom after padding left and right for the last CTB row */ |
1319 | | ihevc_pad_bottom(pu1_buf, ps_codec->i4_strd, ps_sps->i2_pic_width_in_luma_samples + PAD_WD, PAD_BOT); |
1320 | | |
1321 | | pu1_buf = ps_proc->pu1_cur_pic_chroma + ps_codec->i4_strd * (ps_sps->i2_pic_height_in_luma_samples / 2) - PAD_LEFT; |
1322 | | ihevc_pad_bottom(pu1_buf, ps_codec->i4_strd, ps_sps->i2_pic_width_in_luma_samples + PAD_WD, PAD_BOT / 2); |
1323 | | } |
1324 | | } |
1325 | | } |
1326 | | #endif |
1327 | 4.62M | } |
1328 | 4.62M | } |
1329 | | |
1330 | | |
1331 | | /* Update proc map for SAO*/ |
1332 | 4.62M | ihevcd_proc_map_update(ps_proc, proc_type, nctb); |
1333 | | /* Update proc map for Completion of CTB*/ |
1334 | 4.62M | ihevcd_proc_map_update(ps_proc, PROC_ALL, nctb); |
1335 | 4.62M | { |
1336 | 4.62M | tile_t *ps_tile; |
1337 | | |
1338 | 4.62M | ps_tile = ps_proc->ps_tile; |
1339 | 4.62M | num_ctb_tmp -= nctb; |
1340 | | |
1341 | 4.62M | ps_proc->i4_ctb_tile_x += nctb; |
1342 | 4.62M | ps_proc->i4_ctb_x += nctb; |
1343 | | |
1344 | 4.62M | ps_proc->i4_ctb_slice_x += nctb; |
1345 | | |
1346 | | |
1347 | | /* Update tile counters */ |
1348 | 4.62M | if(ps_proc->i4_ctb_tile_x >= (ps_tile->u2_wd)) |
1349 | 205k | { |
1350 | | /*End of tile row*/ |
1351 | 205k | ps_proc->i4_ctb_tile_x = 0; |
1352 | 205k | ps_proc->i4_ctb_x = ps_tile->u1_pos_x; |
1353 | | |
1354 | 205k | ps_proc->i4_ctb_tile_y++; |
1355 | 205k | ps_proc->i4_ctb_y++; |
1356 | 205k | if(ps_proc->i4_ctb_tile_y == ps_tile->u2_ht) |
1357 | 27.2k | { |
1358 | | /* Reached End of Tile */ |
1359 | 27.2k | ps_proc->i4_ctb_tile_y = 0; |
1360 | 27.2k | ps_proc->i4_ctb_tile_x = 0; |
1361 | 27.2k | ps_proc->ps_tile++; |
1362 | | //End of picture |
1363 | 27.2k | if(!((ps_tile->u2_ht + ps_tile->u1_pos_y == ps_sps->i2_pic_ht_in_ctb) && (ps_tile->u2_wd + ps_tile->u1_pos_x == ps_sps->i2_pic_wd_in_ctb))) |
1364 | 13.8k | { |
1365 | 13.8k | ps_tile = ps_proc->ps_tile; |
1366 | 13.8k | ps_proc->i4_ctb_x = ps_tile->u1_pos_x; |
1367 | 13.8k | ps_proc->i4_ctb_y = ps_tile->u1_pos_y; |
1368 | | |
1369 | 13.8k | } |
1370 | 27.2k | } |
1371 | 205k | } |
1372 | 4.62M | } |
1373 | 4.62M | } |
1374 | | |
1375 | 546k | ps_proc->i4_ctb_cnt -= num_ctb; |
1376 | 546k | } |
1377 | 205k | return ret; |
1378 | 205k | } |
1379 | | |
1380 | | void ihevcd_init_proc_ctxt(process_ctxt_t *ps_proc, WORD32 tu_coeff_data_ofst) |
1381 | 205k | { |
1382 | 205k | codec_t *ps_codec; |
1383 | 205k | slice_header_t *ps_slice_hdr; |
1384 | 205k | pps_t *ps_pps; |
1385 | 205k | sps_t *ps_sps; |
1386 | 205k | tile_t *ps_tile, *ps_tile_prev; |
1387 | 205k | WORD32 tile_idx; |
1388 | 205k | WORD32 ctb_size; |
1389 | 205k | WORD32 num_minpu_in_ctb; |
1390 | 205k | WORD32 num_ctb_in_row; |
1391 | 205k | WORD32 ctb_addr; |
1392 | 205k | WORD32 i4_wd_in_ctb; |
1393 | 205k | WORD32 tile_start_ctb_idx; |
1394 | 205k | WORD32 slice_start_ctb_idx; |
1395 | 205k | WORD32 check_tile_wd; |
1396 | 205k | WORD32 continuous_tiles = 0; //Refers to tiles that are continuous, within a slice, horizontally |
1397 | | |
1398 | 205k | ps_codec = ps_proc->ps_codec; |
1399 | | |
1400 | 205k | ps_slice_hdr = ps_codec->ps_slice_hdr_base + ((ps_proc->i4_cur_slice_idx) & (MAX_SLICE_HDR_CNT - 1)); |
1401 | 205k | ps_proc->ps_slice_hdr = ps_slice_hdr; |
1402 | 205k | ps_proc->ps_pps = ps_codec->ps_pps_base + ps_slice_hdr->i1_pps_id; |
1403 | 205k | ps_pps = ps_proc->ps_pps; |
1404 | 205k | ps_proc->ps_sps = ps_codec->ps_sps_base + ps_pps->i1_sps_id; |
1405 | 205k | ps_sps = ps_proc->ps_sps; |
1406 | 205k | ps_proc->i4_init_done = 1; |
1407 | 205k | ctb_size = 1 << ps_sps->i1_log2_ctb_size; |
1408 | 205k | num_minpu_in_ctb = (ctb_size / MIN_PU_SIZE) * (ctb_size / MIN_PU_SIZE); |
1409 | 205k | num_ctb_in_row = ps_sps->i2_pic_wd_in_ctb; |
1410 | | |
1411 | 205k | ps_proc->s_sao_ctxt.pu1_slice_idx = ps_proc->pu1_slice_idx; |
1412 | | |
1413 | 205k | ihevcd_get_tile_pos(ps_pps, ps_sps, ps_proc->i4_ctb_x, ps_proc->i4_ctb_y, |
1414 | 205k | &ps_proc->i4_ctb_tile_x, &ps_proc->i4_ctb_tile_y, |
1415 | 205k | &tile_idx); |
1416 | | |
1417 | 205k | ps_proc->ps_tile = ps_pps->ps_tile + tile_idx; |
1418 | 205k | ps_proc->i4_cur_tile_idx = tile_idx; |
1419 | 205k | ps_tile = ps_proc->ps_tile; |
1420 | | |
1421 | 205k | if(ps_pps->i1_tiles_enabled_flag) |
1422 | 63.4k | { |
1423 | 63.4k | if(tile_idx) |
1424 | 56.7k | ps_tile_prev = ps_tile - 1; |
1425 | 6.71k | else |
1426 | 6.71k | ps_tile_prev = ps_tile; |
1427 | | |
1428 | 63.4k | slice_start_ctb_idx = ps_slice_hdr->i2_ctb_x + (ps_slice_hdr->i2_ctb_y * ps_sps->i2_pic_wd_in_ctb); |
1429 | 63.4k | tile_start_ctb_idx = ps_tile->u1_pos_x + (ps_tile->u1_pos_y * ps_sps->i2_pic_wd_in_ctb); |
1430 | | |
1431 | | /*Check if |
1432 | | * 1. Last tile that ends in frame boundary and 1st tile in next row belongs to same slice |
1433 | | * 1.1. If it does, check if the slice that has these tiles spans across the frame row. |
1434 | | * 2. Vertical tiles are present within a slice */ |
1435 | 63.4k | if(((ps_slice_hdr->i2_ctb_x == ps_tile->u1_pos_x) && (ps_slice_hdr->i2_ctb_y != ps_tile->u1_pos_y))) |
1436 | 13.4k | { |
1437 | 13.4k | continuous_tiles = 1; |
1438 | 13.4k | } |
1439 | 49.9k | else |
1440 | 49.9k | { |
1441 | 49.9k | check_tile_wd = ps_slice_hdr->i2_ctb_x + ps_tile_prev->u2_wd; |
1442 | 49.9k | if(!(((check_tile_wd >= ps_sps->i2_pic_wd_in_ctb) && (check_tile_wd % ps_sps->i2_pic_wd_in_ctb == ps_tile->u1_pos_x)) |
1443 | 46.8k | || ((ps_slice_hdr->i2_ctb_x == ps_tile->u1_pos_x)))) |
1444 | 35.0k | { |
1445 | 35.0k | continuous_tiles = 1; |
1446 | 35.0k | } |
1447 | 49.9k | } |
1448 | | |
1449 | 63.4k | { |
1450 | 63.4k | WORD32 i2_independent_ctb_x = ps_slice_hdr->i2_independent_ctb_x; |
1451 | 63.4k | WORD32 i2_independent_ctb_y = ps_slice_hdr->i2_independent_ctb_y; |
1452 | | |
1453 | | /* Handles cases where |
1454 | | * 1. Slices begin at the start of each tile |
1455 | | * 2. Tiles lie in the same slice row.i.e, starting tile_x > slice_x, but tile_y == slice_y |
1456 | | * */ |
1457 | 63.4k | if(ps_proc->i4_ctb_x >= i2_independent_ctb_x) |
1458 | 55.8k | { |
1459 | 55.8k | ps_proc->i4_ctb_slice_x = ps_proc->i4_ctb_x - i2_independent_ctb_x; |
1460 | 55.8k | } |
1461 | 7.58k | else |
1462 | 7.58k | { |
1463 | | /* Indicates multiple tiles in a slice case where |
1464 | | * The new tile belongs to an older slice that started in the previous rows-not the present row |
1465 | | * & (tile_y > slice_y and tile_x < slice_x) |
1466 | | */ |
1467 | 7.58k | if((slice_start_ctb_idx < tile_start_ctb_idx) && (continuous_tiles)) |
1468 | 4.58k | { |
1469 | 4.58k | i4_wd_in_ctb = ps_sps->i2_pic_wd_in_ctb; |
1470 | 4.58k | } |
1471 | | /* Indicates many-tiles-in-one-slice case, for slices that end without spanning the frame width*/ |
1472 | 3.00k | else |
1473 | 3.00k | { |
1474 | 3.00k | i4_wd_in_ctb = ps_tile->u2_wd; |
1475 | 3.00k | } |
1476 | | |
1477 | 7.58k | if(continuous_tiles) |
1478 | 5.22k | { |
1479 | 5.22k | ps_proc->i4_ctb_slice_x = i4_wd_in_ctb |
1480 | 5.22k | - (i2_independent_ctb_x - ps_proc->i4_ctb_x); |
1481 | 5.22k | } |
1482 | 2.35k | else |
1483 | 2.35k | { |
1484 | 2.35k | ps_proc->i4_ctb_slice_x = ps_proc->i4_ctb_x - ps_tile->u1_pos_x; |
1485 | 2.35k | } |
1486 | 7.58k | } |
1487 | | /* Initialize ctb slice y to zero and at the start of slice row initialize it |
1488 | | to difference between ctb_y and slice's start ctb y */ |
1489 | | |
1490 | 63.4k | ps_proc->i4_ctb_slice_y = ps_proc->i4_ctb_y - i2_independent_ctb_y; |
1491 | | |
1492 | | /*If beginning of tile, check if slice counters are set correctly*/ |
1493 | 63.4k | if((0 == ps_proc->i4_ctb_tile_x) && (0 == ps_proc->i4_ctb_tile_y)) |
1494 | 15.5k | { |
1495 | 15.5k | if(ps_slice_hdr->i1_dependent_slice_flag) |
1496 | 1.44k | { |
1497 | 1.44k | ps_proc->i4_ctb_slice_x = 0; |
1498 | 1.44k | ps_proc->i4_ctb_slice_y = 0; |
1499 | 1.44k | } |
1500 | | /*For slices that span across multiple tiles*/ |
1501 | 14.0k | else if(slice_start_ctb_idx < tile_start_ctb_idx) |
1502 | 11.7k | { |
1503 | 11.7k | ps_proc->i4_ctb_slice_y = ps_tile->u1_pos_y - i2_independent_ctb_y; |
1504 | | /* Two Cases |
1505 | | * 1 - slice spans across frame-width- but dose not start from 1st column |
1506 | | * 2 - Slice spans across multiple tiles anywhere is a frame |
1507 | | */ |
1508 | | /*TODO:In a multiple slice clip, if an independent slice span across more than 2 tiles in a row, it is not supported*/ |
1509 | 11.7k | if(continuous_tiles) //Case 2-implemented for slices that span not more than 2 tiles |
1510 | 11.2k | { |
1511 | 11.2k | if(i2_independent_ctb_y <= ps_tile->u1_pos_y) |
1512 | 9.89k | { |
1513 | | //Check if ctb x is before or after |
1514 | 9.89k | if(i2_independent_ctb_x > ps_tile->u1_pos_x) |
1515 | 285 | { |
1516 | 285 | ps_proc->i4_ctb_slice_y -= 1; |
1517 | 285 | } |
1518 | 9.89k | } |
1519 | 11.2k | } |
1520 | 11.7k | } |
1521 | 15.5k | } |
1522 | | //Slice starts from a column which is not the starting tile-column, but is within the tile |
1523 | 63.4k | if(((i2_independent_ctb_x - ps_tile->u1_pos_x) != 0) && ((ps_proc->i4_ctb_slice_y != 0)) |
1524 | 35.8k | && ((i2_independent_ctb_x >= ps_tile->u1_pos_x) && (i2_independent_ctb_x < ps_tile->u1_pos_x + ps_tile->u2_wd))) |
1525 | 1.51k | { |
1526 | 1.51k | ps_proc->i4_ctb_slice_y -= 1; |
1527 | 1.51k | } |
1528 | 63.4k | } |
1529 | 63.4k | } |
1530 | 142k | else |
1531 | 142k | { |
1532 | 142k | WORD32 i2_independent_ctb_x = ps_slice_hdr->i2_independent_ctb_x; |
1533 | 142k | WORD32 i2_independent_ctb_y = ps_slice_hdr->i2_independent_ctb_y; |
1534 | | |
1535 | | |
1536 | 142k | { |
1537 | 142k | ps_proc->i4_ctb_slice_x = ps_proc->i4_ctb_x - i2_independent_ctb_x; |
1538 | 142k | ps_proc->i4_ctb_slice_y = ps_proc->i4_ctb_y - i2_independent_ctb_y; |
1539 | 142k | if(ps_proc->i4_ctb_slice_x < 0) |
1540 | 8.88k | { |
1541 | 8.88k | ps_proc->i4_ctb_slice_x += ps_sps->i2_pic_wd_in_ctb; |
1542 | 8.88k | ps_proc->i4_ctb_slice_y -= 1; |
1543 | 8.88k | } |
1544 | | |
1545 | | /* Initialize ctb slice y to zero and at the start of slice row initialize it |
1546 | | to difference between ctb_y and slice's start ctb y */ |
1547 | 142k | } |
1548 | 142k | } |
1549 | | |
1550 | | /* Compute TU offset for the current CTB set */ |
1551 | 205k | { |
1552 | | |
1553 | 205k | WORD32 ctb_luma_min_tu_cnt; |
1554 | 205k | WORD32 ctb_addr; |
1555 | | |
1556 | 205k | ctb_addr = ps_proc->i4_ctb_y * num_ctb_in_row + ps_proc->i4_ctb_x; |
1557 | | |
1558 | 205k | ctb_luma_min_tu_cnt = (1 << ps_sps->i1_log2_ctb_size) / MIN_TU_SIZE; |
1559 | 205k | ctb_luma_min_tu_cnt *= ctb_luma_min_tu_cnt; |
1560 | | |
1561 | 205k | ps_proc->pu1_tu_map = ps_proc->pu1_pic_tu_map |
1562 | 205k | + ctb_luma_min_tu_cnt * ctb_addr; |
1563 | 205k | if(1 == ps_codec->i4_num_cores) |
1564 | 92.4k | { |
1565 | 92.4k | ps_proc->ps_tu = ps_proc->ps_pic_tu + ps_proc->pu4_pic_tu_idx[ctb_addr % RESET_TU_BUF_NCTB]; |
1566 | 92.4k | } |
1567 | 113k | else |
1568 | 113k | { |
1569 | 113k | ps_proc->ps_tu = ps_proc->ps_pic_tu + ps_proc->pu4_pic_tu_idx[ctb_addr]; |
1570 | 113k | } |
1571 | 205k | ps_proc->pv_tu_coeff_data = (UWORD8 *)ps_proc->pv_pic_tu_coeff_data |
1572 | 205k | + tu_coeff_data_ofst; |
1573 | | |
1574 | 205k | } |
1575 | | |
1576 | | /* Compute PU related elements for the current CTB set */ |
1577 | 205k | { |
1578 | 205k | WORD32 pu_idx; |
1579 | 205k | ctb_addr = ps_proc->i4_ctb_y * num_ctb_in_row + ps_proc->i4_ctb_x; |
1580 | 205k | pu_idx = ps_proc->pu4_pic_pu_idx[ctb_addr]; |
1581 | 205k | ps_proc->pu1_pu_map = ps_proc->pu1_pic_pu_map |
1582 | 205k | + ctb_addr * num_minpu_in_ctb; |
1583 | 205k | ps_proc->ps_pu = ps_proc->ps_pic_pu + pu_idx; |
1584 | 205k | } |
1585 | | |
1586 | | /* Number of ctbs processed in one loop of process function */ |
1587 | 205k | { |
1588 | 205k | ps_proc->i4_nctb = MIN(ps_codec->u4_nctb, ps_tile->u2_wd); |
1589 | 205k | } |
1590 | | |
1591 | 205k | } |
1592 | | void ihevcd_process_thread(process_ctxt_t *ps_proc) |
1593 | 1.00k | { |
1594 | 1.00k | IHEVCD_ERROR_T ret = (IHEVCD_ERROR_T)IHEVCD_SUCCESS; |
1595 | 1.00k | { |
1596 | 1.00k | ithread_set_affinity(ps_proc->i4_id + 1); |
1597 | 1.00k | } |
1598 | | |
1599 | 19.8k | while(1) |
1600 | 19.8k | { |
1601 | 19.8k | codec_t *ps_dec = ps_proc->ps_codec; |
1602 | 19.8k | if(ps_proc->ps_codec->i4_threads_active) |
1603 | 19.8k | { |
1604 | 19.8k | DEBUG("In ihevcd_process_thread \n"); |
1605 | | |
1606 | 19.8k | ret = ithread_mutex_lock(ps_dec->apv_proc_start_mutex[ps_proc->i4_id]); |
1607 | 19.8k | if((IHEVCD_ERROR_T)IHEVCD_SUCCESS != ret) |
1608 | 0 | break; |
1609 | | |
1610 | 38.6k | while(!ps_dec->ai4_process_start[ps_proc->i4_id]) |
1611 | 18.8k | { |
1612 | 18.8k | ithread_cond_wait(ps_dec->apv_proc_start_condition[ps_proc->i4_id], |
1613 | 18.8k | ps_dec->apv_proc_start_mutex[ps_proc->i4_id]); |
1614 | 18.8k | } |
1615 | 19.8k | ps_dec->ai4_process_start[ps_proc->i4_id] = 0; |
1616 | 19.8k | ret = ithread_mutex_unlock(ps_dec->apv_proc_start_mutex[ps_proc->i4_id]); |
1617 | 19.8k | if((IHEVCD_ERROR_T)IHEVCD_SUCCESS != ret) |
1618 | 0 | break; |
1619 | | |
1620 | 19.8k | DEBUG(" Got control at ihevcd_process_thread \n"); |
1621 | | |
1622 | 19.8k | if(ps_dec->i4_break_threads == 1) |
1623 | 1.00k | break; |
1624 | 19.8k | } |
1625 | 168k | while(1) |
1626 | 168k | { |
1627 | 168k | proc_job_t s_job; |
1628 | | |
1629 | 168k | ret = ihevcd_jobq_dequeue((jobq_t *)ps_proc->pv_proc_jobq, &s_job, |
1630 | 168k | sizeof(proc_job_t), 1); |
1631 | 168k | if((IHEVCD_ERROR_T)IHEVCD_SUCCESS != ret) |
1632 | 18.8k | break; |
1633 | | |
1634 | 149k | ps_proc->i4_ctb_cnt = s_job.i2_ctb_cnt; |
1635 | 149k | ps_proc->i4_ctb_x = s_job.i2_ctb_x; |
1636 | 149k | ps_proc->i4_ctb_y = s_job.i2_ctb_y; |
1637 | 149k | ps_proc->i4_cur_slice_idx = s_job.i2_slice_idx; |
1638 | | |
1639 | | |
1640 | | |
1641 | 149k | if(CMD_PROCESS == s_job.i4_cmd) |
1642 | 77.3k | { |
1643 | 77.3k | ihevcd_init_proc_ctxt(ps_proc, s_job.i4_tu_coeff_data_ofst); |
1644 | 77.3k | ihevcd_process(ps_proc); |
1645 | 77.3k | } |
1646 | 72.0k | else if(CMD_FMTCONV == s_job.i4_cmd) |
1647 | 72.2k | { |
1648 | 72.2k | sps_t *ps_sps; |
1649 | 72.2k | codec_t *ps_codec; |
1650 | 72.2k | ivd_out_bufdesc_t *ps_out_buffer; |
1651 | 72.2k | WORD32 num_rows; |
1652 | | |
1653 | 72.2k | if(0 == ps_proc->i4_init_done) |
1654 | 288 | { |
1655 | 288 | ihevcd_init_proc_ctxt(ps_proc, 0); |
1656 | 288 | } |
1657 | 72.2k | ps_sps = ps_proc->ps_sps; |
1658 | 72.2k | ps_codec = ps_proc->ps_codec; |
1659 | 72.2k | ps_out_buffer = ps_proc->ps_out_buffer; |
1660 | 72.2k | num_rows = 1 << ps_sps->i1_log2_ctb_size; |
1661 | | |
1662 | 72.2k | num_rows = MIN(num_rows, |
1663 | 72.2k | (ps_codec->i4_disp_ht - (s_job.i2_ctb_y << ps_sps->i1_log2_ctb_size)) |
1664 | 72.2k | ); |
1665 | | |
1666 | 72.2k | if(num_rows < 0) |
1667 | 0 | num_rows = 0; |
1668 | | |
1669 | 72.2k | ihevcd_fmt_conv(ps_proc->ps_codec, ps_proc, ps_out_buffer->pu1_bufs[0], |
1670 | 72.2k | ps_out_buffer->pu1_bufs[1], ps_out_buffer->pu1_bufs[2], |
1671 | 72.2k | s_job.i2_ctb_y << ps_sps->i1_log2_ctb_size, num_rows); |
1672 | 72.2k | } |
1673 | 149k | } |
1674 | 18.8k | if(ps_proc->ps_codec->i4_threads_active) |
1675 | 18.8k | { |
1676 | 18.8k | ret = ithread_mutex_lock(ps_dec->apv_proc_done_mutex[ps_proc->i4_id]); |
1677 | 18.8k | if((IHEVCD_ERROR_T)IHEVCD_SUCCESS != ret) |
1678 | 0 | break; |
1679 | | |
1680 | 18.8k | ps_dec->ai4_process_done[ps_proc->i4_id] = 1; |
1681 | 18.8k | ithread_cond_signal(ps_dec->apv_proc_done_condition[ps_proc->i4_id]); |
1682 | | |
1683 | 18.8k | ret = ithread_mutex_unlock(ps_dec->apv_proc_done_mutex[ps_proc->i4_id]); |
1684 | 18.8k | if((IHEVCD_ERROR_T)IHEVCD_SUCCESS != ret) |
1685 | 0 | break; |
1686 | 18.8k | } |
1687 | 9 | else |
1688 | 9 | { |
1689 | 9 | break; |
1690 | 9 | } |
1691 | 18.8k | } |
1692 | | //ithread_exit(0); |
1693 | 1.00k | return; |
1694 | 1.00k | } |
1695 | | |