/src/libhevc/decoder/ihevcd_process_slice.c
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ******************************************************************************/ |
18 | | /** |
19 | | ******************************************************************************* |
20 | | * @file |
21 | | * ihevcd_process_slice.c |
22 | | * |
23 | | * @brief |
24 | | * Contains functions for processing slice data |
25 | | * |
26 | | * @author |
27 | | * Harish |
28 | | * |
29 | | * @par List of Functions: |
30 | | * |
31 | | * @remarks |
32 | | * None |
33 | | * |
34 | | ******************************************************************************* |
35 | | */ |
36 | | /*****************************************************************************/ |
37 | | /* File Includes */ |
38 | | /*****************************************************************************/ |
39 | | #include <stdio.h> |
40 | | #include <stddef.h> |
41 | | #include <stdlib.h> |
42 | | #include <string.h> |
43 | | #include <assert.h> |
44 | | |
45 | | #include "ihevc_typedefs.h" |
46 | | #include "iv.h" |
47 | | #include "ivd.h" |
48 | | #include "ihevcd_cxa.h" |
49 | | #include "ithread.h" |
50 | | |
51 | | #include "ihevc_defs.h" |
52 | | #include "ihevc_debug.h" |
53 | | #include "ihevc_defs.h" |
54 | | #include "ihevc_structs.h" |
55 | | #include "ihevc_macros.h" |
56 | | #include "ihevc_platform_macros.h" |
57 | | #include "ihevc_cabac_tables.h" |
58 | | #include "ihevc_padding.h" |
59 | | #include "ihevc_iquant_itrans_recon.h" |
60 | | #include "ihevc_chroma_iquant_itrans_recon.h" |
61 | | #include "ihevc_recon.h" |
62 | | #include "ihevc_chroma_recon.h" |
63 | | #include "ihevc_iquant_recon.h" |
64 | | #include "ihevc_chroma_iquant_recon.h" |
65 | | #include "ihevc_intra_pred.h" |
66 | | |
67 | | #include "ihevc_error.h" |
68 | | #include "ihevc_common_tables.h" |
69 | | #include "ihevc_quant_tables.h" |
70 | | #include "ihevcd_common_tables.h" |
71 | | |
72 | | #include "ihevcd_profile.h" |
73 | | #include "ihevcd_trace.h" |
74 | | #include "ihevcd_defs.h" |
75 | | #include "ihevcd_function_selector.h" |
76 | | #include "ihevcd_structs.h" |
77 | | #include "ihevcd_error.h" |
78 | | #include "ihevcd_nal.h" |
79 | | #include "ihevcd_bitstream.h" |
80 | | #include "ihevcd_job_queue.h" |
81 | | #include "ihevcd_utils.h" |
82 | | #include "ihevcd_debug.h" |
83 | | #include "ihevcd_get_mv.h" |
84 | | #include "ihevcd_inter_pred.h" |
85 | | #include "ihevcd_iquant_itrans_recon_ctb.h" |
86 | | #include "ihevcd_boundary_strength.h" |
87 | | #include "ihevcd_deblk.h" |
88 | | #include "ihevcd_fmt_conv.h" |
89 | | #include "ihevcd_sao.h" |
90 | | #include "ihevcd_profile.h" |
91 | | |
92 | | IHEVCD_ERROR_T ihevcd_fmt_conv(codec_t *ps_codec, |
93 | | process_ctxt_t *ps_proc, |
94 | | UWORD8 *pu1_y_dst, |
95 | | UWORD8 *pu1_u_dst, |
96 | | UWORD8 *pu1_v_dst, |
97 | | WORD32 cur_row, |
98 | | WORD32 num_rows); |
99 | | |
/*
 * Processing stages tracked in the proc map.
 * The numeric value of each enumerator is used as a bit position
 * (1 << value) by ihevcd_proc_map_check() and ihevcd_proc_map_update(),
 * so the values must stay small and distinct.
 */
typedef enum
{
    PROC_ALL        = 0,
    PROC_INTER_PRED = 1,
    PROC_RECON      = 2,
    PROC_DEBLK      = 3,
    PROC_SAO        = 4
} proc_type_t;
108 | | |
109 | | void ihevcd_proc_map_check(process_ctxt_t *ps_proc, proc_type_t proc_type, WORD32 nctb) |
110 | 0 | { |
111 | 0 | tile_t *ps_tile = ps_proc->ps_tile; |
112 | 0 | sps_t *ps_sps = ps_proc->ps_sps; |
113 | 0 | pps_t *ps_pps = ps_proc->ps_pps; |
114 | 0 | codec_t *ps_codec = ps_proc->ps_codec; |
115 | 0 | WORD32 idx; |
116 | 0 | WORD32 nop_cnt; |
117 | 0 | WORD32 bit_pos = proc_type; |
118 | 0 | WORD32 bit_mask = (1 << bit_pos); |
119 | |
|
120 | 0 | if(ps_proc->i4_check_proc_status) |
121 | 0 | { |
122 | 0 | nop_cnt = PROC_NOP_CNT; |
123 | 0 | while(1) |
124 | 0 | { |
125 | 0 | volatile UWORD8 *pu1_buf; |
126 | 0 | volatile WORD32 status; |
127 | 0 | status = 1; |
128 | | /* Check if all dependencies for the next nCTBs are met */ |
129 | 0 | { |
130 | 0 | WORD32 x_pos; |
131 | |
|
132 | 0 | { |
133 | | /* Check if the top right of next nCTBs are processed */ |
134 | 0 | if(ps_proc->i4_ctb_y > 0) |
135 | 0 | { |
136 | 0 | x_pos = (ps_proc->i4_ctb_tile_x + nctb); |
137 | 0 | idx = MIN(x_pos, (ps_tile->u2_wd - 1)); |
138 | | |
139 | | /* Check if top-right CTB for the last CTB in nCTB is within the tile */ |
140 | 0 | { |
141 | 0 | idx += ps_tile->u1_pos_x; |
142 | 0 | idx += ((ps_proc->i4_ctb_y - 1) |
143 | 0 | * ps_sps->i2_pic_wd_in_ctb); |
144 | 0 | pu1_buf = (ps_codec->pu1_proc_map + idx); |
145 | 0 | status = *pu1_buf & bit_mask; |
146 | 0 | } |
147 | 0 | } |
148 | 0 | } |
149 | | |
150 | | /* If tiles are enabled, then test left and top-left as well */ |
151 | 0 | ps_pps = ps_proc->ps_pps; |
152 | 0 | if(ps_pps->i1_tiles_enabled_flag) |
153 | 0 | { |
154 | | /*Check if left ctb is processed*/ |
155 | 0 | if((ps_proc->i4_ctb_x > 0) && ((0 != status))) |
156 | 0 | { |
157 | 0 | x_pos = ps_tile->u1_pos_x + ps_proc->i4_ctb_tile_x - 1; |
158 | 0 | idx = x_pos + (ps_proc->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb); |
159 | 0 | pu1_buf = (ps_codec->pu1_proc_map + idx); |
160 | 0 | status = *pu1_buf & bit_mask; |
161 | 0 | } |
162 | | |
163 | | /*Check if top left ctb is processed*/ |
164 | 0 | if((ps_proc->i4_ctb_x > 0) && (0 != status) && (ps_proc->i4_ctb_y > 0)) |
165 | 0 | { |
166 | 0 | x_pos = ps_tile->u1_pos_x + ps_proc->i4_ctb_tile_x - 1; |
167 | 0 | idx = x_pos + ((ps_proc->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb); |
168 | 0 | pu1_buf = (ps_codec->pu1_proc_map + idx); |
169 | 0 | status = *pu1_buf & bit_mask; |
170 | 0 | } |
171 | 0 | } |
172 | 0 | } |
173 | |
|
174 | 0 | if(status) |
175 | 0 | break; |
176 | | |
177 | | /* if dependencies are not met, then wait for few cycles. |
178 | | * Even after few iterations, if the dependencies are not met then yield |
179 | | */ |
180 | 0 | if(nop_cnt > 0) |
181 | 0 | { |
182 | 0 | NOP(128); |
183 | 0 | nop_cnt -= 128; |
184 | 0 | } |
185 | 0 | else |
186 | 0 | { |
187 | 0 | nop_cnt = PROC_NOP_CNT; |
188 | 0 | ithread_yield(); |
189 | | //NOP(128 * 16); |
190 | 0 | } |
191 | 0 | } |
192 | 0 | DATA_SYNC(); |
193 | 0 | } |
194 | 0 | } |
195 | | |
196 | | void ihevcd_proc_map_update(process_ctxt_t *ps_proc, proc_type_t proc_type, WORD32 nctb) |
197 | 0 | { |
198 | 0 | codec_t *ps_codec = ps_proc->ps_codec; |
199 | 0 | WORD32 i, idx; |
200 | 0 | WORD32 bit_pos = proc_type; |
201 | 0 | WORD32 bit_mask = (1 << bit_pos); |
202 | | |
203 | | /* Update the current CTBs processing status */ |
204 | 0 | if(ps_proc->i4_check_proc_status) |
205 | 0 | { |
206 | 0 | DATA_SYNC(); |
207 | 0 | for(i = 0; i < nctb; i++) |
208 | 0 | { |
209 | 0 | sps_t *ps_sps = ps_proc->ps_sps; |
210 | 0 | UWORD8 *pu1_buf; |
211 | 0 | idx = (ps_proc->i4_ctb_x + i); |
212 | 0 | idx += ((ps_proc->i4_ctb_y) * ps_sps->i2_pic_wd_in_ctb); |
213 | 0 | pu1_buf = (ps_codec->pu1_proc_map + idx); |
214 | 0 | *pu1_buf = *pu1_buf | bit_mask; |
215 | 0 | } |
216 | 0 | } |
217 | 0 | } |
218 | | |
219 | | |
220 | | void ihevcd_slice_hdr_update(process_ctxt_t *ps_proc) |
221 | 0 | { |
222 | | |
223 | | /* Slice x and y are initialized in proc_init. But initialize slice x and y count here |
224 | | * if a new slice begins at the middle of a row since proc_init is invoked only at the beginning of each row */ |
225 | 0 | if(!((ps_proc->i4_ctb_x == 0) && (ps_proc->i4_ctb_y == 0))) |
226 | 0 | { |
227 | 0 | slice_header_t *ps_slice_hdr_next = ps_proc->ps_codec->ps_slice_hdr_base + ((ps_proc->i4_cur_slice_idx + 1) & (MAX_SLICE_HDR_CNT - 1)); |
228 | |
|
229 | 0 | if((ps_slice_hdr_next->i2_ctb_x == ps_proc->i4_ctb_x) |
230 | 0 | && (ps_slice_hdr_next->i2_ctb_y == ps_proc->i4_ctb_y)) |
231 | 0 | { |
232 | 0 | if(0 == ps_slice_hdr_next->i1_dependent_slice_flag) |
233 | 0 | { |
234 | 0 | ps_proc->i4_ctb_slice_x = 0; |
235 | 0 | ps_proc->i4_ctb_slice_y = 0; |
236 | 0 | } |
237 | |
|
238 | 0 | ps_proc->i4_cur_slice_idx++; |
239 | 0 | ps_proc->ps_slice_hdr = ps_slice_hdr_next; |
240 | 0 | } |
241 | |
|
242 | 0 | } |
243 | 0 | } |
244 | | |
245 | | void ihevcd_ctb_pos_update(process_ctxt_t *ps_proc, WORD32 nctb) |
246 | 0 | { |
247 | 0 | WORD32 tile_start_ctb_idx, slice_start_ctb_idx; |
248 | 0 | slice_header_t *ps_slice_hdr = ps_proc->ps_slice_hdr; |
249 | 0 | tile_t *ps_tile = ps_proc->ps_tile; |
250 | 0 | sps_t *ps_sps = ps_proc->ps_sps; |
251 | | |
252 | | /* Update x and y positions */ |
253 | 0 | ps_proc->i4_ctb_tile_x += nctb; |
254 | 0 | ps_proc->i4_ctb_x += nctb; |
255 | |
|
256 | 0 | ps_proc->i4_ctb_slice_x += nctb; |
257 | | /*If tile are enabled, then handle the tile & slice counters differently*/ |
258 | 0 | if(ps_proc->ps_pps->i1_tiles_enabled_flag) |
259 | 0 | { |
260 | | /* Update slice counters*/ |
261 | 0 | slice_start_ctb_idx = ps_slice_hdr->i2_ctb_x + (ps_slice_hdr->i2_ctb_y * ps_sps->i2_pic_wd_in_ctb); |
262 | 0 | tile_start_ctb_idx = ps_tile->u1_pos_x + (ps_tile->u1_pos_y * ps_sps->i2_pic_wd_in_ctb); |
263 | | /* |
264 | | * There can be 2 cases where slice counters must be handled differently. |
265 | | * 1 - Multiple tiles span across a single/one of the many slice. |
266 | | * 2 - Multiple slices span across a single/one of the many tiles. |
267 | | */ |
268 | | |
269 | | /*Case 1 */ |
270 | 0 | if(slice_start_ctb_idx < tile_start_ctb_idx) |
271 | 0 | { |
272 | | /*End of tile row*/ |
273 | 0 | if(ps_proc->i4_ctb_x > ps_slice_hdr->i2_ctb_x) |
274 | 0 | { |
275 | 0 | if(ps_proc->i4_ctb_slice_x >= (ps_tile->u2_wd + ps_tile->u1_pos_x)) |
276 | 0 | { |
277 | 0 | ps_proc->i4_ctb_slice_y++; |
278 | 0 | ps_proc->i4_ctb_slice_x = ps_proc->i4_ctb_slice_x |
279 | 0 | - ps_tile->u2_wd; |
280 | 0 | } |
281 | 0 | } |
282 | 0 | else |
283 | 0 | { |
284 | 0 | WORD32 temp_stride = (ps_sps->i2_pic_wd_in_ctb - ps_slice_hdr->i2_ctb_x); |
285 | 0 | if(ps_proc->i4_ctb_slice_x >= (temp_stride + ps_tile->u2_wd + ps_tile->u1_pos_x)) |
286 | 0 | { |
287 | 0 | ps_proc->i4_ctb_slice_y++; |
288 | 0 | ps_proc->i4_ctb_slice_x = ps_proc->i4_ctb_slice_x |
289 | 0 | - ps_tile->u2_wd; |
290 | 0 | } |
291 | 0 | } |
292 | 0 | } |
293 | | /*Case 2*/ |
294 | 0 | else if(ps_proc->i4_ctb_slice_x >= (ps_tile->u2_wd)) |
295 | 0 | { |
296 | | /*End of tile row*/ |
297 | 0 | ps_proc->i4_ctb_slice_y++; |
298 | 0 | ps_proc->i4_ctb_slice_x = 0; |
299 | 0 | } |
300 | 0 | } |
301 | 0 | else |
302 | 0 | { |
303 | 0 | if(ps_proc->i4_ctb_slice_x >= ps_tile->u2_wd) |
304 | 0 | { |
305 | 0 | ps_proc->i4_ctb_slice_y++; |
306 | 0 | ps_proc->i4_ctb_slice_x = ps_proc->i4_ctb_slice_x |
307 | 0 | - ps_tile->u2_wd; |
308 | 0 | } |
309 | 0 | } |
310 | 0 | } |
311 | | |
312 | | void ihevcd_ctb_avail_update(process_ctxt_t *ps_proc) |
313 | 0 | { |
314 | 0 | slice_header_t *ps_slice_hdr = ps_proc->ps_slice_hdr; |
315 | 0 | sps_t *ps_sps = ps_proc->ps_sps; |
316 | 0 | tile_t *ps_tile_prev; |
317 | 0 | tile_t *ps_tile = ps_proc->ps_tile; |
318 | 0 | WORD32 cur_pu_idx; |
319 | 0 | WORD32 tile_start_ctb_idx, slice_start_ctb_idx; |
320 | 0 | WORD16 i2_wd_in_ctb; |
321 | 0 | WORD32 continuous_tiles = 0; |
322 | 0 | WORD32 cur_ctb_idx; |
323 | 0 | WORD32 check_tile_wd; |
324 | |
|
325 | 0 | if((0 != ps_tile->u1_pos_x) && (0 != ps_tile->u1_pos_y)) |
326 | 0 | { |
327 | 0 | ps_tile_prev = ps_tile - 1; |
328 | 0 | } |
329 | 0 | else |
330 | 0 | { |
331 | 0 | ps_tile_prev = ps_tile; |
332 | 0 | } |
333 | | |
334 | |
|
335 | 0 | check_tile_wd = ps_slice_hdr->i2_ctb_x + ps_tile_prev->u2_wd; |
336 | 0 | if(!(((check_tile_wd >= ps_sps->i2_pic_wd_in_ctb) && (check_tile_wd % ps_sps->i2_pic_wd_in_ctb == ps_tile->u1_pos_x)) |
337 | 0 | || ((ps_slice_hdr->i2_ctb_x == ps_tile->u1_pos_x)))) |
338 | 0 | { |
339 | 0 | continuous_tiles = 1; |
340 | 0 | } |
341 | |
|
342 | 0 | slice_start_ctb_idx = ps_slice_hdr->i2_ctb_x + (ps_slice_hdr->i2_ctb_y * ps_sps->i2_pic_wd_in_ctb); |
343 | 0 | tile_start_ctb_idx = ps_tile->u1_pos_x + (ps_tile->u1_pos_y * ps_sps->i2_pic_wd_in_ctb); |
344 | |
|
345 | 0 | if((slice_start_ctb_idx < tile_start_ctb_idx) && (continuous_tiles)) |
346 | 0 | { |
347 | | //Slices span across multiple tiles. |
348 | 0 | i2_wd_in_ctb = ps_sps->i2_pic_wd_in_ctb; |
349 | 0 | } |
350 | 0 | else |
351 | 0 | { |
352 | 0 | i2_wd_in_ctb = ps_tile->u2_wd; |
353 | 0 | } |
354 | 0 | cur_ctb_idx = ps_proc->i4_ctb_x |
355 | 0 | + ps_proc->i4_ctb_y * (ps_sps->i2_pic_wd_in_ctb); |
356 | | |
357 | | /* Ctb level availability */ |
358 | | /* Bottom left will not be available at a CTB level, no need to pass this */ |
359 | 0 | ps_proc->u1_top_ctb_avail = 1; |
360 | 0 | ps_proc->u1_left_ctb_avail = 1; |
361 | 0 | ps_proc->u1_top_lt_ctb_avail = 1; |
362 | 0 | ps_proc->u1_top_rt_ctb_avail = 1; |
363 | | /* slice and tile boundaries */ |
364 | |
|
365 | 0 | if((0 == ps_proc->i4_ctb_y) || (0 == ps_proc->i4_ctb_tile_y)) |
366 | 0 | { |
367 | 0 | ps_proc->u1_top_ctb_avail = 0; |
368 | 0 | ps_proc->u1_top_lt_ctb_avail = 0; |
369 | 0 | ps_proc->u1_top_rt_ctb_avail = 0; |
370 | 0 | } |
371 | |
|
372 | 0 | if((0 == ps_proc->i4_ctb_x) || (0 == ps_proc->i4_ctb_tile_x)) |
373 | 0 | { |
374 | 0 | ps_proc->u1_left_ctb_avail = 0; |
375 | 0 | ps_proc->u1_top_lt_ctb_avail = 0; |
376 | 0 | if((0 == ps_proc->i4_ctb_slice_y) || (0 == ps_proc->i4_ctb_tile_y)) |
377 | 0 | { |
378 | 0 | ps_proc->u1_top_ctb_avail = 0; |
379 | 0 | if((i2_wd_in_ctb - 1) != ps_proc->i4_ctb_slice_x) |
380 | 0 | { |
381 | 0 | ps_proc->u1_top_rt_ctb_avail = 0; |
382 | 0 | } |
383 | 0 | } |
384 | 0 | } |
385 | | /*For slices not beginning at start of a ctb row*/ |
386 | 0 | else if(ps_proc->i4_ctb_x > 0) |
387 | 0 | { |
388 | 0 | if((0 == ps_proc->i4_ctb_slice_y) || (0 == ps_proc->i4_ctb_tile_y)) |
389 | 0 | { |
390 | 0 | ps_proc->u1_top_ctb_avail = 0; |
391 | 0 | ps_proc->u1_top_lt_ctb_avail = 0; |
392 | 0 | if(0 == ps_proc->i4_ctb_slice_x) |
393 | 0 | { |
394 | 0 | ps_proc->u1_left_ctb_avail = 0; |
395 | 0 | } |
396 | 0 | if((i2_wd_in_ctb - 1) != ps_proc->i4_ctb_slice_x) |
397 | 0 | { |
398 | 0 | ps_proc->u1_top_rt_ctb_avail = 0; |
399 | 0 | } |
400 | 0 | } |
401 | 0 | else if((1 == ps_proc->i4_ctb_slice_y) && (0 == ps_proc->i4_ctb_slice_x)) |
402 | 0 | { |
403 | 0 | ps_proc->u1_top_lt_ctb_avail = 0; |
404 | 0 | } |
405 | 0 | } |
406 | |
|
407 | 0 | if((ps_proc->i4_ctb_x == (ps_sps->i2_pic_wd_in_ctb - 1)) || ((ps_tile->u2_wd - 1) == ps_proc->i4_ctb_tile_x)) |
408 | 0 | { |
409 | 0 | ps_proc->u1_top_rt_ctb_avail = 0; |
410 | 0 | } |
411 | | |
412 | |
|
413 | 0 | { |
414 | 0 | WORD32 next_ctb_idx; |
415 | 0 | next_ctb_idx = cur_ctb_idx + 1; |
416 | |
|
417 | 0 | if(ps_tile->u2_wd == (ps_proc->i4_ctb_tile_x + 1)) |
418 | 0 | { |
419 | 0 | if((ps_proc->i4_ctb_tile_y + 1) == ps_tile->u2_ht) |
420 | 0 | { |
421 | | //Last tile |
422 | 0 | if(((ps_proc->i4_ctb_tile_y + 1 + ps_tile->u1_pos_y) == ps_sps->i2_pic_ht_in_ctb) && ((ps_proc->i4_ctb_tile_x + 1 + ps_tile->u1_pos_x) == ps_sps->i2_pic_wd_in_ctb)) |
423 | 0 | { |
424 | 0 | next_ctb_idx = cur_ctb_idx + 1; |
425 | 0 | } |
426 | 0 | else //Not last tile, but new tile |
427 | 0 | { |
428 | 0 | tile_t *ps_tile_next = ps_proc->ps_tile + 1; |
429 | 0 | next_ctb_idx = ps_tile_next->u1_pos_x + (ps_tile_next->u1_pos_y * ps_sps->i2_pic_wd_in_ctb); |
430 | 0 | } |
431 | 0 | } |
432 | 0 | else //End of each tile row |
433 | 0 | { |
434 | 0 | next_ctb_idx = ((ps_tile->u1_pos_y + ps_proc->i4_ctb_tile_y + 1) * ps_sps->i2_pic_wd_in_ctb) + ps_tile->u1_pos_x; |
435 | 0 | } |
436 | 0 | } |
437 | 0 | ps_proc->i4_next_pu_ctb_cnt = next_ctb_idx; |
438 | 0 | ps_proc->i4_ctb_pu_cnt = |
439 | 0 | ps_proc->pu4_pic_pu_idx[next_ctb_idx] |
440 | 0 | - ps_proc->pu4_pic_pu_idx[cur_ctb_idx]; |
441 | 0 | cur_pu_idx = ps_proc->pu4_pic_pu_idx[cur_ctb_idx]; |
442 | 0 | ps_proc->i4_ctb_start_pu_idx = cur_pu_idx; |
443 | 0 | ps_proc->ps_pu = &ps_proc->ps_pic_pu[cur_pu_idx]; |
444 | 0 | } |
445 | 0 | } |
446 | | |
447 | | void ihevcd_update_ctb_tu_cnt(process_ctxt_t *ps_proc) |
448 | 0 | { |
449 | 0 | sps_t *ps_sps = ps_proc->ps_sps; |
450 | 0 | codec_t *ps_codec = ps_proc->ps_codec; |
451 | 0 | WORD32 cur_ctb_idx; |
452 | |
|
453 | 0 | cur_ctb_idx = ps_proc->i4_ctb_x |
454 | 0 | + ps_proc->i4_ctb_y * (ps_sps->i2_pic_wd_in_ctb); |
455 | |
|
456 | 0 | { |
457 | 0 | tile_t *ps_tile; |
458 | 0 | WORD32 next_ctb_tu_idx; |
459 | 0 | ps_tile = ps_proc->ps_tile; |
460 | | |
461 | |
|
462 | 0 | if(1 == ps_codec->i4_num_cores) |
463 | 0 | { |
464 | 0 | next_ctb_tu_idx = cur_ctb_idx % RESET_TU_BUF_NCTB + 1; |
465 | 0 | if(ps_tile->u2_wd == (ps_proc->i4_ctb_tile_x + 1)) |
466 | 0 | { |
467 | 0 | if((ps_proc->i4_ctb_tile_y + 1) == ps_tile->u2_ht) |
468 | 0 | { |
469 | | //Last tile |
470 | 0 | if(((ps_proc->i4_ctb_tile_y + 1 + ps_tile->u1_pos_y) == ps_sps->i2_pic_ht_in_ctb) && ((ps_proc->i4_ctb_tile_x + 1 + ps_tile->u1_pos_x) == ps_sps->i2_pic_wd_in_ctb)) |
471 | 0 | { |
472 | 0 | next_ctb_tu_idx = (cur_ctb_idx % RESET_TU_BUF_NCTB) + 1; |
473 | 0 | } |
474 | 0 | else //Not last tile, but new tile |
475 | 0 | { |
476 | 0 | tile_t *ps_tile_next = ps_proc->ps_tile + 1; |
477 | 0 | next_ctb_tu_idx = ps_tile_next->u1_pos_x + (ps_tile_next->u1_pos_y * ps_sps->i2_pic_wd_in_ctb); |
478 | 0 | } |
479 | 0 | } |
480 | 0 | else //End of each tile row |
481 | 0 | { |
482 | 0 | next_ctb_tu_idx = ((ps_tile->u1_pos_y + ps_proc->i4_ctb_tile_y + 1) * ps_sps->i2_pic_wd_in_ctb) + ps_tile->u1_pos_x; |
483 | 0 | } |
484 | 0 | } |
485 | 0 | ps_proc->i4_next_tu_ctb_cnt = next_ctb_tu_idx; |
486 | 0 | ps_proc->i4_ctb_tu_cnt = ps_proc->pu4_pic_tu_idx[next_ctb_tu_idx] - ps_proc->pu4_pic_tu_idx[cur_ctb_idx % RESET_TU_BUF_NCTB]; |
487 | 0 | } |
488 | 0 | else |
489 | 0 | { |
490 | 0 | next_ctb_tu_idx = cur_ctb_idx + 1; |
491 | 0 | if(ps_tile->u2_wd == (ps_proc->i4_ctb_tile_x + 1)) |
492 | 0 | { |
493 | 0 | if((ps_proc->i4_ctb_tile_y + 1) == ps_tile->u2_ht) |
494 | 0 | { |
495 | | //Last tile |
496 | 0 | if(((ps_proc->i4_ctb_tile_y + 1 + ps_tile->u1_pos_y) == ps_sps->i2_pic_ht_in_ctb) && ((ps_proc->i4_ctb_tile_x + 1 + ps_tile->u1_pos_x) == ps_sps->i2_pic_wd_in_ctb)) |
497 | 0 | { |
498 | 0 | next_ctb_tu_idx = (cur_ctb_idx % RESET_TU_BUF_NCTB) + 1; |
499 | 0 | } |
500 | 0 | else //Not last tile, but new tile |
501 | 0 | { |
502 | 0 | tile_t *ps_tile_next = ps_proc->ps_tile + 1; |
503 | 0 | next_ctb_tu_idx = ps_tile_next->u1_pos_x + (ps_tile_next->u1_pos_y * ps_sps->i2_pic_wd_in_ctb); |
504 | 0 | } |
505 | 0 | } |
506 | 0 | else //End of each tile row |
507 | 0 | { |
508 | 0 | next_ctb_tu_idx = ((ps_tile->u1_pos_y + ps_proc->i4_ctb_tile_y + 1) * ps_sps->i2_pic_wd_in_ctb) + ps_tile->u1_pos_x; |
509 | 0 | } |
510 | 0 | } |
511 | 0 | ps_proc->i4_next_tu_ctb_cnt = next_ctb_tu_idx; |
512 | 0 | ps_proc->i4_ctb_tu_cnt = ps_proc->pu4_pic_tu_idx[next_ctb_tu_idx] - |
513 | 0 | ps_proc->pu4_pic_tu_idx[cur_ctb_idx]; |
514 | 0 | } |
515 | 0 | } |
516 | 0 | } |
517 | | |
518 | | IHEVCD_ERROR_T ihevcd_process(process_ctxt_t *ps_proc) |
519 | 0 | { |
520 | 0 | IHEVCD_ERROR_T ret = (IHEVCD_ERROR_T)IHEVCD_SUCCESS; |
521 | 0 | codec_t *ps_codec; |
522 | 0 | sps_t *ps_sps = ps_proc->ps_sps; |
523 | |
|
524 | 0 | WORD32 nctb; |
525 | 0 | WORD32 i; |
526 | 0 | WORD32 idx; |
527 | 0 | WORD32 nop_cnt; |
528 | 0 | WORD32 num_minpu_in_ctb; |
529 | 0 | WORD32 cur_slice_idx, cur_ctb_tile_x, cur_ctb_slice_x, cur_ctb_tile_y, cur_ctb_slice_y; |
530 | 0 | WORD32 nxt_ctb_slice_y, nxt_ctb_slice_x; |
531 | 0 | tu_t *ps_tu_cur, *ps_tu_nxt; |
532 | 0 | UWORD8 *pu1_pu_map_cur, *pu1_pu_map_nxt; |
533 | 0 | WORD32 num_ctb, num_ctb_tmp; |
534 | 0 | proc_type_t proc_type; |
535 | | |
536 | |
|
537 | 0 | WORD32 ctb_size = 1 << ps_sps->i1_log2_ctb_size; |
538 | |
|
539 | 0 | WORD32 h_samp_factor, v_samp_factor; |
540 | 0 | WORD32 chroma_row_strd; |
541 | 0 | WORD32 chroma_pixel_strd = 2; |
542 | |
|
543 | 0 | h_samp_factor = (CHROMA_FMT_IDC_YUV444 == ps_sps->i1_chroma_format_idc) ? 1 : 2; |
544 | 0 | v_samp_factor = (CHROMA_FMT_IDC_YUV420 == ps_sps->i1_chroma_format_idc) ? 2 : 1; |
545 | |
|
546 | 0 | PROFILE_DISABLE_PROCESS_CTB(); |
547 | |
|
548 | 0 | ps_codec = ps_proc->ps_codec; |
549 | 0 | num_minpu_in_ctb = (ctb_size / MIN_PU_SIZE) * (ctb_size / MIN_PU_SIZE); |
550 | |
|
551 | 0 | nctb = MIN(ps_codec->i4_proc_nctb, ps_proc->i4_ctb_cnt); |
552 | 0 | nctb = MIN(nctb, (ps_proc->ps_tile->u2_wd - ps_proc->i4_ctb_tile_x)); |
553 | |
|
554 | 0 | if(ps_proc->i4_cur_slice_idx > (MAX_SLICE_HDR_CNT - 2 * ps_sps->i2_pic_wd_in_ctb)) |
555 | 0 | { |
556 | 0 | num_ctb = 1; |
557 | 0 | } |
558 | 0 | else |
559 | 0 | { |
560 | 0 | num_ctb = ps_proc->i4_nctb; |
561 | 0 | } |
562 | 0 | nxt_ctb_slice_y = ps_proc->i4_ctb_slice_y; |
563 | 0 | nxt_ctb_slice_x = ps_proc->i4_ctb_slice_x; |
564 | 0 | pu1_pu_map_nxt = ps_proc->pu1_pu_map; |
565 | 0 | ps_tu_nxt = ps_proc->ps_tu; |
566 | |
|
567 | 0 | while(ps_proc->i4_ctb_cnt) |
568 | 0 | { |
569 | 0 | ps_proc->i4_ctb_slice_y = nxt_ctb_slice_y; |
570 | 0 | ps_proc->i4_ctb_slice_x = nxt_ctb_slice_x; |
571 | 0 | ps_proc->pu1_pu_map = pu1_pu_map_nxt; |
572 | 0 | ps_proc->ps_tu = ps_tu_nxt; |
573 | |
|
574 | 0 | cur_ctb_tile_x = ps_proc->i4_ctb_tile_x; |
575 | 0 | cur_ctb_tile_y = ps_proc->i4_ctb_tile_y; |
576 | 0 | cur_ctb_slice_x = ps_proc->i4_ctb_slice_x; |
577 | 0 | cur_ctb_slice_y = ps_proc->i4_ctb_slice_y; |
578 | 0 | cur_slice_idx = ps_proc->i4_cur_slice_idx; |
579 | 0 | ps_tu_cur = ps_proc->ps_tu; |
580 | 0 | pu1_pu_map_cur = ps_proc->pu1_pu_map; |
581 | 0 | proc_type = PROC_INTER_PRED; |
582 | |
|
583 | 0 | if(ps_proc->i4_ctb_cnt < num_ctb) |
584 | 0 | { |
585 | 0 | num_ctb = ps_proc->i4_ctb_cnt; |
586 | 0 | } |
587 | 0 | num_ctb_tmp = num_ctb; |
588 | |
|
589 | 0 | while(num_ctb_tmp) |
590 | 0 | { |
591 | 0 | slice_header_t *ps_slice_hdr; |
592 | 0 | tile_t *ps_tile = ps_proc->ps_tile; |
593 | | |
594 | | /* Waiting for Parsing to be done*/ |
595 | 0 | { |
596 | | |
597 | |
|
598 | 0 | nop_cnt = PROC_NOP_CNT; |
599 | 0 | if(ps_proc->i4_check_parse_status || ps_proc->i4_check_proc_status) |
600 | 0 | { |
601 | 0 | while(1) |
602 | 0 | { |
603 | 0 | volatile UWORD8 *pu1_buf; |
604 | 0 | volatile WORD32 status; |
605 | 0 | status = 1; |
606 | | /* Check if all dependencies for the next nCTBs are met */ |
607 | | /* Check if the next nCTBs are parsed */ |
608 | 0 | if(ps_proc->i4_check_parse_status) |
609 | 0 | { |
610 | 0 | idx = (ps_proc->i4_ctb_x + nctb - 1); |
611 | 0 | idx += (ps_proc->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb); |
612 | 0 | pu1_buf = (ps_codec->pu1_parse_map + idx); |
613 | 0 | status = *pu1_buf; |
614 | 0 | } |
615 | |
|
616 | 0 | if(status) |
617 | 0 | break; |
618 | | |
619 | | /* if dependencies are not met, then wait for few cycles. |
620 | | * Even after few iterations, if the dependencies are not met then yield |
621 | | */ |
622 | 0 | if(nop_cnt > 0) |
623 | 0 | { |
624 | 0 | NOP(128); |
625 | 0 | nop_cnt -= 128; |
626 | 0 | } |
627 | 0 | else |
628 | 0 | { |
629 | 0 | nop_cnt = PROC_NOP_CNT; |
630 | 0 | ithread_yield(); |
631 | 0 | } |
632 | 0 | } |
633 | 0 | } |
634 | 0 | } |
635 | | |
636 | | /* Check proc map to ensure dependencies for recon are met */ |
637 | 0 | ihevcd_proc_map_check(ps_proc, proc_type, nctb); |
638 | |
|
639 | 0 | ihevcd_slice_hdr_update(ps_proc); |
640 | 0 | ps_slice_hdr = ps_proc->ps_slice_hdr; |
641 | | |
642 | | //ihevcd_mv_prediction(); |
643 | | //ihevcd_lvl_unpack(); |
644 | | //ihevcd_inter_iq_it_recon(); |
645 | | //Following does prediction, iq, it and recon on a TU by TU basis for intra TUs |
646 | | //ihevcd_intra_process(); |
647 | | //ihevcd_ctb_boundary_strength_islice(ps_proc, ctb_size); |
648 | | //ihevcd_deblk_ctb(ps_proc); |
649 | | |
650 | | /* iq,it recon of Intra TU */ |
651 | 0 | { |
652 | 0 | UWORD32 *pu4_ctb_top_pu_idx, *pu4_ctb_left_pu_idx, *pu4_ctb_top_left_pu_idx; |
653 | 0 | WORD32 cur_ctb_idx; |
654 | |
|
655 | 0 | ihevcd_ctb_avail_update(ps_proc); |
656 | |
|
657 | | #if DEBUG_DUMP_FRAME_BUFFERS_INFO |
658 | | au1_pic_avail_ctb_flags[ps_proc->i4_ctb_x + ps_proc->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb] = |
659 | | ((ps_proc->u1_top_ctb_avail << 3) | (ps_proc->u1_left_ctb_avail << 2) | (ps_proc->u1_top_lt_ctb_avail << 1) | (ps_proc->u1_top_rt_ctb_avail)); |
660 | | au4_pic_ctb_slice_xy[ps_proc->i4_ctb_x + ps_proc->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb] = |
661 | | (((UWORD16)ps_proc->i4_ctb_slice_x << 16) | ((UWORD16)ps_proc->i4_ctb_slice_y << 16)); |
662 | | #endif |
663 | | |
664 | | /*************************************************/ |
665 | | /**************** MV pred **********************/ |
666 | | /*************************************************/ |
667 | 0 | if(PSLICE == ps_slice_hdr->i1_slice_type |
668 | 0 | || BSLICE == ps_slice_hdr->i1_slice_type) |
669 | 0 | { |
670 | 0 | mv_ctxt_t s_mv_ctxt; |
671 | |
|
672 | 0 | pu4_ctb_top_pu_idx = ps_proc->pu4_pic_pu_idx_top |
673 | 0 | + (ps_proc->i4_ctb_x * ctb_size / MIN_PU_SIZE); |
674 | 0 | pu4_ctb_left_pu_idx = ps_proc->pu4_pic_pu_idx_left; |
675 | 0 | pu4_ctb_top_left_pu_idx = &ps_proc->u4_ctb_top_left_pu_idx; |
676 | | |
677 | | /* Initializing s_mv_ctxt */ |
678 | 0 | if(ps_codec->i4_num_cores > MV_PRED_NUM_CORES_THRESHOLD) |
679 | 0 | { |
680 | 0 | s_mv_ctxt.ps_pps = ps_proc->ps_pps; |
681 | 0 | s_mv_ctxt.ps_sps = ps_proc->ps_sps; |
682 | 0 | s_mv_ctxt.ps_slice_hdr = ps_proc->ps_slice_hdr; |
683 | 0 | s_mv_ctxt.i4_ctb_x = ps_proc->i4_ctb_x; |
684 | 0 | s_mv_ctxt.i4_ctb_y = ps_proc->i4_ctb_y; |
685 | 0 | s_mv_ctxt.ps_pu = ps_proc->ps_pu; |
686 | 0 | s_mv_ctxt.ps_pic_pu = ps_proc->ps_pic_pu; |
687 | 0 | s_mv_ctxt.ps_tile = ps_tile; |
688 | 0 | s_mv_ctxt.pu4_pic_pu_idx_map = ps_proc->pu4_pic_pu_idx_map; |
689 | 0 | s_mv_ctxt.pu4_pic_pu_idx = ps_proc->pu4_pic_pu_idx; |
690 | 0 | s_mv_ctxt.pu1_pic_pu_map = ps_proc->pu1_pic_pu_map; |
691 | 0 | s_mv_ctxt.i4_ctb_pu_cnt = ps_proc->i4_ctb_pu_cnt; |
692 | 0 | s_mv_ctxt.i4_ctb_start_pu_idx = ps_proc->i4_ctb_start_pu_idx; |
693 | 0 | s_mv_ctxt.u1_top_ctb_avail = ps_proc->u1_top_ctb_avail; |
694 | 0 | s_mv_ctxt.u1_top_rt_ctb_avail = ps_proc->u1_top_rt_ctb_avail; |
695 | 0 | s_mv_ctxt.u1_top_lt_ctb_avail = ps_proc->u1_top_lt_ctb_avail; |
696 | 0 | s_mv_ctxt.u1_left_ctb_avail = ps_proc->u1_left_ctb_avail; |
697 | |
|
698 | 0 | ihevcd_get_mv_ctb(&s_mv_ctxt, pu4_ctb_top_pu_idx, |
699 | 0 | pu4_ctb_left_pu_idx, pu4_ctb_top_left_pu_idx); |
700 | 0 | } |
701 | |
|
702 | 0 | ihevcd_inter_pred_ctb(ps_proc); |
703 | 0 | } |
704 | 0 | else if(ps_codec->i4_num_cores > MV_PRED_NUM_CORES_THRESHOLD) |
705 | 0 | { |
706 | 0 | WORD32 next_ctb_idx, num_pu_per_ctb, ctb_start_pu_idx, pu_cnt; |
707 | 0 | pu_t *ps_pu; |
708 | 0 | WORD32 num_minpu_in_ctb = (ctb_size / MIN_PU_SIZE) * (ctb_size / MIN_PU_SIZE); |
709 | 0 | UWORD8 *pu1_pic_pu_map_ctb = ps_proc->pu1_pic_pu_map + |
710 | 0 | (ps_proc->i4_ctb_x + ps_proc->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb) * num_minpu_in_ctb; |
711 | 0 | WORD32 row, col; |
712 | 0 | UWORD32 *pu4_nbr_pu_idx = ps_proc->pu4_pic_pu_idx_map; |
713 | 0 | WORD32 nbr_pu_idx_strd = MAX_CTB_SIZE / MIN_PU_SIZE + 2; |
714 | 0 | WORD32 ctb_size_in_min_pu = (ctb_size / MIN_PU_SIZE); |
715 | | |
716 | | /* Neighbor PU idx update inside CTB */ |
717 | | /* 1byte per 4x4. Indicates the PU idx that 4x4 block belongs to */ |
718 | |
|
719 | 0 | cur_ctb_idx = ps_proc->i4_ctb_x |
720 | 0 | + ps_proc->i4_ctb_y * (ps_sps->i2_pic_wd_in_ctb); |
721 | 0 | next_ctb_idx = ps_proc->i4_next_pu_ctb_cnt; |
722 | 0 | num_pu_per_ctb = ps_proc->pu4_pic_pu_idx[next_ctb_idx] |
723 | 0 | - ps_proc->pu4_pic_pu_idx[cur_ctb_idx]; |
724 | 0 | ctb_start_pu_idx = ps_proc->pu4_pic_pu_idx[cur_ctb_idx]; |
725 | 0 | ps_pu = &ps_proc->ps_pic_pu[ctb_start_pu_idx]; |
726 | |
|
727 | 0 | for(pu_cnt = 0; pu_cnt < num_pu_per_ctb; pu_cnt++, ps_pu++) |
728 | 0 | { |
729 | 0 | UWORD32 cur_pu_idx; |
730 | 0 | WORD32 pu_ht = (ps_pu->b4_ht + 1) << 2; |
731 | 0 | WORD32 pu_wd = (ps_pu->b4_wd + 1) << 2; |
732 | |
|
733 | 0 | cur_pu_idx = ctb_start_pu_idx + pu_cnt; |
734 | |
|
735 | 0 | for(row = 0; row < pu_ht / MIN_PU_SIZE; row++) |
736 | 0 | for(col = 0; col < pu_wd / MIN_PU_SIZE; col++) |
737 | 0 | pu4_nbr_pu_idx[(1 + ps_pu->b4_pos_x + col) |
738 | 0 | + (1 + ps_pu->b4_pos_y + row) |
739 | 0 | * nbr_pu_idx_strd] = |
740 | 0 | cur_pu_idx; |
741 | 0 | } |
742 | | |
743 | | /* Updating Top and Left pointers */ |
744 | 0 | { |
745 | 0 | WORD32 rows_remaining = ps_sps->i2_pic_height_in_luma_samples |
746 | 0 | - (ps_proc->i4_ctb_y << ps_sps->i1_log2_ctb_size); |
747 | 0 | WORD32 ctb_size_left = MIN(ctb_size, rows_remaining); |
748 | | |
749 | | /* Top Left */ |
750 | | /* saving top left before updating top ptr, as updating top ptr will overwrite the top left for the next ctb */ |
751 | 0 | ps_proc->u4_ctb_top_left_pu_idx = ps_proc->pu4_pic_pu_idx_top[((ps_proc->i4_ctb_x + 1) * ctb_size / MIN_PU_SIZE) - 1]; |
752 | 0 | for(i = 0; i < ctb_size / MIN_PU_SIZE; i++) |
753 | 0 | { |
754 | | /* Left */ |
755 | | /* Last column of au4_nbr_pu_idx */ |
756 | 0 | ps_proc->pu4_pic_pu_idx_left[i] = |
757 | 0 | pu4_nbr_pu_idx[(ctb_size / MIN_PU_SIZE) + (i + 1) * nbr_pu_idx_strd]; |
758 | | /* Top */ |
759 | | /* Last row of au4_nbr_pu_idx */ |
760 | 0 | ps_proc->pu4_pic_pu_idx_top[(ps_proc->i4_ctb_x * ctb_size / MIN_PU_SIZE) + i] = |
761 | 0 | pu4_nbr_pu_idx[(ctb_size_left / MIN_PU_SIZE) * nbr_pu_idx_strd + i + 1]; |
762 | |
|
763 | 0 | } |
764 | | |
765 | | /* Updating the CTB level PU idx (Used for collocated MV pred)*/ |
766 | 0 | { |
767 | 0 | WORD32 ctb_row, ctb_col, index_pic_map, index_nbr_map; |
768 | 0 | WORD32 first_pu_of_ctb; |
769 | 0 | first_pu_of_ctb = pu4_nbr_pu_idx[1 + nbr_pu_idx_strd]; |
770 | 0 | UWORD32 cur_ctb_ht_in_min_pu = MIN(((ps_sps->i2_pic_height_in_luma_samples |
771 | 0 | - (ps_proc->i4_ctb_y << ps_sps->i1_log2_ctb_size)) / MIN_PU_SIZE), ctb_size_in_min_pu); |
772 | 0 | UWORD32 cur_ctb_wd_in_min_pu = MIN(((ps_sps->i2_pic_width_in_luma_samples |
773 | 0 | - (ps_proc->i4_ctb_x << ps_sps->i1_log2_ctb_size)) / MIN_PU_SIZE), ctb_size_in_min_pu); |
774 | |
|
775 | 0 | index_pic_map = 0 * ctb_size_in_min_pu + 0; |
776 | 0 | index_nbr_map = (0 + 1) * nbr_pu_idx_strd + (0 + 1); |
777 | |
|
778 | 0 | for(ctb_row = 0; ctb_row < cur_ctb_ht_in_min_pu; ctb_row++) |
779 | 0 | { |
780 | 0 | for(ctb_col = 0; ctb_col < cur_ctb_wd_in_min_pu; ctb_col++) |
781 | 0 | { |
782 | 0 | pu1_pic_pu_map_ctb[index_pic_map + ctb_col] = pu4_nbr_pu_idx[index_nbr_map + ctb_col] |
783 | 0 | - first_pu_of_ctb; |
784 | 0 | } |
785 | 0 | index_pic_map += ctb_size_in_min_pu; |
786 | 0 | index_nbr_map += nbr_pu_idx_strd; |
787 | 0 | } |
788 | 0 | } |
789 | 0 | } |
790 | 0 | } |
791 | 0 | } |
792 | |
|
793 | 0 | if(ps_proc->ps_pps->i1_tiles_enabled_flag) |
794 | 0 | { |
795 | | /*Update the tile index buffer with tile information for the current ctb*/ |
796 | 0 | UWORD16 *pu1_tile_idx = ps_proc->pu1_tile_idx; |
797 | 0 | pu1_tile_idx[(ps_proc->i4_ctb_x + (ps_proc->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb))] |
798 | 0 | = ps_proc->i4_cur_tile_idx; |
799 | 0 | } |
800 | | |
801 | | /*************************************************/ |
802 | | /*********** BS, QP and Deblocking **************/ |
803 | | /*************************************************/ |
804 | | /* Boundary strength call has to be after IQ IT recon since QP population needs ps_proc->i4_qp_const_inc_ctb flag */ |
805 | |
|
806 | 0 | { |
807 | 0 | slice_header_t *ps_slice_hdr; |
808 | 0 | ps_slice_hdr = ps_proc->ps_slice_hdr; |
809 | | |
810 | | |
811 | | /* Check if deblock is disabled for the current slice or if it is disabled for the current picture |
812 | | * because of disable deblock api |
813 | | */ |
814 | 0 | if(0 == ps_codec->i4_disable_deblk_pic) |
815 | 0 | { |
816 | 0 | if(ps_codec->i4_num_cores > MV_PRED_NUM_CORES_THRESHOLD) |
817 | 0 | { |
818 | | /* Boundary strength calculation is done irrespective of whether deblocking is disabled |
819 | | * in the slice or not, to handle deblocking slice boundaries */ |
820 | 0 | if((0 == ps_codec->i4_slice_error)) |
821 | 0 | { |
822 | 0 | ihevcd_update_ctb_tu_cnt(ps_proc); |
823 | 0 | ps_proc->s_bs_ctxt.ps_pps = ps_proc->ps_pps; |
824 | 0 | ps_proc->s_bs_ctxt.ps_sps = ps_proc->ps_sps; |
825 | 0 | ps_proc->s_bs_ctxt.ps_codec = ps_proc->ps_codec; |
826 | 0 | ps_proc->s_bs_ctxt.i4_ctb_tu_cnt = ps_proc->i4_ctb_tu_cnt; |
827 | 0 | ps_proc->s_bs_ctxt.i4_ctb_x = ps_proc->i4_ctb_x; |
828 | 0 | ps_proc->s_bs_ctxt.i4_ctb_y = ps_proc->i4_ctb_y; |
829 | 0 | ps_proc->s_bs_ctxt.i4_ctb_tile_x = ps_proc->i4_ctb_tile_x; |
830 | 0 | ps_proc->s_bs_ctxt.i4_ctb_tile_y = ps_proc->i4_ctb_tile_y; |
831 | 0 | ps_proc->s_bs_ctxt.i4_ctb_slice_x = ps_proc->i4_ctb_slice_x; |
832 | 0 | ps_proc->s_bs_ctxt.i4_ctb_slice_y = ps_proc->i4_ctb_slice_y; |
833 | 0 | ps_proc->s_bs_ctxt.ps_tu = ps_proc->ps_tu; |
834 | 0 | ps_proc->s_bs_ctxt.ps_pu = ps_proc->ps_pu; |
835 | 0 | ps_proc->s_bs_ctxt.pu4_pic_pu_idx_map = ps_proc->pu4_pic_pu_idx_map; |
836 | 0 | ps_proc->s_bs_ctxt.i4_next_pu_ctb_cnt = ps_proc->i4_next_pu_ctb_cnt; |
837 | 0 | ps_proc->s_bs_ctxt.i4_next_tu_ctb_cnt = ps_proc->i4_next_tu_ctb_cnt; |
838 | 0 | ps_proc->s_bs_ctxt.pu1_slice_idx = ps_proc->pu1_slice_idx; |
839 | 0 | ps_proc->s_bs_ctxt.ps_slice_hdr = ps_proc->ps_slice_hdr; |
840 | 0 | ps_proc->s_bs_ctxt.ps_tile = ps_proc->ps_tile; |
841 | |
|
842 | 0 | if(ISLICE == ps_slice_hdr->i1_slice_type) |
843 | 0 | { |
844 | 0 | ihevcd_ctb_boundary_strength_islice(&ps_proc->s_bs_ctxt); |
845 | 0 | } |
846 | 0 | else |
847 | 0 | { |
848 | 0 | ihevcd_ctb_boundary_strength_pbslice(&ps_proc->s_bs_ctxt); |
849 | 0 | } |
850 | 0 | } |
851 | | |
852 | | /* Boundary strength is set to zero if deblocking is disabled for the current slice */ |
853 | 0 | if((0 != ps_slice_hdr->i1_slice_disable_deblocking_filter_flag)) |
854 | 0 | { |
855 | 0 | WORD32 bs_strd = (ps_sps->i2_pic_wd_in_ctb + 1) * (ctb_size * ctb_size / 8 / 16); |
856 | |
|
857 | 0 | UWORD32 *pu4_vert_bs = (UWORD32 *)((UWORD8 *)ps_proc->s_bs_ctxt.pu4_pic_vert_bs + |
858 | 0 | ps_proc->i4_ctb_x * (ctb_size * ctb_size / 8 / 16) + |
859 | 0 | ps_proc->i4_ctb_y * bs_strd); |
860 | 0 | UWORD32 *pu4_horz_bs = (UWORD32 *)((UWORD8 *)ps_proc->s_bs_ctxt.pu4_pic_horz_bs + |
861 | 0 | ps_proc->i4_ctb_x * (ctb_size * ctb_size / 8 / 16) + |
862 | 0 | ps_proc->i4_ctb_y * bs_strd); |
863 | |
|
864 | 0 | memset(pu4_vert_bs, 0, (ctb_size / 8) * (ctb_size / 4) / 8 * 2); |
865 | 0 | memset(pu4_horz_bs, 0, (ctb_size / 8) * (ctb_size / 4) / 8 * 2); |
866 | 0 | } |
867 | 0 | } |
868 | 0 | } |
869 | 0 | } |
870 | | |
871 | | /* Per CTB update the following */ |
872 | 0 | { |
873 | 0 | WORD32 cur_ctb_idx = ps_proc->i4_ctb_x |
874 | 0 | + ps_proc->i4_ctb_y * (ps_sps->i2_pic_wd_in_ctb); |
875 | 0 | cur_ctb_idx++; |
876 | |
|
877 | 0 | ps_proc->pu1_pu_map += nctb * num_minpu_in_ctb; |
878 | 0 | ps_proc->ps_tu += ps_proc->i4_ctb_tu_cnt; |
879 | 0 | if((1 == ps_codec->i4_num_cores) && |
880 | 0 | (0 == cur_ctb_idx % RESET_TU_BUF_NCTB)) |
881 | 0 | { |
882 | 0 | ps_proc->ps_tu = ps_proc->ps_pic_tu; |
883 | 0 | } |
884 | 0 | ps_proc->ps_pu += ps_proc->i4_ctb_pu_cnt; |
885 | 0 | } |
886 | | |
887 | | /* Update proc map for recon*/ |
888 | 0 | ihevcd_proc_map_update(ps_proc, proc_type, nctb); |
889 | |
|
890 | 0 | num_ctb_tmp -= nctb; |
891 | 0 | ihevcd_ctb_pos_update(ps_proc, nctb); |
892 | |
|
893 | 0 | } |
894 | |
|
895 | 0 | if(cur_slice_idx != ps_proc->i4_cur_slice_idx) |
896 | 0 | { |
897 | 0 | ps_proc->ps_slice_hdr = ps_codec->ps_slice_hdr_base + ((cur_slice_idx)&(MAX_SLICE_HDR_CNT - 1)); |
898 | 0 | ps_proc->i4_cur_slice_idx = cur_slice_idx; |
899 | 0 | } |
900 | | /* Restore the saved variables */ |
901 | 0 | num_ctb_tmp = num_ctb; |
902 | 0 | ps_proc->i4_ctb_x -= num_ctb; |
903 | 0 | ps_proc->i4_ctb_tile_x = cur_ctb_tile_x; |
904 | 0 | ps_proc->i4_ctb_slice_x = cur_ctb_slice_x; |
905 | 0 | ps_proc->i4_ctb_tile_y = cur_ctb_tile_y; |
906 | 0 | ps_proc->i4_ctb_slice_y = cur_ctb_slice_y; |
907 | 0 | ps_proc->pu1_pu_map = pu1_pu_map_cur; |
908 | 0 | ps_proc->ps_tu = ps_tu_cur; |
909 | 0 | proc_type = PROC_RECON; |
910 | |
|
911 | 0 | while(num_ctb_tmp) |
912 | 0 | { |
913 | | |
914 | | /* Check proc map to ensure dependencies for recon are met */ |
915 | 0 | ihevcd_proc_map_check(ps_proc, proc_type, nctb); |
916 | |
|
917 | 0 | ihevcd_slice_hdr_update(ps_proc); |
918 | |
|
919 | 0 | { |
920 | |
|
921 | 0 | ihevcd_ctb_avail_update(ps_proc); |
922 | | |
923 | | /*************************************************/ |
924 | | /**************** IQ IT RECON *******************/ |
925 | | /*************************************************/ |
926 | |
|
927 | 0 | ihevcd_update_ctb_tu_cnt(ps_proc); |
928 | | |
929 | | /* When scaling matrix is not to be used(scaling_list_enable_flag is zero in SPS), |
930 | | * default value of 16 has to be used. Since the value is same for all sizes, |
931 | | * same table is used for all cases. |
932 | | */ |
933 | 0 | if(0 == ps_sps->i1_scaling_list_enable_flag) |
934 | 0 | { |
935 | 0 | ps_proc->api2_dequant_intra_matrix[0] = |
936 | 0 | (WORD16 *)gi2_flat_scale_mat_32x32; |
937 | 0 | ps_proc->api2_dequant_intra_matrix[1] = |
938 | 0 | (WORD16 *)gi2_flat_scale_mat_32x32; |
939 | 0 | ps_proc->api2_dequant_intra_matrix[2] = |
940 | 0 | (WORD16 *)gi2_flat_scale_mat_32x32; |
941 | 0 | ps_proc->api2_dequant_intra_matrix[3] = |
942 | 0 | (WORD16 *)gi2_flat_scale_mat_32x32; |
943 | |
|
944 | 0 | ps_proc->api2_dequant_inter_matrix[0] = |
945 | 0 | (WORD16 *)gi2_flat_scale_mat_32x32; |
946 | 0 | ps_proc->api2_dequant_inter_matrix[1] = |
947 | 0 | (WORD16 *)gi2_flat_scale_mat_32x32; |
948 | 0 | ps_proc->api2_dequant_inter_matrix[2] = |
949 | 0 | (WORD16 *)gi2_flat_scale_mat_32x32; |
950 | 0 | ps_proc->api2_dequant_inter_matrix[3] = |
951 | 0 | (WORD16 *)gi2_flat_scale_mat_32x32; |
952 | 0 | } |
953 | 0 | else |
954 | 0 | { |
955 | 0 | if(0 == ps_sps->i1_sps_scaling_list_data_present_flag) |
956 | 0 | { |
957 | 0 | ps_proc->api2_dequant_intra_matrix[0] = |
958 | 0 | (WORD16 *)gi2_flat_scale_mat_32x32; |
959 | 0 | ps_proc->api2_dequant_intra_matrix[1] = |
960 | 0 | (WORD16 *)gi2_intra_default_scale_mat_8x8; |
961 | 0 | ps_proc->api2_dequant_intra_matrix[2] = |
962 | 0 | (WORD16 *)gi2_intra_default_scale_mat_16x16; |
963 | 0 | ps_proc->api2_dequant_intra_matrix[3] = |
964 | 0 | (WORD16 *)gi2_intra_default_scale_mat_32x32; |
965 | |
|
966 | 0 | ps_proc->api2_dequant_inter_matrix[0] = |
967 | 0 | (WORD16 *)gi2_flat_scale_mat_32x32; |
968 | 0 | ps_proc->api2_dequant_inter_matrix[1] = |
969 | 0 | (WORD16 *)gi2_inter_default_scale_mat_8x8; |
970 | 0 | ps_proc->api2_dequant_inter_matrix[2] = |
971 | 0 | (WORD16 *)gi2_inter_default_scale_mat_16x16; |
972 | 0 | ps_proc->api2_dequant_inter_matrix[3] = |
973 | 0 | (WORD16 *)gi2_inter_default_scale_mat_32x32; |
974 | 0 | } |
975 | | /*TODO: Add support for custom scaling matrices */ |
976 | 0 | } |
977 | | |
978 | | /* CTB Level pointers */ |
979 | 0 | ps_proc->pu1_cur_ctb_luma = ps_proc->pu1_cur_pic_luma |
980 | 0 | + (ps_proc->i4_ctb_x * ctb_size |
981 | 0 | + ps_proc->i4_ctb_y * ctb_size |
982 | 0 | * ps_codec->i4_strd); |
983 | 0 | if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc) |
984 | 0 | { |
985 | 0 | ps_proc->pu1_cur_ctb_chroma = ps_proc->pu1_cur_pic_chroma |
986 | 0 | + (ps_proc->i4_ctb_x * ctb_size * chroma_pixel_strd / h_samp_factor) |
987 | 0 | + (ps_proc->i4_ctb_y * ctb_size * ps_codec->i4_strd * chroma_pixel_strd / (h_samp_factor * v_samp_factor)); |
988 | 0 | } |
989 | |
|
990 | 0 | ihevcd_iquant_itrans_recon_ctb(ps_proc); |
991 | 0 | } |
992 | | |
993 | | /* Per CTB update the following */ |
994 | 0 | { |
995 | 0 | WORD32 cur_ctb_idx = ps_proc->i4_ctb_x |
996 | 0 | + ps_proc->i4_ctb_y * (ps_sps->i2_pic_wd_in_ctb); |
997 | 0 | cur_ctb_idx++; |
998 | |
|
999 | 0 | ps_proc->pu1_pu_map += nctb * num_minpu_in_ctb; |
1000 | 0 | ps_proc->ps_tu += ps_proc->i4_ctb_tu_cnt; |
1001 | 0 | if((1 == ps_codec->i4_num_cores) && |
1002 | 0 | (0 == cur_ctb_idx % RESET_TU_BUF_NCTB)) |
1003 | 0 | { |
1004 | 0 | ps_proc->ps_tu = ps_proc->ps_pic_tu; |
1005 | 0 | } |
1006 | 0 | ps_proc->ps_pu += ps_proc->i4_ctb_pu_cnt; |
1007 | 0 | } |
1008 | | |
1009 | | |
1010 | | /* Update proc map for recon*/ |
1011 | 0 | ihevcd_proc_map_update(ps_proc, proc_type, nctb); |
1012 | |
|
1013 | 0 | num_ctb_tmp -= nctb; |
1014 | 0 | ihevcd_ctb_pos_update(ps_proc, nctb); |
1015 | 0 | } |
1016 | |
|
1017 | 0 | if(cur_slice_idx != ps_proc->i4_cur_slice_idx) |
1018 | 0 | { |
1019 | 0 | ps_proc->ps_slice_hdr = ps_codec->ps_slice_hdr_base + ((cur_slice_idx)&(MAX_SLICE_HDR_CNT - 1)); |
1020 | 0 | ps_proc->i4_cur_slice_idx = cur_slice_idx; |
1021 | 0 | } |
1022 | | /* Restore the saved variables */ |
1023 | 0 | num_ctb_tmp = num_ctb; |
1024 | 0 | ps_proc->i4_ctb_x -= num_ctb; |
1025 | 0 | ps_proc->i4_ctb_tile_x = cur_ctb_tile_x; |
1026 | 0 | ps_proc->i4_ctb_slice_x = cur_ctb_slice_x; |
1027 | 0 | ps_proc->i4_ctb_tile_y = cur_ctb_tile_y; |
1028 | 0 | ps_proc->i4_ctb_slice_y = cur_ctb_slice_y; |
1029 | 0 | pu1_pu_map_nxt = ps_proc->pu1_pu_map; |
1030 | 0 | ps_tu_nxt = ps_proc->ps_tu; |
1031 | 0 | ps_proc->pu1_pu_map = pu1_pu_map_cur; |
1032 | 0 | ps_proc->ps_tu = ps_tu_cur; |
1033 | 0 | proc_type = PROC_DEBLK; |
1034 | |
|
1035 | 0 | while(num_ctb_tmp) |
1036 | 0 | { |
1037 | | |
1038 | | |
1039 | | /* Check proc map to ensure dependencies for deblk are met */ |
1040 | 0 | ihevcd_proc_map_check(ps_proc, proc_type, nctb); |
1041 | |
|
1042 | 0 | ihevcd_slice_hdr_update(ps_proc); |
1043 | | |
1044 | |
|
1045 | 0 | if(0 == ps_codec->i4_disable_deblk_pic) |
1046 | 0 | { |
1047 | 0 | WORD32 i4_is_last_ctb_x = 0; |
1048 | 0 | WORD32 i4_is_last_ctb_y = 0; |
1049 | | |
1050 | | |
1051 | | /* Deblocking is done irrespective of whether it is disabled in the slice or not, |
1052 | | * to handle deblocking the slice boundaries */ |
1053 | 0 | { |
1054 | 0 | ps_proc->s_deblk_ctxt.ps_pps = ps_proc->ps_pps; |
1055 | 0 | ps_proc->s_deblk_ctxt.ps_sps = ps_proc->ps_sps; |
1056 | 0 | ps_proc->s_deblk_ctxt.ps_codec = ps_proc->ps_codec; |
1057 | 0 | ps_proc->s_deblk_ctxt.ps_slice_hdr = ps_proc->ps_slice_hdr; |
1058 | 0 | ps_proc->s_deblk_ctxt.i4_ctb_x = ps_proc->i4_ctb_x; |
1059 | 0 | ps_proc->s_deblk_ctxt.i4_ctb_y = ps_proc->i4_ctb_y; |
1060 | 0 | ps_proc->s_deblk_ctxt.pu1_slice_idx = ps_proc->pu1_slice_idx; |
1061 | 0 | ps_proc->s_deblk_ctxt.is_chroma_yuv420sp_vu = (ps_codec->e_ref_chroma_fmt == IV_YUV_420SP_VU); |
1062 | | |
1063 | | /* Populating Current CTB's no_loop_filter flags */ |
1064 | 0 | { |
1065 | 0 | WORD32 row; |
1066 | 0 | WORD32 log2_ctb_size = ps_sps->i1_log2_ctb_size; |
1067 | | |
1068 | | /* Loop filter strd in units of num bits */ |
1069 | 0 | WORD32 loop_filter_strd = ((ps_sps->i2_pic_width_in_luma_samples + 63) >> 6) << 3; |
1070 | | /* Bit position is the current 8x8 bit offset wrt pic_no_loop_filter |
1071 | | * bit_pos has to be a WOR32 so that when it is negative, the downshift still retains it to be a negative value */ |
1072 | 0 | WORD32 bit_pos = ((ps_proc->i4_ctb_y << (log2_ctb_size - 3)) - 1) * loop_filter_strd + (ps_proc->i4_ctb_x << (log2_ctb_size - 3)) - 1; |
1073 | |
|
1074 | 0 | for(row = 0; row < (ctb_size >> 3) + 1; row++) |
1075 | 0 | { |
1076 | | /* Go to the corresponding byte - read 32 bits and downshift */ |
1077 | 0 | ps_proc->s_deblk_ctxt.au2_ctb_no_loop_filter_flag[row] = (*(UWORD32 *)(ps_proc->pu1_pic_no_loop_filter_flag + (bit_pos >> 3))) >> (bit_pos & 7); |
1078 | 0 | bit_pos += loop_filter_strd; |
1079 | 0 | } |
1080 | 0 | } |
1081 | |
|
1082 | 0 | ihevcd_deblk_ctb(&ps_proc->s_deblk_ctxt, i4_is_last_ctb_x, i4_is_last_ctb_y); |
1083 | | |
1084 | | /* If the last CTB in the row was a complete CTB then deblocking has to be called for the remaining pixels, since deblocking |
1085 | | * is applied on a shifted CTB structure |
1086 | | */ |
1087 | 0 | if(ps_proc->i4_ctb_x == ps_sps->i2_pic_wd_in_ctb - 1) |
1088 | 0 | { |
1089 | 0 | WORD32 i4_is_last_ctb_x = 1; |
1090 | 0 | WORD32 i4_is_last_ctb_y = 0; |
1091 | |
|
1092 | 0 | WORD32 last_x_pos; |
1093 | 0 | last_x_pos = (ps_sps->i2_pic_wd_in_ctb << ps_sps->i1_log2_ctb_size); |
1094 | 0 | if(last_x_pos == ps_sps->i2_pic_width_in_luma_samples) |
1095 | 0 | { |
1096 | 0 | ihevcd_deblk_ctb(&ps_proc->s_deblk_ctxt, i4_is_last_ctb_x, i4_is_last_ctb_y); |
1097 | 0 | } |
1098 | 0 | } |
1099 | | |
1100 | | |
1101 | | /* If the last CTB in the column was a complete CTB then deblocking has to be called for the remaining pixels, since deblocking |
1102 | | * is applied on a shifted CTB structure |
1103 | | */ |
1104 | 0 | if(ps_proc->i4_ctb_y == ps_sps->i2_pic_ht_in_ctb - 1) |
1105 | 0 | { |
1106 | 0 | WORD32 i4_is_last_ctb_x = 0; |
1107 | 0 | WORD32 i4_is_last_ctb_y = 1; |
1108 | 0 | WORD32 last_y_pos; |
1109 | 0 | last_y_pos = (ps_sps->i2_pic_ht_in_ctb << ps_sps->i1_log2_ctb_size); |
1110 | 0 | if(last_y_pos == ps_sps->i2_pic_height_in_luma_samples) |
1111 | 0 | { |
1112 | 0 | ihevcd_deblk_ctb(&ps_proc->s_deblk_ctxt, i4_is_last_ctb_x, i4_is_last_ctb_y); |
1113 | 0 | } |
1114 | 0 | } |
1115 | 0 | } |
1116 | 0 | } |
1117 | | |
1118 | | /* Update proc map for deblk*/ |
1119 | 0 | ihevcd_proc_map_update(ps_proc, proc_type, nctb); |
1120 | |
|
1121 | 0 | num_ctb_tmp -= nctb; |
1122 | 0 | ihevcd_ctb_pos_update(ps_proc, nctb); |
1123 | 0 | } |
1124 | |
|
1125 | 0 | if(cur_slice_idx != ps_proc->i4_cur_slice_idx) |
1126 | 0 | { |
1127 | 0 | ps_proc->ps_slice_hdr = ps_codec->ps_slice_hdr_base + ((cur_slice_idx)&(MAX_SLICE_HDR_CNT - 1)); |
1128 | 0 | ps_proc->i4_cur_slice_idx = cur_slice_idx; |
1129 | 0 | } |
1130 | | /* Restore the saved variables */ |
1131 | 0 | num_ctb_tmp = num_ctb; |
1132 | 0 | ps_proc->i4_ctb_x -= num_ctb; |
1133 | 0 | ps_proc->i4_ctb_tile_x = cur_ctb_tile_x; |
1134 | 0 | ps_proc->i4_ctb_tile_y = cur_ctb_tile_y; |
1135 | 0 | ps_proc->pu1_pu_map = pu1_pu_map_cur; |
1136 | 0 | ps_proc->ps_tu = ps_tu_cur; |
1137 | 0 | nxt_ctb_slice_y = ps_proc->i4_ctb_slice_y; |
1138 | 0 | nxt_ctb_slice_x = ps_proc->i4_ctb_slice_x; |
1139 | 0 | ps_proc->i4_ctb_slice_y = cur_ctb_slice_y; |
1140 | 0 | ps_proc->i4_ctb_slice_x = cur_ctb_slice_x; |
1141 | 0 | proc_type = PROC_SAO; |
1142 | |
|
1143 | 0 | while(num_ctb_tmp) |
1144 | 0 | { |
1145 | | |
1146 | | |
1147 | | /* Check proc map to ensure dependencies for SAO are met */ |
1148 | 0 | ihevcd_proc_map_check(ps_proc, proc_type, nctb); |
1149 | |
|
1150 | 0 | ihevcd_slice_hdr_update(ps_proc); |
1151 | | |
1152 | | |
1153 | |
|
1154 | 0 | { |
1155 | | /* SAO is done even when it is disabled in the current slice, because |
1156 | | * it is performed on a shifted CTB and the neighbor CTBs can belong |
1157 | | * to different slices with SAO enabled */ |
1158 | 0 | if(0 == ps_codec->i4_disable_sao_pic) |
1159 | 0 | { |
1160 | 0 | ps_proc->s_sao_ctxt.ps_pps = ps_proc->ps_pps; |
1161 | 0 | ps_proc->s_sao_ctxt.ps_sps = ps_proc->ps_sps; |
1162 | 0 | ps_proc->s_sao_ctxt.ps_tile = ps_proc->ps_tile; |
1163 | 0 | ps_proc->s_sao_ctxt.ps_codec = ps_proc->ps_codec; |
1164 | 0 | ps_proc->s_sao_ctxt.ps_slice_hdr = ps_proc->ps_slice_hdr; |
1165 | 0 | ps_proc->s_sao_ctxt.i4_cur_slice_idx = ps_proc->i4_cur_slice_idx; |
1166 | | |
1167 | |
|
1168 | 0 | ps_proc->s_sao_ctxt.i4_ctb_x = ps_proc->i4_ctb_x; |
1169 | 0 | ps_proc->s_sao_ctxt.i4_ctb_y = ps_proc->i4_ctb_y; |
1170 | 0 | ps_proc->s_sao_ctxt.is_chroma_yuv420sp_vu = (ps_codec->e_ref_chroma_fmt == IV_YUV_420SP_VU); |
1171 | |
|
1172 | 0 | ihevcd_sao_shift_ctb(&ps_proc->s_sao_ctxt); |
1173 | 0 | } |
1174 | | |
1175 | | |
1176 | | /* Call padding if required */ |
1177 | 0 | { |
1178 | 0 | chroma_row_strd = ps_codec->i4_strd * chroma_pixel_strd / h_samp_factor; |
1179 | | |
1180 | |
|
1181 | 0 | if(0 == ps_proc->i4_ctb_x) |
1182 | 0 | { |
1183 | 0 | WORD32 pad_ht_luma; |
1184 | 0 | WORD32 pad_ht_chroma; |
1185 | |
|
1186 | 0 | ps_proc->pu1_cur_ctb_luma = ps_proc->pu1_cur_pic_luma |
1187 | 0 | + (ps_proc->i4_ctb_x * ctb_size |
1188 | 0 | + ps_proc->i4_ctb_y * ctb_size |
1189 | 0 | * ps_codec->i4_strd); |
1190 | |
|
1191 | 0 | pad_ht_luma = ctb_size; |
1192 | 0 | pad_ht_luma += (ps_sps->i2_pic_ht_in_ctb - 1) == ps_proc->i4_ctb_y ? 8 : 0; |
1193 | | /* Pad left after 1st CTB is processed */ |
1194 | 0 | ps_codec->s_func_selector.ihevc_pad_left_luma_fptr(ps_proc->pu1_cur_ctb_luma - 8 * ps_codec->i4_strd, ps_codec->i4_strd, pad_ht_luma, PAD_LEFT); |
1195 | 0 | if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc) |
1196 | 0 | { |
1197 | 0 | ps_proc->pu1_cur_ctb_chroma = ps_proc->pu1_cur_pic_chroma |
1198 | 0 | + (ps_proc->i4_ctb_x * ctb_size * chroma_pixel_strd / h_samp_factor) |
1199 | 0 | + (ps_proc->i4_ctb_y * ctb_size * chroma_row_strd / v_samp_factor); |
1200 | 0 | pad_ht_chroma = ctb_size / v_samp_factor; |
1201 | 0 | ps_codec->s_func_selector.ihevc_pad_left_chroma_fptr( |
1202 | 0 | ps_proc->pu1_cur_ctb_chroma - (8 * v_samp_factor) * chroma_row_strd, |
1203 | 0 | chroma_row_strd, pad_ht_chroma, PAD_LEFT * chroma_pixel_strd / h_samp_factor); |
1204 | 0 | } |
1205 | 0 | } |
1206 | |
|
1207 | 0 | if((ps_sps->i2_pic_wd_in_ctb - 1) == ps_proc->i4_ctb_x) |
1208 | 0 | { |
1209 | 0 | WORD32 pad_ht_luma; |
1210 | 0 | WORD32 pad_ht_chroma; |
1211 | 0 | WORD32 cols_remaining = ps_sps->i2_pic_width_in_luma_samples - (ps_proc->i4_ctb_x << ps_sps->i1_log2_ctb_size); |
1212 | |
|
1213 | 0 | ps_proc->pu1_cur_ctb_luma = ps_proc->pu1_cur_pic_luma |
1214 | 0 | + (ps_proc->i4_ctb_x * ctb_size |
1215 | 0 | + ps_proc->i4_ctb_y * ctb_size |
1216 | 0 | * ps_codec->i4_strd); |
1217 | 0 | if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc) |
1218 | 0 | { |
1219 | 0 | ps_proc->pu1_cur_ctb_chroma = ps_proc->pu1_cur_pic_chroma |
1220 | 0 | + (ps_proc->i4_ctb_x * ctb_size * chroma_pixel_strd / h_samp_factor) |
1221 | 0 | + (ps_proc->i4_ctb_y * ctb_size * chroma_row_strd / v_samp_factor); |
1222 | 0 | pad_ht_chroma = ctb_size / v_samp_factor; |
1223 | 0 | } |
1224 | 0 | pad_ht_luma = ctb_size; |
1225 | 0 | if((ps_sps->i2_pic_ht_in_ctb - 1) == ps_proc->i4_ctb_y) |
1226 | 0 | { |
1227 | 0 | pad_ht_luma += 8; |
1228 | 0 | if (CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc) |
1229 | 0 | { |
1230 | 0 | pad_ht_chroma += (8 * v_samp_factor); |
1231 | 0 | ps_codec->s_func_selector.ihevc_pad_left_chroma_fptr( |
1232 | 0 | ps_proc->pu1_cur_pic_chroma + ((ps_sps->i2_pic_height_in_luma_samples / v_samp_factor) - (8 * v_samp_factor)) * chroma_row_strd, |
1233 | 0 | chroma_row_strd, (8 * v_samp_factor), |
1234 | 0 | PAD_LEFT * chroma_pixel_strd / h_samp_factor); |
1235 | 0 | } |
1236 | 0 | } |
1237 | | /* Pad right after last CTB in the current row is processed */ |
1238 | 0 | ps_codec->s_func_selector.ihevc_pad_right_luma_fptr(ps_proc->pu1_cur_ctb_luma + cols_remaining - 8 * ps_codec->i4_strd, ps_codec->i4_strd, pad_ht_luma, PAD_RIGHT); |
1239 | 0 | if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc) |
1240 | 0 | { |
1241 | 0 | ps_codec->s_func_selector.ihevc_pad_right_chroma_fptr( |
1242 | 0 | ps_proc->pu1_cur_ctb_chroma + (cols_remaining * chroma_pixel_strd / h_samp_factor) - (8 * v_samp_factor) * chroma_row_strd, |
1243 | 0 | chroma_row_strd, pad_ht_chroma, |
1244 | 0 | PAD_RIGHT * chroma_pixel_strd / h_samp_factor); |
1245 | 0 | } |
1246 | |
|
1247 | 0 | if((ps_sps->i2_pic_ht_in_ctb - 1) == ps_proc->i4_ctb_y) |
1248 | 0 | { |
1249 | 0 | UWORD8 *pu1_buf; |
1250 | | /* Since SAO is shifted by 8x8, chroma padding can not be done till second row is processed */ |
1251 | | /* Hence moving top padding to the end of frame. Moving it to second row also results in problems when there is only one row */ |
1252 | | /* Pad top after padding left and right for current rows after processing 1st CTB row */ |
1253 | 0 | ihevc_pad_top(ps_proc->pu1_cur_pic_luma - PAD_LEFT, ps_codec->i4_strd, ps_sps->i2_pic_width_in_luma_samples + PAD_WD, PAD_TOP); |
1254 | 0 | if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc) |
1255 | 0 | { |
1256 | 0 | ihevc_pad_top(ps_proc->pu1_cur_pic_chroma - PAD_LEFT * (chroma_pixel_strd / h_samp_factor), |
1257 | 0 | chroma_row_strd, |
1258 | 0 | (ps_sps->i2_pic_width_in_luma_samples + PAD_WD) * (chroma_pixel_strd / h_samp_factor), |
1259 | 0 | PAD_TOP / v_samp_factor); |
1260 | 0 | } |
1261 | | |
1262 | | /* Pad bottom after padding left and right for current rows after processing 1st CTB row */ |
1263 | 0 | pu1_buf = ps_proc->pu1_cur_pic_luma + ps_codec->i4_strd * ps_sps->i2_pic_height_in_luma_samples - PAD_LEFT; |
1264 | 0 | ihevc_pad_bottom(pu1_buf, ps_codec->i4_strd, ps_sps->i2_pic_width_in_luma_samples + PAD_WD, PAD_BOT); |
1265 | 0 | if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc) |
1266 | 0 | { |
1267 | 0 | pu1_buf = ps_proc->pu1_cur_pic_chroma |
1268 | 0 | + chroma_row_strd * (ps_sps->i2_pic_height_in_luma_samples / v_samp_factor) |
1269 | 0 | - (PAD_LEFT * chroma_pixel_strd / h_samp_factor); |
1270 | 0 | ihevc_pad_bottom(pu1_buf, |
1271 | 0 | chroma_row_strd, |
1272 | 0 | (ps_sps->i2_pic_width_in_luma_samples + PAD_WD) * (chroma_pixel_strd / h_samp_factor), |
1273 | 0 | PAD_BOT / v_samp_factor); |
1274 | 0 | } |
1275 | 0 | } |
1276 | 0 | } |
1277 | |
|
1278 | 0 | } |
1279 | 0 | } |
1280 | | |
1281 | | |
1282 | | /* Update proc map for SAO*/ |
1283 | 0 | ihevcd_proc_map_update(ps_proc, proc_type, nctb); |
1284 | | /* Update proc map for Completion of CTB*/ |
1285 | 0 | ihevcd_proc_map_update(ps_proc, PROC_ALL, nctb); |
1286 | 0 | { |
1287 | 0 | tile_t *ps_tile; |
1288 | |
|
1289 | 0 | ps_tile = ps_proc->ps_tile; |
1290 | 0 | num_ctb_tmp -= nctb; |
1291 | |
|
1292 | 0 | ps_proc->i4_ctb_tile_x += nctb; |
1293 | 0 | ps_proc->i4_ctb_x += nctb; |
1294 | |
|
1295 | 0 | ps_proc->i4_ctb_slice_x += nctb; |
1296 | | |
1297 | | |
1298 | | /* Update tile counters */ |
1299 | 0 | if(ps_proc->i4_ctb_tile_x >= (ps_tile->u2_wd)) |
1300 | 0 | { |
1301 | | /*End of tile row*/ |
1302 | 0 | ps_proc->i4_ctb_tile_x = 0; |
1303 | 0 | ps_proc->i4_ctb_x = ps_tile->u1_pos_x; |
1304 | |
|
1305 | 0 | ps_proc->i4_ctb_tile_y++; |
1306 | 0 | ps_proc->i4_ctb_y++; |
1307 | 0 | if(ps_proc->i4_ctb_tile_y == ps_tile->u2_ht) |
1308 | 0 | { |
1309 | | /* Reached End of Tile */ |
1310 | 0 | ps_proc->i4_ctb_tile_y = 0; |
1311 | 0 | ps_proc->i4_ctb_tile_x = 0; |
1312 | 0 | ps_proc->ps_tile++; |
1313 | | //End of picture |
1314 | 0 | if(!((ps_tile->u2_ht + ps_tile->u1_pos_y == ps_sps->i2_pic_ht_in_ctb) && (ps_tile->u2_wd + ps_tile->u1_pos_x == ps_sps->i2_pic_wd_in_ctb))) |
1315 | 0 | { |
1316 | 0 | ps_tile = ps_proc->ps_tile; |
1317 | 0 | ps_proc->i4_ctb_x = ps_tile->u1_pos_x; |
1318 | 0 | ps_proc->i4_ctb_y = ps_tile->u1_pos_y; |
1319 | |
|
1320 | 0 | } |
1321 | 0 | } |
1322 | 0 | } |
1323 | 0 | } |
1324 | 0 | } |
1325 | |
|
1326 | 0 | ps_proc->i4_ctb_cnt -= num_ctb; |
1327 | 0 | } |
1328 | 0 | return ret; |
1329 | 0 | } |
1330 | | |
/**
*******************************************************************************
*
* @brief
*  Prepares a process context for the CTB position and slice index that the
*  caller has already stored in it
*
* @par Description:
*  Reads ps_proc->ps_codec, ps_proc->i4_ctb_x, ps_proc->i4_ctb_y and
*  ps_proc->i4_cur_slice_idx and derives the following from them:
*   - ps_slice_hdr, ps_pps, ps_sps (slice index is masked with
*     MAX_SLICE_HDR_CNT - 1 before indexing ps_slice_hdr_base)
*   - ps_tile, i4_cur_tile_idx, i4_ctb_tile_x, i4_ctb_tile_y
*     (via ihevcd_get_tile_pos)
*   - i4_ctb_slice_x, i4_ctb_slice_y (CTB position relative to
*     i2_independent_ctb_x / i2_independent_ctb_y of the slice header, with
*     separate handling when tiles are enabled)
*   - pu1_tu_map, ps_tu, pv_tu_coeff_data, pu1_pu_map, ps_pu
*   - i4_nctb
*  Also sets ps_proc->i4_init_done to 1 and copies pu1_slice_idx into the SAO
*  context.
*
* @param[in,out] ps_proc
*  Process context; fields listed above are read and written
*
* @param[in] tu_coeff_data_ofst
*  Offset added to pv_pic_tu_coeff_data (after a cast to UWORD8 *) to obtain
*  pv_tu_coeff_data
*
* @returns
*  None
*
* @remarks
*  ps_tile_prev and check_tile_wd are assigned and used only inside the
*  tiles-enabled branch.
*
*******************************************************************************
*/
1331 | | void ihevcd_init_proc_ctxt(process_ctxt_t *ps_proc, WORD32 tu_coeff_data_ofst) |
1332 | 0 | { |
1333 | 0 | codec_t *ps_codec; |
1334 | 0 | slice_header_t *ps_slice_hdr; |
1335 | 0 | pps_t *ps_pps; |
1336 | 0 | sps_t *ps_sps; |
1337 | 0 | tile_t *ps_tile, *ps_tile_prev; |
1338 | 0 | WORD32 tile_idx; |
1339 | 0 | WORD32 ctb_size; |
1340 | 0 | WORD32 num_minpu_in_ctb; |
1341 | 0 | WORD32 num_ctb_in_row; |
1342 | 0 | WORD32 ctb_addr; |
1343 | 0 | WORD32 i4_wd_in_ctb; |
1344 | 0 | WORD32 tile_start_ctb_idx; |
1345 | 0 | WORD32 slice_start_ctb_idx; |
1346 | 0 | WORD32 check_tile_wd; |
1347 | 0 | WORD32 continuous_tiles = 0; //Refers to tiles that are continuous, within a slice, horizontally |
1348 | |

1349 | 0 | ps_codec = ps_proc->ps_codec; |
1350 | |

1351 | 0 | ps_slice_hdr = ps_codec->ps_slice_hdr_base + ((ps_proc->i4_cur_slice_idx) & (MAX_SLICE_HDR_CNT - 1)); |
1352 | 0 | ps_proc->ps_slice_hdr = ps_slice_hdr; |
1353 | 0 | ps_proc->ps_pps = ps_codec->ps_pps_base + ps_slice_hdr->i1_pps_id; |
1354 | 0 | ps_pps = ps_proc->ps_pps; |
1355 | 0 | ps_proc->ps_sps = ps_codec->ps_sps_base + ps_pps->i1_sps_id; |
1356 | 0 | ps_sps = ps_proc->ps_sps; |
1357 | 0 | ps_proc->i4_init_done = 1; |
1358 | 0 | ctb_size = 1 << ps_sps->i1_log2_ctb_size; |
1359 | 0 | num_minpu_in_ctb = (ctb_size / MIN_PU_SIZE) * (ctb_size / MIN_PU_SIZE); |
1360 | 0 | num_ctb_in_row = ps_sps->i2_pic_wd_in_ctb; |
1361 | |

1362 | 0 | ps_proc->s_sao_ctxt.pu1_slice_idx = ps_proc->pu1_slice_idx; |
1363 | |

1364 | 0 | ihevcd_get_tile_pos(ps_pps, ps_sps, ps_proc->i4_ctb_x, ps_proc->i4_ctb_y, |
1365 | 0 | &ps_proc->i4_ctb_tile_x, &ps_proc->i4_ctb_tile_y, |
1366 | 0 | &tile_idx); |
1367 | |

1368 | 0 | ps_proc->ps_tile = ps_pps->ps_tile + tile_idx; |
1369 | 0 | ps_proc->i4_cur_tile_idx = tile_idx; |
1370 | 0 | ps_tile = ps_proc->ps_tile; |
1371 | |

1372 | 0 | if(ps_pps->i1_tiles_enabled_flag) |
1373 | 0 | { |
1374 | 0 | if(tile_idx) |
1375 | 0 | ps_tile_prev = ps_tile - 1; |
1376 | 0 | else |
1377 | 0 | ps_tile_prev = ps_tile; /* tile 0 has no predecessor, so it stands in for itself */ |
1378 | |

1379 | 0 | slice_start_ctb_idx = ps_slice_hdr->i2_ctb_x + (ps_slice_hdr->i2_ctb_y * ps_sps->i2_pic_wd_in_ctb); |
1380 | 0 | tile_start_ctb_idx = ps_tile->u1_pos_x + (ps_tile->u1_pos_y * ps_sps->i2_pic_wd_in_ctb); |
1381 | | |
1382 | | /*Check if |
1383 | | * 1. Last tile that ends in frame boundary and 1st tile in next row belong to same slice |
1384 | | * 1.1. If it does, check if the slice that has these tiles spans across the frame row. |
1385 | | * 2. Vertical tiles are present within a slice */ |
1386 | 0 | if(((ps_slice_hdr->i2_ctb_x == ps_tile->u1_pos_x) && (ps_slice_hdr->i2_ctb_y != ps_tile->u1_pos_y))) |
1387 | 0 | { |
1388 | 0 | continuous_tiles = 1; |
1389 | 0 | } |
1390 | 0 | else |
1391 | 0 | { |
1392 | 0 | check_tile_wd = ps_slice_hdr->i2_ctb_x + ps_tile_prev->u2_wd; |
1393 | 0 | if(!(((check_tile_wd >= ps_sps->i2_pic_wd_in_ctb) && (check_tile_wd % ps_sps->i2_pic_wd_in_ctb == ps_tile->u1_pos_x)) |
1394 | 0 | || ((ps_slice_hdr->i2_ctb_x == ps_tile->u1_pos_x)))) |
1395 | 0 | { |
1396 | 0 | continuous_tiles = 1; |
1397 | 0 | } |
1398 | 0 | } |
1399 | |

1400 | 0 | { |
1401 | 0 | WORD32 i2_independent_ctb_x = ps_slice_hdr->i2_independent_ctb_x; |
1402 | 0 | WORD32 i2_independent_ctb_y = ps_slice_hdr->i2_independent_ctb_y; |
1403 | | |
1404 | | /* Handles cases where |
1405 | | * 1. Slices begin at the start of each tile |
1406 | | * 2. Tiles lie in the same slice row, i.e. starting tile_x > slice_x, but tile_y == slice_y |
1407 | | * */ |
1408 | 0 | if(ps_proc->i4_ctb_x >= i2_independent_ctb_x) |
1409 | 0 | { |
1410 | 0 | ps_proc->i4_ctb_slice_x = ps_proc->i4_ctb_x - i2_independent_ctb_x; |
1411 | 0 | } |
1412 | 0 | else |
1413 | 0 | { |
1414 | | /* Indicates multiple tiles in a slice case where |
1415 | | * The new tile belongs to an older slice that started in the previous rows-not the present row |
1416 | | * & (tile_y > slice_y and tile_x < slice_x) |
1417 | | */ |
1418 | 0 | if((slice_start_ctb_idx < tile_start_ctb_idx) && (continuous_tiles)) |
1419 | 0 | { |
1420 | 0 | i4_wd_in_ctb = ps_sps->i2_pic_wd_in_ctb; |
1421 | 0 | } |
1422 | | /* Indicates many-tiles-in-one-slice case, for slices that end without spanning the frame width*/ |
1423 | 0 | else |
1424 | 0 | { |
1425 | 0 | i4_wd_in_ctb = ps_tile->u2_wd; |
1426 | 0 | } |
1427 | |

1428 | 0 | if(continuous_tiles) |
1429 | 0 | { |
1430 | 0 | ps_proc->i4_ctb_slice_x = i4_wd_in_ctb |
1431 | 0 | - (i2_independent_ctb_x - ps_proc->i4_ctb_x); |
1432 | 0 | } |
1433 | 0 | else |
1434 | 0 | { |
1435 | 0 | ps_proc->i4_ctb_slice_x = ps_proc->i4_ctb_x - ps_tile->u1_pos_x; |
1436 | 0 | } |
1437 | 0 | } |
1438 | | /* ctb slice y is first set to the difference between ctb_y and the independent |
1439 | | slice's start ctb y; the checks that follow adjust it at tile starts */ |
1440 | |

1441 | 0 | ps_proc->i4_ctb_slice_y = ps_proc->i4_ctb_y - i2_independent_ctb_y; |
1442 | | |
1443 | | /*If beginning of tile, check if slice counters are set correctly*/ |
1444 | 0 | if((0 == ps_proc->i4_ctb_tile_x) && (0 == ps_proc->i4_ctb_tile_y)) |
1445 | 0 | { |
1446 | 0 | if(ps_slice_hdr->i1_dependent_slice_flag) |
1447 | 0 | { |
1448 | 0 | ps_proc->i4_ctb_slice_x = 0; |
1449 | 0 | ps_proc->i4_ctb_slice_y = 0; |
1450 | 0 | } |
1451 | | /*For slices that span across multiple tiles*/ |
1452 | 0 | else if(slice_start_ctb_idx < tile_start_ctb_idx) |
1453 | 0 | { |
1454 | 0 | ps_proc->i4_ctb_slice_y = ps_tile->u1_pos_y - i2_independent_ctb_y; |
1455 | | /* Two Cases |
1456 | | * 1 - slice spans across frame-width- but does not start from 1st column |
1457 | | * 2 - Slice spans across multiple tiles anywhere in a frame |
1458 | | */ |
1459 | | /*TODO:In a multiple slice clip, if an independent slice spans across more than 2 tiles in a row, it is not supported*/ |
1460 | 0 | if(continuous_tiles) //Case 2-implemented for slices that span not more than 2 tiles |
1461 | 0 | { |
1462 | 0 | if(i2_independent_ctb_y <= ps_tile->u1_pos_y) |
1463 | 0 | { |
1464 | | //Check if ctb x is before or after |
1465 | 0 | if(i2_independent_ctb_x > ps_tile->u1_pos_x) |
1466 | 0 | { |
1467 | 0 | ps_proc->i4_ctb_slice_y -= 1; |
1468 | 0 | } |
1469 | 0 | } |
1470 | 0 | } |
1471 | 0 | } |
1472 | 0 | } |
1473 | | //Slice starts from a column which is not the starting tile-column, but is within the tile |
1474 | 0 | if(((i2_independent_ctb_x - ps_tile->u1_pos_x) != 0) && ((ps_proc->i4_ctb_slice_y != 0)) |
1475 | 0 | && ((i2_independent_ctb_x >= ps_tile->u1_pos_x) && (i2_independent_ctb_x < ps_tile->u1_pos_x + ps_tile->u2_wd))) |
1476 | 0 | { |
1477 | 0 | ps_proc->i4_ctb_slice_y -= 1; |
1478 | 0 | } |
1479 | 0 | } |
1480 | 0 | } |
1481 | 0 | else |
1482 | 0 | { |
1483 | 0 | WORD32 i2_independent_ctb_x = ps_slice_hdr->i2_independent_ctb_x; |
1484 | 0 | WORD32 i2_independent_ctb_y = ps_slice_hdr->i2_independent_ctb_y; |
1485 | | |
1486 | |

1487 | 0 | { |
1488 | 0 | ps_proc->i4_ctb_slice_x = ps_proc->i4_ctb_x - i2_independent_ctb_x; |
1489 | 0 | ps_proc->i4_ctb_slice_y = ps_proc->i4_ctb_y - i2_independent_ctb_y; |
1490 | 0 | if(ps_proc->i4_ctb_slice_x < 0) |
1491 | 0 | { |
1492 | 0 | ps_proc->i4_ctb_slice_x += ps_sps->i2_pic_wd_in_ctb; |
1493 | 0 | ps_proc->i4_ctb_slice_y -= 1; |
1494 | 0 | } |
1495 | | |
1496 | | /* ctb slice x/y above are relative to the independent slice start; a negative x is |
1497 | | wrapped into the previous ctb row (x += picture width in ctbs, y -= 1) */ |
1498 | 0 | } |
1499 | 0 | } |
1500 | | |
1501 | | /* Compute TU offset for the current CTB set */ |
1502 | 0 | { |
1503 | |

1504 | 0 | WORD32 ctb_luma_min_tu_cnt; |
1505 | 0 | WORD32 ctb_addr; /* NOTE(review): shadows the function-scope ctb_addr declared at the top */ |
1506 | |

1507 | 0 | ctb_addr = ps_proc->i4_ctb_y * num_ctb_in_row + ps_proc->i4_ctb_x; |
1508 | |

1509 | 0 | ctb_luma_min_tu_cnt = (1 << ps_sps->i1_log2_ctb_size) / MIN_TU_SIZE; |
1510 | 0 | ctb_luma_min_tu_cnt *= ctb_luma_min_tu_cnt; |
1511 | |

1512 | 0 | ps_proc->pu1_tu_map = ps_proc->pu1_pic_tu_map |
1513 | 0 | + ctb_luma_min_tu_cnt * ctb_addr; |
1514 | 0 | if(1 == ps_codec->i4_num_cores) /* single core: the index into pu4_pic_tu_idx wraps every RESET_TU_BUF_NCTB ctbs */ |
1515 | 0 | { |
1516 | 0 | ps_proc->ps_tu = ps_proc->ps_pic_tu + ps_proc->pu4_pic_tu_idx[ctb_addr % RESET_TU_BUF_NCTB]; |
1517 | 0 | } |
1518 | 0 | else |
1519 | 0 | { |
1520 | 0 | ps_proc->ps_tu = ps_proc->ps_pic_tu + ps_proc->pu4_pic_tu_idx[ctb_addr]; |
1521 | 0 | } |
1522 | 0 | ps_proc->pv_tu_coeff_data = (UWORD8 *)ps_proc->pv_pic_tu_coeff_data |
1523 | 0 | + tu_coeff_data_ofst; /* added after the UWORD8 * cast, so the offset is in bytes */ |
1524 | |

1525 | 0 | } |
1526 | | |
1527 | | /* Compute PU related elements for the current CTB set */ |
1528 | 0 | { |
1529 | 0 | WORD32 pu_idx; |
1530 | 0 | ctb_addr = ps_proc->i4_ctb_y * num_ctb_in_row + ps_proc->i4_ctb_x; |
1531 | 0 | pu_idx = ps_proc->pu4_pic_pu_idx[ctb_addr]; |
1532 | 0 | ps_proc->pu1_pu_map = ps_proc->pu1_pic_pu_map |
1533 | 0 | + ctb_addr * num_minpu_in_ctb; |
1534 | 0 | ps_proc->ps_pu = ps_proc->ps_pic_pu + pu_idx; |
1535 | 0 | } |
1536 | | |
1537 | | /* Number of ctbs processed in one loop of process function, capped at the width of the current tile */ |
1538 | 0 | { |
1539 | 0 | ps_proc->i4_nctb = MIN(ps_codec->u4_nctb, ps_tile->u2_wd); |
1540 | 0 | } |
1541 | |

1542 | 0 | } |
1543 | | void ihevcd_process_thread(process_ctxt_t *ps_proc) |
1544 | 0 | { |
1545 | 0 | IHEVCD_ERROR_T ret = (IHEVCD_ERROR_T)IHEVCD_SUCCESS; |
1546 | 0 | { |
1547 | 0 | ithread_set_affinity(ps_proc->i4_id + 1); |
1548 | 0 | } |
1549 | |
|
1550 | 0 | while(1) |
1551 | 0 | { |
1552 | 0 | codec_t *ps_dec = ps_proc->ps_codec; |
1553 | 0 | if(ps_proc->ps_codec->i4_threads_active) |
1554 | 0 | { |
1555 | 0 | DEBUG("In ihevcd_process_thread \n"); |
1556 | |
|
1557 | 0 | ret = ithread_mutex_lock(ps_dec->apv_proc_start_mutex[ps_proc->i4_id]); |
1558 | 0 | if((IHEVCD_ERROR_T)IHEVCD_SUCCESS != ret) |
1559 | 0 | break; |
1560 | | |
1561 | 0 | while(!ps_dec->ai4_process_start[ps_proc->i4_id]) |
1562 | 0 | { |
1563 | 0 | ithread_cond_wait(ps_dec->apv_proc_start_condition[ps_proc->i4_id], |
1564 | 0 | ps_dec->apv_proc_start_mutex[ps_proc->i4_id]); |
1565 | 0 | } |
1566 | 0 | ps_dec->ai4_process_start[ps_proc->i4_id] = 0; |
1567 | 0 | ret = ithread_mutex_unlock(ps_dec->apv_proc_start_mutex[ps_proc->i4_id]); |
1568 | 0 | if((IHEVCD_ERROR_T)IHEVCD_SUCCESS != ret) |
1569 | 0 | break; |
1570 | | |
1571 | 0 | DEBUG(" Got control at ihevcd_process_thread \n"); |
1572 | |
|
1573 | 0 | if(ps_dec->i4_break_threads == 1) |
1574 | 0 | break; |
1575 | 0 | } |
1576 | 0 | while(1) |
1577 | 0 | { |
1578 | 0 | proc_job_t s_job; |
1579 | |
|
1580 | 0 | ret = ihevcd_jobq_dequeue((jobq_t *)ps_proc->pv_proc_jobq, &s_job, |
1581 | 0 | sizeof(proc_job_t), 1); |
1582 | 0 | if((IHEVCD_ERROR_T)IHEVCD_SUCCESS != ret) |
1583 | 0 | break; |
1584 | | |
1585 | 0 | ps_proc->i4_ctb_cnt = s_job.i2_ctb_cnt; |
1586 | 0 | ps_proc->i4_ctb_x = s_job.i2_ctb_x; |
1587 | 0 | ps_proc->i4_ctb_y = s_job.i2_ctb_y; |
1588 | 0 | ps_proc->i4_cur_slice_idx = s_job.i2_slice_idx; |
1589 | | |
1590 | | |
1591 | |
|
1592 | 0 | if(CMD_PROCESS == s_job.i4_cmd) |
1593 | 0 | { |
1594 | 0 | ihevcd_init_proc_ctxt(ps_proc, s_job.i4_tu_coeff_data_ofst); |
1595 | 0 | ihevcd_process(ps_proc); |
1596 | 0 | } |
1597 | 0 | else if(CMD_FMTCONV == s_job.i4_cmd) |
1598 | 0 | { |
1599 | 0 | sps_t *ps_sps; |
1600 | 0 | codec_t *ps_codec; |
1601 | 0 | ivd_out_bufdesc_t *ps_out_buffer; |
1602 | 0 | WORD32 num_rows; |
1603 | |
|
1604 | 0 | if(0 == ps_proc->i4_init_done) |
1605 | 0 | { |
1606 | 0 | ihevcd_init_proc_ctxt(ps_proc, 0); |
1607 | 0 | } |
1608 | 0 | ps_sps = ps_proc->ps_sps; |
1609 | 0 | ps_codec = ps_proc->ps_codec; |
1610 | 0 | ps_out_buffer = ps_proc->ps_out_buffer; |
1611 | 0 | num_rows = 1 << ps_sps->i1_log2_ctb_size; |
1612 | |
|
1613 | 0 | num_rows = MIN(num_rows, |
1614 | 0 | (ps_codec->i4_disp_ht - (s_job.i2_ctb_y << ps_sps->i1_log2_ctb_size)) |
1615 | 0 | ); |
1616 | |
|
1617 | 0 | if(num_rows < 0) |
1618 | 0 | num_rows = 0; |
1619 | |
|
1620 | 0 | ihevcd_fmt_conv(ps_proc->ps_codec, ps_proc, ps_out_buffer->pu1_bufs[0], |
1621 | 0 | ps_out_buffer->pu1_bufs[1], ps_out_buffer->pu1_bufs[2], |
1622 | 0 | s_job.i2_ctb_y << ps_sps->i1_log2_ctb_size, num_rows); |
1623 | 0 | } |
1624 | 0 | } |
1625 | 0 | if(ps_proc->ps_codec->i4_threads_active) |
1626 | 0 | { |
1627 | 0 | ret = ithread_mutex_lock(ps_dec->apv_proc_done_mutex[ps_proc->i4_id]); |
1628 | 0 | if((IHEVCD_ERROR_T)IHEVCD_SUCCESS != ret) |
1629 | 0 | break; |
1630 | | |
1631 | 0 | ps_dec->ai4_process_done[ps_proc->i4_id] = 1; |
1632 | 0 | ithread_cond_signal(ps_dec->apv_proc_done_condition[ps_proc->i4_id]); |
1633 | |
|
1634 | 0 | ret = ithread_mutex_unlock(ps_dec->apv_proc_done_mutex[ps_proc->i4_id]); |
1635 | 0 | if((IHEVCD_ERROR_T)IHEVCD_SUCCESS != ret) |
1636 | 0 | break; |
1637 | 0 | } |
1638 | 0 | else |
1639 | 0 | { |
1640 | 0 | break; |
1641 | 0 | } |
1642 | 0 | } |
1643 | | //ithread_exit(0); |
1644 | 0 | return; |
1645 | 0 | } |
1646 | | |