Coverage Report

Created: 2026-06-10 06:32

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libhevc/decoder/ihevcd_process_slice.c
Line
Count
Source
1
/******************************************************************************
2
*
3
* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4
*
5
* Licensed under the Apache License, Version 2.0 (the "License");
6
* you may not use this file except in compliance with the License.
7
* You may obtain a copy of the License at:
8
*
9
* http://www.apache.org/licenses/LICENSE-2.0
10
*
11
* Unless required by applicable law or agreed to in writing, software
12
* distributed under the License is distributed on an "AS IS" BASIS,
13
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
* See the License for the specific language governing permissions and
15
* limitations under the License.
16
*
17
******************************************************************************/
18
/**
19
 *******************************************************************************
20
 * @file
21
 *  ihevcd_process_slice.c
22
 *
23
 * @brief
24
 *  Contains functions for processing slice data
25
 *
26
 * @author
27
 *  Harish
28
 *
29
 * @par List of Functions:
30
 *
31
 * @remarks
32
 *  None
33
 *
34
 *******************************************************************************
35
 */
36
/*****************************************************************************/
37
/* File Includes                                                             */
38
/*****************************************************************************/
39
#include <stdio.h>
40
#include <stddef.h>
41
#include <stdlib.h>
42
#include <string.h>
43
#include <assert.h>
44
45
#include "ihevc_typedefs.h"
46
#include "iv.h"
47
#include "ivd.h"
48
#include "ihevcd_cxa.h"
49
#include "ithread.h"
50
51
#include "ihevc_defs.h"
52
#include "ihevc_debug.h"
53
#include "ihevc_defs.h"
54
#include "ihevc_structs.h"
55
#include "ihevc_macros.h"
56
#include "ihevc_platform_macros.h"
57
#include "ihevc_cabac_tables.h"
58
#include "ihevc_padding.h"
59
#include "ihevc_iquant_itrans_recon.h"
60
#include "ihevc_chroma_iquant_itrans_recon.h"
61
#include "ihevc_recon.h"
62
#include "ihevc_chroma_recon.h"
63
#include "ihevc_iquant_recon.h"
64
#include "ihevc_chroma_iquant_recon.h"
65
#include "ihevc_intra_pred.h"
66
67
#include "ihevc_error.h"
68
#include "ihevc_common_tables.h"
69
#include "ihevc_quant_tables.h"
70
#include "ihevcd_common_tables.h"
71
72
#include "ihevcd_profile.h"
73
#include "ihevcd_trace.h"
74
#include "ihevcd_defs.h"
75
#include "ihevcd_function_selector.h"
76
#include "ihevcd_structs.h"
77
#include "ihevcd_error.h"
78
#include "ihevcd_nal.h"
79
#include "ihevcd_bitstream.h"
80
#include "ihevcd_job_queue.h"
81
#include "ihevcd_utils.h"
82
#include "ihevcd_debug.h"
83
#include "ihevcd_get_mv.h"
84
#include "ihevcd_inter_pred.h"
85
#include "ihevcd_iquant_itrans_recon_ctb.h"
86
#include "ihevcd_boundary_strength.h"
87
#include "ihevcd_deblk.h"
88
#include "ihevcd_fmt_conv.h"
89
#include "ihevcd_sao.h"
90
#include "ihevcd_profile.h"
91
92
IHEVCD_ERROR_T ihevcd_fmt_conv(codec_t *ps_codec,
93
                               process_ctxt_t *ps_proc,
94
                               UWORD8 *pu1_y_dst,
95
                               UWORD8 *pu1_u_dst,
96
                               UWORD8 *pu1_v_dst,
97
                               WORD32 cur_row,
98
                               WORD32 num_rows);
99
100
/**
 * Processing stages tracked in the per-CTB proc map.
 *
 * The numeric value of each enumerator is used as a bit position: the proc
 * map helpers in this file build their mask as (1 << proc_type). The values
 * are therefore spelled out explicitly and must stay small enough to fit in
 * one byte-wide map entry.
 */
typedef enum
{
    PROC_ALL        = 0,
    PROC_INTER_PRED = 1,
    PROC_RECON      = 2,
    PROC_DEBLK      = 3,
    PROC_SAO        = 4
}proc_type_t;
108
109
/**
 *******************************************************************************
 *
 * @brief
 *  Waits until the neighbouring CTBs needed by the next nctb CTBs have the
 *  bit for the given processing stage set in the codec's proc map
 *
 * @par Description:
 *  Does nothing when ps_proc->i4_check_proc_status is zero. Otherwise it
 *  polls the following proc map entries for the bit (1 << proc_type):
 *   - when i4_ctb_y > 0: the entry in the previous CTB row at tile column
 *     MIN(i4_ctb_tile_x + nctb, tile width - 1), offset by the tile's x
 *     position
 *   - when tiles are enabled and i4_ctb_x > 0: the entry to the left and,
 *     if i4_ctb_y > 0, the entry to the top-left of the current CTB
 *  Each later check is made only if the earlier ones passed. While a
 *  dependency is unmet the function spins with NOP(128), and once a budget
 *  of PROC_NOP_CNT is used up it calls ithread_yield() and restarts the
 *  budget. DATA_SYNC() is issued after the dependencies are met.
 *
 * @param[in] ps_proc
 *  Process context (position, tile, SPS/PPS and codec handles are read)
 *
 * @param[in] proc_type
 *  Stage whose bit is tested in each proc map entry
 *
 * @param[in] nctb
 *  Number of CTBs about to be processed; used to locate the top-right entry
 *
 * @returns
 *  None
 *
 *******************************************************************************
 */
void ihevcd_proc_map_check(process_ctxt_t *ps_proc, proc_type_t proc_type, WORD32 nctb)
{
    tile_t *ps_tile = ps_proc->ps_tile;
    sps_t *ps_sps = ps_proc->ps_sps;
    codec_t *ps_codec = ps_proc->ps_codec;
    WORD32 stage_mask = (1 << (WORD32)proc_type);
    WORD32 spin_budget;

    /* Nothing to wait for when proc status checking is switched off */
    if(!ps_proc->i4_check_proc_status)
    {
        return;
    }

    spin_budget = PROC_NOP_CNT;
    for(;;)
    {
        /* The map is written by other threads, so read it through volatile */
        volatile UWORD8 *pu1_entry;
        volatile WORD32 ready = 1;
        WORD32 col;
        WORD32 map_idx;

        /* Top-right of the last of the next nctb CTBs, clamped to the tile */
        if(ps_proc->i4_ctb_y > 0)
        {
            col = ps_proc->i4_ctb_tile_x + nctb;
            map_idx = MIN(col, (ps_tile->u2_wd - 1));
            map_idx += ps_tile->u1_pos_x;
            map_idx += (ps_proc->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb;

            pu1_entry = ps_codec->pu1_proc_map + map_idx;
            ready = *pu1_entry & stage_mask;
        }

        /* With tiles enabled, left and top-left are tested as well */
        if(ps_proc->ps_pps->i1_tiles_enabled_flag && (ps_proc->i4_ctb_x > 0))
        {
            /* Picture column of the CTB to the left of the current one */
            col = ps_tile->u1_pos_x + ps_proc->i4_ctb_tile_x - 1;

            /* Left CTB */
            if(0 != ready)
            {
                map_idx = col + (ps_proc->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
                pu1_entry = ps_codec->pu1_proc_map + map_idx;
                ready = *pu1_entry & stage_mask;
            }

            /* Top-left CTB */
            if((0 != ready) && (ps_proc->i4_ctb_y > 0))
            {
                map_idx = col + ((ps_proc->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb);
                pu1_entry = ps_codec->pu1_proc_map + map_idx;
                ready = *pu1_entry & stage_mask;
            }
        }

        if(ready)
        {
            break;
        }

        /* Spin for a while first; yield only once the spin budget is spent */
        if(spin_budget > 0)
        {
            NOP(128);
            spin_budget -= 128;
        }
        else
        {
            spin_budget = PROC_NOP_CNT;
            ithread_yield();
        }
    }

    DATA_SYNC();
}
195
196
/**
 *******************************************************************************
 *
 * @brief
 *  Marks nctb CTBs, starting at the current CTB, as having completed the
 *  given processing stage
 *
 * @par Description:
 *  Does nothing when ps_proc->i4_check_proc_status is zero. Otherwise it
 *  issues DATA_SYNC() and then ORs the bit (1 << proc_type) into the proc
 *  map entries for picture columns i4_ctb_x .. i4_ctb_x + nctb - 1 of CTB
 *  row i4_ctb_y.
 *
 * @param[in] ps_proc
 *  Process context (current CTB position, SPS and codec handles are read)
 *
 * @param[in] proc_type
 *  Stage whose bit is set in each proc map entry
 *
 * @param[in] nctb
 *  Number of consecutive CTBs to mark
 *
 * @returns
 *  None
 *
 *******************************************************************************
 */
void ihevcd_proc_map_update(process_ctxt_t *ps_proc, proc_type_t proc_type, WORD32 nctb)
{
    codec_t *ps_codec = ps_proc->ps_codec;
    WORD32 stage_mask = (1 << (WORD32)proc_type);
    WORD32 n;

    if(!ps_proc->i4_check_proc_status)
    {
        return;
    }

    /* Sync is issued before the status bits are set */
    DATA_SYNC();

    for(n = 0; n < nctb; n++)
    {
        WORD32 map_idx = (ps_proc->i4_ctb_x + n)
                        + (ps_proc->i4_ctb_y * ps_proc->ps_sps->i2_pic_wd_in_ctb);

        ps_codec->pu1_proc_map[map_idx] |= stage_mask;
    }
}
218
219
220
/**
 *******************************************************************************
 *
 * @brief
 *  Switches the process context to the next slice header when the current
 *  CTB is the first CTB of that slice
 *
 * @par Description:
 *  Nothing is done for the CTB at (0, 0). For any other CTB, the slice
 *  header following the current one (index wrapped with
 *  MAX_SLICE_HDR_CNT - 1) is looked up; if its start position equals the
 *  current CTB position, i4_cur_slice_idx is incremented and ps_slice_hdr
 *  is pointed at it. The slice-relative x/y counters are reset to zero only
 *  when that header's i1_dependent_slice_flag is 0.
 *
 * @param[in,out] ps_proc
 *  Process context
 *
 * @returns
 *  None
 *
 *******************************************************************************
 */
void ihevcd_slice_hdr_update(process_ctxt_t *ps_proc)
{
    slice_header_t *ps_next_hdr;
    WORD32 next_slot;

    /* The very first CTB of the picture never starts a "next" slice here */
    if((0 == ps_proc->i4_ctb_x) && (0 == ps_proc->i4_ctb_y))
    {
        return;
    }

    next_slot = (ps_proc->i4_cur_slice_idx + 1) & (MAX_SLICE_HDR_CNT - 1);
    ps_next_hdr = ps_proc->ps_codec->ps_slice_hdr_base + next_slot;

    /* Still inside the current slice */
    if((ps_next_hdr->i2_ctb_x != ps_proc->i4_ctb_x)
                    || (ps_next_hdr->i2_ctb_y != ps_proc->i4_ctb_y))
    {
        return;
    }

    /* A slice that is not dependent restarts the slice-relative counters */
    if(0 == ps_next_hdr->i1_dependent_slice_flag)
    {
        ps_proc->i4_ctb_slice_x = 0;
        ps_proc->i4_ctb_slice_y = 0;
    }

    ps_proc->i4_cur_slice_idx++;
    ps_proc->ps_slice_hdr = ps_next_hdr;
}
244
245
/**
 *******************************************************************************
 *
 * @brief
 *  Advances the picture, tile and slice relative CTB x positions by nctb
 *  and wraps the slice relative position onto the next row when needed
 *
 * @par Description:
 *  i4_ctb_tile_x, i4_ctb_x and i4_ctb_slice_x are each incremented by nctb.
 *  The slice counters are then wrapped as follows:
 *   - Tiles disabled: when slice x reaches the tile width, slice y is
 *     incremented and the tile width is subtracted from slice x.
 *   - Tiles enabled, slice starts before the tile (in CTB raster index):
 *     slice x is compared against (tile width + tile x position), extended
 *     by (picture width - slice start x) when the current CTB x is not
 *     beyond the slice start x; on reaching it, slice y is incremented and
 *     the tile width is subtracted from slice x.
 *   - Tiles enabled, otherwise: when slice x reaches the tile width, slice
 *     y is incremented and slice x is reset to 0.
 *
 * @param[in,out] ps_proc
 *  Process context
 *
 * @param[in] nctb
 *  Number of CTBs that were just processed
 *
 * @returns
 *  None
 *
 *******************************************************************************
 */
void ihevcd_ctb_pos_update(process_ctxt_t *ps_proc, WORD32 nctb)
{
    slice_header_t *ps_slice_hdr = ps_proc->ps_slice_hdr;
    tile_t *ps_tile = ps_proc->ps_tile;
    sps_t *ps_sps = ps_proc->ps_sps;
    WORD32 slice_first_ctb, tile_first_ctb;
    WORD32 wrap_at;

    /* Move all three x counters forward */
    ps_proc->i4_ctb_tile_x += nctb;
    ps_proc->i4_ctb_x += nctb;
    ps_proc->i4_ctb_slice_x += nctb;

    /* Without tiles a slice row simply wraps at the tile width */
    if(!ps_proc->ps_pps->i1_tiles_enabled_flag)
    {
        if(ps_proc->i4_ctb_slice_x >= ps_tile->u2_wd)
        {
            ps_proc->i4_ctb_slice_y++;
            ps_proc->i4_ctb_slice_x -= ps_tile->u2_wd;
        }
        return;
    }

    slice_first_ctb = ps_slice_hdr->i2_ctb_x
                    + (ps_slice_hdr->i2_ctb_y * ps_sps->i2_pic_wd_in_ctb);
    tile_first_ctb = ps_tile->u1_pos_x
                    + (ps_tile->u1_pos_y * ps_sps->i2_pic_wd_in_ctb);

    /* Slice does not start before the tile: slices live inside the tile */
    if(slice_first_ctb >= tile_first_ctb)
    {
        if(ps_proc->i4_ctb_slice_x >= (ps_tile->u2_wd))
        {
            /* End of tile row */
            ps_proc->i4_ctb_slice_y++;
            ps_proc->i4_ctb_slice_x = 0;
        }
        return;
    }

    /* Slice started before this tile: tiles span across the slice */
    wrap_at = ps_tile->u2_wd + ps_tile->u1_pos_x;
    if(ps_proc->i4_ctb_x <= ps_slice_hdr->i2_ctb_x)
    {
        /* Current x is not beyond the slice start x; extend the threshold
         * by the distance from the slice start x to the picture width */
        wrap_at += (ps_sps->i2_pic_wd_in_ctb - ps_slice_hdr->i2_ctb_x);
    }

    if(ps_proc->i4_ctb_slice_x >= wrap_at)
    {
        /* End of tile row */
        ps_proc->i4_ctb_slice_y++;
        ps_proc->i4_ctb_slice_x -= ps_tile->u2_wd;
    }
}
311
312
/**
 *******************************************************************************
 *
 * @brief
 *  Computes the CTB level neighbour availability flags and the PU index
 *  range of the current CTB
 *
 * @par Description:
 *  Sets u1_top_ctb_avail, u1_left_ctb_avail, u1_top_lt_ctb_avail and
 *  u1_top_rt_ctb_avail in the process context from the current picture,
 *  tile and slice relative CTB positions. All four start as available and
 *  are cleared at picture, tile and slice boundaries. Bottom-left is not
 *  tracked at CTB level.
 *
 *  It then determines the raster index of the CTB that follows the current
 *  one in tile order, and from pu4_pic_pu_idx derives i4_next_pu_ctb_cnt,
 *  i4_ctb_pu_cnt, i4_ctb_start_pu_idx and ps_pu.
 *
 * @param[in,out] ps_proc
 *  Process context
 *
 * @returns
 *  None
 *
 *******************************************************************************
 */
void ihevcd_ctb_avail_update(process_ctxt_t *ps_proc)
{
    slice_header_t *ps_slice_hdr = ps_proc->ps_slice_hdr;
    sps_t *ps_sps = ps_proc->ps_sps;
    tile_t *ps_tile = ps_proc->ps_tile;
    tile_t *ps_ref_tile;
    WORD16 i2_row_wd;
    WORD32 wrapped_x;
    WORD32 spans_tiles;
    WORD32 slice_first_ctb, tile_first_ctb;
    WORD32 cur_ctb_idx, next_ctb_idx, cur_pu_idx;
    WORD32 no_row_above, in_first_row, not_row_end;
    WORD32 top, left, top_lt, top_rt;

    /* The preceding tile entry is used only when the current tile is away
     * from both the left and the top picture edge */
    ps_ref_tile = ((0 != ps_tile->u1_pos_x) && (0 != ps_tile->u1_pos_y))
                    ? (ps_tile - 1) : ps_tile;

    /* Slice start x advanced by that tile's width */
    wrapped_x = ps_slice_hdr->i2_ctb_x + ps_ref_tile->u2_wd;

    /* The slice is treated as running across tiles unless it wraps exactly
     * onto this tile's x position or starts at this tile's x position */
    spans_tiles = 1;
    if((wrapped_x >= ps_sps->i2_pic_wd_in_ctb)
                    && ((wrapped_x % ps_sps->i2_pic_wd_in_ctb) == ps_tile->u1_pos_x))
    {
        spans_tiles = 0;
    }
    else if(ps_slice_hdr->i2_ctb_x == ps_tile->u1_pos_x)
    {
        spans_tiles = 0;
    }

    slice_first_ctb = ps_slice_hdr->i2_ctb_x
                    + (ps_slice_hdr->i2_ctb_y * ps_sps->i2_pic_wd_in_ctb);
    tile_first_ctb = ps_tile->u1_pos_x
                    + (ps_tile->u1_pos_y * ps_sps->i2_pic_wd_in_ctb);

    /* Row width used for the slice relative top-right test: the picture
     * width when the slice spans multiple tiles, else the tile width */
    if((slice_first_ctb < tile_first_ctb) && (spans_tiles))
    {
        i2_row_wd = ps_sps->i2_pic_wd_in_ctb;
    }
    else
    {
        i2_row_wd = ps_tile->u2_wd;
    }

    cur_ctb_idx = ps_proc->i4_ctb_x
                    + ps_proc->i4_ctb_y * (ps_sps->i2_pic_wd_in_ctb);

    /* Boundary predicates shared by the tests below */
    no_row_above = (0 == ps_proc->i4_ctb_y) || (0 == ps_proc->i4_ctb_tile_y);
    in_first_row = (0 == ps_proc->i4_ctb_slice_y) || (0 == ps_proc->i4_ctb_tile_y);
    not_row_end = ((i2_row_wd - 1) != ps_proc->i4_ctb_slice_x);

    /* Everything is available until a boundary says otherwise */
    top = 1;
    left = 1;
    top_lt = 1;
    top_rt = 1;

    /* First CTB row of the picture or of the tile */
    if(no_row_above)
    {
        top = 0;
        top_lt = 0;
        top_rt = 0;
    }

    if((0 == ps_proc->i4_ctb_x) || (0 == ps_proc->i4_ctb_tile_x))
    {
        /* First CTB column of the picture or of the tile */
        left = 0;
        top_lt = 0;
        if(in_first_row)
        {
            top = 0;
            if(not_row_end)
            {
                top_rt = 0;
            }
        }
    }
    else if(ps_proc->i4_ctb_x > 0)
    {
        /* Slices that do not begin at the start of a CTB row */
        if(in_first_row)
        {
            top = 0;
            top_lt = 0;
            if(0 == ps_proc->i4_ctb_slice_x)
            {
                left = 0;
            }
            if(not_row_end)
            {
                top_rt = 0;
            }
        }
        else if((1 == ps_proc->i4_ctb_slice_y) && (0 == ps_proc->i4_ctb_slice_x))
        {
            top_lt = 0;
        }
    }

    /* Last CTB column of the picture or of the tile */
    if((ps_proc->i4_ctb_x == (ps_sps->i2_pic_wd_in_ctb - 1))
                    || ((ps_tile->u2_wd - 1) == ps_proc->i4_ctb_tile_x))
    {
        top_rt = 0;
    }

    ps_proc->u1_top_ctb_avail = top;
    ps_proc->u1_left_ctb_avail = left;
    ps_proc->u1_top_lt_ctb_avail = top_lt;
    ps_proc->u1_top_rt_ctb_avail = top_rt;

    /* Raster index of the CTB that follows the current one in tile order */
    next_ctb_idx = cur_ctb_idx + 1;
    if(ps_tile->u2_wd == (ps_proc->i4_ctb_tile_x + 1))
    {
        if((ps_proc->i4_ctb_tile_y + 1) != ps_tile->u2_ht)
        {
            /* End of a tile row: first CTB of the next row of this tile */
            next_ctb_idx = ((ps_tile->u1_pos_y + ps_proc->i4_ctb_tile_y + 1)
                            * ps_sps->i2_pic_wd_in_ctb) + ps_tile->u1_pos_x;
        }
        else if(((ps_proc->i4_ctb_tile_y + 1 + ps_tile->u1_pos_y) != ps_sps->i2_pic_ht_in_ctb)
                        || ((ps_proc->i4_ctb_tile_x + 1 + ps_tile->u1_pos_x) != ps_sps->i2_pic_wd_in_ctb))
        {
            /* Last CTB of a tile that is not the last tile: first CTB of
             * the next tile entry */
            tile_t *ps_tile_next = ps_proc->ps_tile + 1;

            next_ctb_idx = ps_tile_next->u1_pos_x
                            + (ps_tile_next->u1_pos_y * ps_sps->i2_pic_wd_in_ctb);
        }
        /* else: last CTB of the picture, cur_ctb_idx + 1 is kept */
    }

    ps_proc->i4_next_pu_ctb_cnt = next_ctb_idx;
    ps_proc->i4_ctb_pu_cnt =
                    ps_proc->pu4_pic_pu_idx[next_ctb_idx]
                    - ps_proc->pu4_pic_pu_idx[cur_ctb_idx];

    cur_pu_idx = ps_proc->pu4_pic_pu_idx[cur_ctb_idx];
    ps_proc->i4_ctb_start_pu_idx = cur_pu_idx;
    ps_proc->ps_pu = &ps_proc->ps_pic_pu[cur_pu_idx];
}
446
447
/**
 *******************************************************************************
 *
 * @brief
 *  Computes the number of TUs in the current CTB from the picture level TU
 *  index table
 *
 * @par Description:
 *  Determines the table slot of the current CTB and of the CTB that follows
 *  it in tile order, stores the latter in i4_next_tu_ctb_cnt and stores the
 *  difference of the two pu4_pic_tu_idx entries in i4_ctb_tu_cnt.
 *
 *  When the codec runs on a single core the current slot is the CTB raster
 *  index modulo RESET_TU_BUF_NCTB; otherwise it is the raster index itself.
 *  The slots used at the end of a tile row and at the end of a tile are
 *  raster indices in both modes.
 *
 * @param[in,out] ps_proc
 *  Process context
 *
 * @returns
 *  None
 *
 *******************************************************************************
 */
void ihevcd_update_ctb_tu_cnt(process_ctxt_t *ps_proc)
{
    sps_t *ps_sps = ps_proc->ps_sps;
    tile_t *ps_tile = ps_proc->ps_tile;
    WORD32 cur_ctb_idx;
    WORD32 cur_tu_slot;
    WORD32 next_tu_slot;

    cur_ctb_idx = ps_proc->i4_ctb_x
                    + ps_proc->i4_ctb_y * (ps_sps->i2_pic_wd_in_ctb);

    /* Single core indexes the table modulo RESET_TU_BUF_NCTB */
    if(1 == ps_proc->ps_codec->i4_num_cores)
    {
        cur_tu_slot = cur_ctb_idx % RESET_TU_BUF_NCTB;
    }
    else
    {
        cur_tu_slot = cur_ctb_idx;
    }

    next_tu_slot = cur_tu_slot + 1;

    if(ps_tile->u2_wd == (ps_proc->i4_ctb_tile_x + 1))
    {
        if((ps_proc->i4_ctb_tile_y + 1) != ps_tile->u2_ht)
        {
            /* End of a tile row: first CTB of the next row of this tile */
            next_tu_slot = ((ps_tile->u1_pos_y + ps_proc->i4_ctb_tile_y + 1)
                            * ps_sps->i2_pic_wd_in_ctb) + ps_tile->u1_pos_x;
        }
        else if(((ps_proc->i4_ctb_tile_y + 1 + ps_tile->u1_pos_y) == ps_sps->i2_pic_ht_in_ctb)
                        && ((ps_proc->i4_ctb_tile_x + 1 + ps_tile->u1_pos_x) == ps_sps->i2_pic_wd_in_ctb))
        {
            /* Last CTB of the picture.
             * NOTE(review): the modulo form is used here for every core
             * count, including the multi-core path whose other slots are
             * plain raster indices - confirm this is intended. */
            next_tu_slot = (cur_ctb_idx % RESET_TU_BUF_NCTB) + 1;
        }
        else
        {
            /* Last CTB of a tile that is not the last tile: first CTB of
             * the next tile entry */
            tile_t *ps_tile_next = ps_proc->ps_tile + 1;

            next_tu_slot = ps_tile_next->u1_pos_x
                            + (ps_tile_next->u1_pos_y * ps_sps->i2_pic_wd_in_ctb);
        }
    }

    ps_proc->i4_next_tu_ctb_cnt = next_tu_slot;
    ps_proc->i4_ctb_tu_cnt = ps_proc->pu4_pic_tu_idx[next_tu_slot]
                    - ps_proc->pu4_pic_tu_idx[cur_tu_slot];
}
517
518
IHEVCD_ERROR_T ihevcd_process(process_ctxt_t *ps_proc)
519
0
{
520
0
    IHEVCD_ERROR_T ret = (IHEVCD_ERROR_T)IHEVCD_SUCCESS;
521
0
    codec_t *ps_codec;
522
0
    sps_t *ps_sps = ps_proc->ps_sps;
523
524
0
    WORD32 nctb;
525
0
    WORD32 i;
526
0
    WORD32 idx;
527
0
    WORD32 nop_cnt;
528
0
    WORD32 num_minpu_in_ctb;
529
0
    WORD32 cur_slice_idx, cur_ctb_tile_x, cur_ctb_slice_x, cur_ctb_tile_y, cur_ctb_slice_y;
530
0
    WORD32 nxt_ctb_slice_y, nxt_ctb_slice_x;
531
0
    tu_t *ps_tu_cur, *ps_tu_nxt;
532
0
    UWORD8 *pu1_pu_map_cur, *pu1_pu_map_nxt;
533
0
    WORD32 num_ctb, num_ctb_tmp;
534
0
    proc_type_t proc_type;
535
536
537
0
    WORD32 ctb_size = 1 << ps_sps->i1_log2_ctb_size;
538
539
0
    WORD32 h_samp_factor, v_samp_factor;
540
0
    WORD32 chroma_row_strd;
541
0
    WORD32 chroma_pixel_strd = 2;
542
543
0
    h_samp_factor = (CHROMA_FMT_IDC_YUV444 == ps_sps->i1_chroma_format_idc) ? 1 : 2;
544
0
    v_samp_factor = (CHROMA_FMT_IDC_YUV420 == ps_sps->i1_chroma_format_idc) ? 2 : 1;
545
546
0
    PROFILE_DISABLE_PROCESS_CTB();
547
548
0
    ps_codec = ps_proc->ps_codec;
549
0
    num_minpu_in_ctb = (ctb_size / MIN_PU_SIZE) * (ctb_size / MIN_PU_SIZE);
550
551
0
    nctb = MIN(ps_codec->i4_proc_nctb, ps_proc->i4_ctb_cnt);
552
0
    nctb = MIN(nctb, (ps_proc->ps_tile->u2_wd - ps_proc->i4_ctb_tile_x));
553
554
0
    if(ps_proc->i4_cur_slice_idx > (MAX_SLICE_HDR_CNT - 2 * ps_sps->i2_pic_wd_in_ctb))
555
0
    {
556
0
        num_ctb = 1;
557
0
    }
558
0
    else
559
0
    {
560
0
        num_ctb = ps_proc->i4_nctb;
561
0
    }
562
0
    nxt_ctb_slice_y = ps_proc->i4_ctb_slice_y;
563
0
    nxt_ctb_slice_x = ps_proc->i4_ctb_slice_x;
564
0
    pu1_pu_map_nxt = ps_proc->pu1_pu_map;
565
0
    ps_tu_nxt = ps_proc->ps_tu;
566
567
0
    while(ps_proc->i4_ctb_cnt)
568
0
    {
569
0
        ps_proc->i4_ctb_slice_y = nxt_ctb_slice_y;
570
0
        ps_proc->i4_ctb_slice_x = nxt_ctb_slice_x;
571
0
        ps_proc->pu1_pu_map = pu1_pu_map_nxt;
572
0
        ps_proc->ps_tu = ps_tu_nxt;
573
574
0
        cur_ctb_tile_x = ps_proc->i4_ctb_tile_x;
575
0
        cur_ctb_tile_y = ps_proc->i4_ctb_tile_y;
576
0
        cur_ctb_slice_x = ps_proc->i4_ctb_slice_x;
577
0
        cur_ctb_slice_y = ps_proc->i4_ctb_slice_y;
578
0
        cur_slice_idx = ps_proc->i4_cur_slice_idx;
579
0
        ps_tu_cur = ps_proc->ps_tu;
580
0
        pu1_pu_map_cur = ps_proc->pu1_pu_map;
581
0
        proc_type = PROC_INTER_PRED;
582
583
0
        if(ps_proc->i4_ctb_cnt < num_ctb)
584
0
        {
585
0
            num_ctb = ps_proc->i4_ctb_cnt;
586
0
        }
587
0
        num_ctb_tmp = num_ctb;
588
589
0
        while(num_ctb_tmp)
590
0
        {
591
0
            slice_header_t *ps_slice_hdr;
592
0
            tile_t *ps_tile = ps_proc->ps_tile;
593
594
            /* Waiting for Parsing to be done*/
595
0
            {
596
597
598
0
                nop_cnt = PROC_NOP_CNT;
599
0
                if(ps_proc->i4_check_parse_status || ps_proc->i4_check_proc_status)
600
0
                {
601
0
                    while(1)
602
0
                    {
603
0
                        volatile UWORD8 *pu1_buf;
604
0
                        volatile WORD32 status;
605
0
                        status = 1;
606
                        /* Check if all dependencies for the next nCTBs are met */
607
                        /* Check if the next nCTBs are parsed */
608
0
                        if(ps_proc->i4_check_parse_status)
609
0
                        {
610
0
                            idx = (ps_proc->i4_ctb_x + nctb - 1);
611
0
                            idx += (ps_proc->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
612
0
                            pu1_buf = (ps_codec->pu1_parse_map + idx);
613
0
                            status = *pu1_buf;
614
0
                        }
615
616
0
                        if(status)
617
0
                            break;
618
619
                        /* if dependencies are not met, then wait for few cycles.
620
                         * Even after few iterations, if the dependencies are not met then yield
621
                         */
622
0
                        if(nop_cnt > 0)
623
0
                        {
624
0
                            NOP(128);
625
0
                            nop_cnt -= 128;
626
0
                        }
627
0
                        else
628
0
                        {
629
0
                            nop_cnt = PROC_NOP_CNT;
630
0
                            ithread_yield();
631
0
                        }
632
0
                    }
633
0
                }
634
0
            }
635
636
            /* Check proc map to ensure dependencies for recon are met */
637
0
            ihevcd_proc_map_check(ps_proc, proc_type, nctb);
638
639
0
            ihevcd_slice_hdr_update(ps_proc);
640
0
            ps_slice_hdr = ps_proc->ps_slice_hdr;
641
642
            //ihevcd_mv_prediction();
643
            //ihevcd_lvl_unpack();
644
            //ihevcd_inter_iq_it_recon();
645
            //Following does prediction, iq, it and recon on a TU by TU basis for intra TUs
646
            //ihevcd_intra_process();
647
            //ihevcd_ctb_boundary_strength_islice(ps_proc, ctb_size);
648
            //ihevcd_deblk_ctb(ps_proc);
649
650
            /* iq,it recon of Intra TU */
651
0
            {
652
0
                UWORD32 *pu4_ctb_top_pu_idx, *pu4_ctb_left_pu_idx, *pu4_ctb_top_left_pu_idx;
653
0
                WORD32 cur_ctb_idx;
654
655
0
                ihevcd_ctb_avail_update(ps_proc);
656
657
#if DEBUG_DUMP_FRAME_BUFFERS_INFO
658
                au1_pic_avail_ctb_flags[ps_proc->i4_ctb_x + ps_proc->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb] =
659
                                ((ps_proc->u1_top_ctb_avail << 3) | (ps_proc->u1_left_ctb_avail << 2) | (ps_proc->u1_top_lt_ctb_avail << 1) | (ps_proc->u1_top_rt_ctb_avail));
660
                au4_pic_ctb_slice_xy[ps_proc->i4_ctb_x + ps_proc->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb] =
661
                                (((UWORD16)ps_proc->i4_ctb_slice_x << 16) | ((UWORD16)ps_proc->i4_ctb_slice_y << 16));
662
#endif
663
664
                /*************************************************/
665
                /****************   MV pred **********************/
666
                /*************************************************/
667
0
                if(PSLICE == ps_slice_hdr->i1_slice_type
668
0
                                || BSLICE == ps_slice_hdr->i1_slice_type)
669
0
                {
670
0
                    mv_ctxt_t s_mv_ctxt;
671
672
0
                    pu4_ctb_top_pu_idx = ps_proc->pu4_pic_pu_idx_top
673
0
                                    + (ps_proc->i4_ctb_x * ctb_size / MIN_PU_SIZE);
674
0
                    pu4_ctb_left_pu_idx = ps_proc->pu4_pic_pu_idx_left;
675
0
                    pu4_ctb_top_left_pu_idx = &ps_proc->u4_ctb_top_left_pu_idx;
676
677
                    /* Initializing s_mv_ctxt */
678
0
                    if(ps_codec->i4_num_cores > MV_PRED_NUM_CORES_THRESHOLD)
679
0
                    {
680
0
                        s_mv_ctxt.ps_pps = ps_proc->ps_pps;
681
0
                        s_mv_ctxt.ps_sps = ps_proc->ps_sps;
682
0
                        s_mv_ctxt.ps_slice_hdr = ps_proc->ps_slice_hdr;
683
0
                        s_mv_ctxt.i4_ctb_x = ps_proc->i4_ctb_x;
684
0
                        s_mv_ctxt.i4_ctb_y = ps_proc->i4_ctb_y;
685
0
                        s_mv_ctxt.ps_pu = ps_proc->ps_pu;
686
0
                        s_mv_ctxt.ps_pic_pu = ps_proc->ps_pic_pu;
687
0
                        s_mv_ctxt.ps_tile = ps_tile;
688
0
                        s_mv_ctxt.pu4_pic_pu_idx_map = ps_proc->pu4_pic_pu_idx_map;
689
0
                        s_mv_ctxt.pu4_pic_pu_idx = ps_proc->pu4_pic_pu_idx;
690
0
                        s_mv_ctxt.pu1_pic_pu_map = ps_proc->pu1_pic_pu_map;
691
0
                        s_mv_ctxt.i4_ctb_pu_cnt = ps_proc->i4_ctb_pu_cnt;
692
0
                        s_mv_ctxt.i4_ctb_start_pu_idx = ps_proc->i4_ctb_start_pu_idx;
693
0
                        s_mv_ctxt.u1_top_ctb_avail = ps_proc->u1_top_ctb_avail;
694
0
                        s_mv_ctxt.u1_top_rt_ctb_avail = ps_proc->u1_top_rt_ctb_avail;
695
0
                        s_mv_ctxt.u1_top_lt_ctb_avail = ps_proc->u1_top_lt_ctb_avail;
696
0
                        s_mv_ctxt.u1_left_ctb_avail = ps_proc->u1_left_ctb_avail;
697
698
0
                        ihevcd_get_mv_ctb(&s_mv_ctxt, pu4_ctb_top_pu_idx,
699
0
                                          pu4_ctb_left_pu_idx, pu4_ctb_top_left_pu_idx);
700
0
                    }
701
702
0
                    ihevcd_inter_pred_ctb(ps_proc);
703
0
                }
704
0
                else if(ps_codec->i4_num_cores > MV_PRED_NUM_CORES_THRESHOLD)
705
0
                {
706
0
                    WORD32 next_ctb_idx, num_pu_per_ctb, ctb_start_pu_idx, pu_cnt;
707
0
                    pu_t *ps_pu;
708
0
                    WORD32 num_minpu_in_ctb = (ctb_size / MIN_PU_SIZE) * (ctb_size / MIN_PU_SIZE);
709
0
                    UWORD8 *pu1_pic_pu_map_ctb = ps_proc->pu1_pic_pu_map +
710
0
                                    (ps_proc->i4_ctb_x + ps_proc->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb) * num_minpu_in_ctb;
711
0
                    WORD32 row, col;
712
0
                    UWORD32 *pu4_nbr_pu_idx = ps_proc->pu4_pic_pu_idx_map;
713
0
                    WORD32 nbr_pu_idx_strd = MAX_CTB_SIZE / MIN_PU_SIZE + 2;
714
0
                    WORD32 ctb_size_in_min_pu = (ctb_size / MIN_PU_SIZE);
715
716
                    /* Neighbor PU idx update inside CTB */
717
                    /* One UWORD32 entry per 4x4. Indicates the PU idx that the 4x4 block belongs to */
718
719
0
                    cur_ctb_idx = ps_proc->i4_ctb_x
720
0
                                    + ps_proc->i4_ctb_y * (ps_sps->i2_pic_wd_in_ctb);
721
0
                    next_ctb_idx = ps_proc->i4_next_pu_ctb_cnt;
722
0
                    num_pu_per_ctb = ps_proc->pu4_pic_pu_idx[next_ctb_idx]
723
0
                                    - ps_proc->pu4_pic_pu_idx[cur_ctb_idx];
724
0
                    ctb_start_pu_idx = ps_proc->pu4_pic_pu_idx[cur_ctb_idx];
725
0
                    ps_pu = &ps_proc->ps_pic_pu[ctb_start_pu_idx];
726
727
0
                    for(pu_cnt = 0; pu_cnt < num_pu_per_ctb; pu_cnt++, ps_pu++)
728
0
                    {
729
0
                        UWORD32 cur_pu_idx;
730
0
                        WORD32 pu_ht = (ps_pu->b4_ht + 1) << 2;
731
0
                        WORD32 pu_wd = (ps_pu->b4_wd + 1) << 2;
732
733
0
                        cur_pu_idx = ctb_start_pu_idx + pu_cnt;
734
735
0
                        for(row = 0; row < pu_ht / MIN_PU_SIZE; row++)
736
0
                            for(col = 0; col < pu_wd / MIN_PU_SIZE; col++)
737
0
                                pu4_nbr_pu_idx[(1 + ps_pu->b4_pos_x + col)
738
0
                                                + (1 + ps_pu->b4_pos_y + row)
739
0
                                                * nbr_pu_idx_strd] =
740
0
                                                cur_pu_idx;
741
0
                    }
742
743
                    /* Updating Top and Left pointers */
744
0
                    {
745
0
                        WORD32 rows_remaining = ps_sps->i2_pic_height_in_luma_samples
746
0
                                        - (ps_proc->i4_ctb_y << ps_sps->i1_log2_ctb_size);
747
0
                        WORD32 ctb_size_left = MIN(ctb_size, rows_remaining);
748
749
                        /* Top Left */
750
                        /* saving top left before updating top ptr, as updating top ptr will overwrite the top left for the next ctb */
751
0
                        ps_proc->u4_ctb_top_left_pu_idx = ps_proc->pu4_pic_pu_idx_top[((ps_proc->i4_ctb_x + 1) * ctb_size / MIN_PU_SIZE) - 1];
752
0
                        for(i = 0; i < ctb_size / MIN_PU_SIZE; i++)
753
0
                        {
754
                            /* Left */
755
                            /* Last column of pu4_nbr_pu_idx */
756
0
                            ps_proc->pu4_pic_pu_idx_left[i] =
757
0
                                            pu4_nbr_pu_idx[(ctb_size / MIN_PU_SIZE) + (i + 1) * nbr_pu_idx_strd];
758
                            /* Top */
759
                            /* Last row of pu4_nbr_pu_idx */
760
0
                            ps_proc->pu4_pic_pu_idx_top[(ps_proc->i4_ctb_x * ctb_size / MIN_PU_SIZE) + i] =
761
0
                                            pu4_nbr_pu_idx[(ctb_size_left / MIN_PU_SIZE) * nbr_pu_idx_strd + i + 1];
762
763
0
                        }
764
765
                        /* Updating the CTB level PU idx (Used for collocated MV pred)*/
766
0
                        {
767
0
                            WORD32 ctb_row, ctb_col, index_pic_map, index_nbr_map;
768
0
                            WORD32 first_pu_of_ctb;
769
0
                            first_pu_of_ctb = pu4_nbr_pu_idx[1 + nbr_pu_idx_strd];
770
0
                            UWORD32 cur_ctb_ht_in_min_pu = MIN(((ps_sps->i2_pic_height_in_luma_samples
771
0
                                        - (ps_proc->i4_ctb_y << ps_sps->i1_log2_ctb_size)) / MIN_PU_SIZE), ctb_size_in_min_pu);
772
0
                            UWORD32 cur_ctb_wd_in_min_pu = MIN(((ps_sps->i2_pic_width_in_luma_samples
773
0
                                        - (ps_proc->i4_ctb_x << ps_sps->i1_log2_ctb_size)) / MIN_PU_SIZE), ctb_size_in_min_pu);
774
775
0
                            index_pic_map = 0 * ctb_size_in_min_pu + 0;
776
0
                            index_nbr_map = (0 + 1) * nbr_pu_idx_strd + (0 + 1);
777
778
0
                            for(ctb_row = 0; ctb_row < cur_ctb_ht_in_min_pu; ctb_row++)
779
0
                            {
780
0
                                for(ctb_col = 0; ctb_col < cur_ctb_wd_in_min_pu; ctb_col++)
781
0
                                {
782
0
                                    pu1_pic_pu_map_ctb[index_pic_map + ctb_col] = pu4_nbr_pu_idx[index_nbr_map + ctb_col]
783
0
                                                    - first_pu_of_ctb;
784
0
                                }
785
0
                                index_pic_map += ctb_size_in_min_pu;
786
0
                                index_nbr_map += nbr_pu_idx_strd;
787
0
                            }
788
0
                        }
789
0
                    }
790
0
                }
791
0
            }
792
793
0
            if(ps_proc->ps_pps->i1_tiles_enabled_flag)
794
0
            {
795
                /*Update the tile index buffer with tile information for the current ctb*/
796
0
                UWORD16 *pu1_tile_idx = ps_proc->pu1_tile_idx;
797
0
                pu1_tile_idx[(ps_proc->i4_ctb_x + (ps_proc->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb))]
798
0
                                = ps_proc->i4_cur_tile_idx;
799
0
            }
800
801
            /*************************************************/
802
            /*********** BS, QP and Deblocking  **************/
803
            /*************************************************/
804
            /* Boundary strength call has to be after IQ IT recon since QP population needs ps_proc->i4_qp_const_inc_ctb flag */
805
806
0
            {
807
0
                slice_header_t *ps_slice_hdr;
808
0
                ps_slice_hdr = ps_proc->ps_slice_hdr;
809
810
811
                /* Check if deblock is disabled for the current slice or if it is disabled for the current picture
812
                 * because of disable deblock api
813
                 */
814
0
                if(0 == ps_codec->i4_disable_deblk_pic)
815
0
                {
816
0
                    if(ps_codec->i4_num_cores > MV_PRED_NUM_CORES_THRESHOLD)
817
0
                    {
818
                        /* Boundary strength calculation is done irrespective of whether deblocking is disabled
819
                         * in the slice or not, to handle deblocking slice boundaries */
820
0
                        if((0 == ps_codec->i4_slice_error))
821
0
                        {
822
0
                            ihevcd_update_ctb_tu_cnt(ps_proc);
823
0
                            ps_proc->s_bs_ctxt.ps_pps = ps_proc->ps_pps;
824
0
                            ps_proc->s_bs_ctxt.ps_sps = ps_proc->ps_sps;
825
0
                            ps_proc->s_bs_ctxt.ps_codec = ps_proc->ps_codec;
826
0
                            ps_proc->s_bs_ctxt.i4_ctb_tu_cnt = ps_proc->i4_ctb_tu_cnt;
827
0
                            ps_proc->s_bs_ctxt.i4_ctb_x = ps_proc->i4_ctb_x;
828
0
                            ps_proc->s_bs_ctxt.i4_ctb_y = ps_proc->i4_ctb_y;
829
0
                            ps_proc->s_bs_ctxt.i4_ctb_tile_x = ps_proc->i4_ctb_tile_x;
830
0
                            ps_proc->s_bs_ctxt.i4_ctb_tile_y = ps_proc->i4_ctb_tile_y;
831
0
                            ps_proc->s_bs_ctxt.i4_ctb_slice_x = ps_proc->i4_ctb_slice_x;
832
0
                            ps_proc->s_bs_ctxt.i4_ctb_slice_y = ps_proc->i4_ctb_slice_y;
833
0
                            ps_proc->s_bs_ctxt.ps_tu = ps_proc->ps_tu;
834
0
                            ps_proc->s_bs_ctxt.ps_pu = ps_proc->ps_pu;
835
0
                            ps_proc->s_bs_ctxt.pu4_pic_pu_idx_map = ps_proc->pu4_pic_pu_idx_map;
836
0
                            ps_proc->s_bs_ctxt.i4_next_pu_ctb_cnt = ps_proc->i4_next_pu_ctb_cnt;
837
0
                            ps_proc->s_bs_ctxt.i4_next_tu_ctb_cnt = ps_proc->i4_next_tu_ctb_cnt;
838
0
                            ps_proc->s_bs_ctxt.pu1_slice_idx = ps_proc->pu1_slice_idx;
839
0
                            ps_proc->s_bs_ctxt.ps_slice_hdr = ps_proc->ps_slice_hdr;
840
0
                            ps_proc->s_bs_ctxt.ps_tile = ps_proc->ps_tile;
841
842
0
                            if(ISLICE == ps_slice_hdr->i1_slice_type)
843
0
                            {
844
0
                                ihevcd_ctb_boundary_strength_islice(&ps_proc->s_bs_ctxt);
845
0
                            }
846
0
                            else
847
0
                            {
848
0
                                ihevcd_ctb_boundary_strength_pbslice(&ps_proc->s_bs_ctxt);
849
0
                            }
850
0
                        }
851
852
                        /* Boundary strength is set to zero if deblocking is disabled for the current slice */
853
0
                        if((0 != ps_slice_hdr->i1_slice_disable_deblocking_filter_flag))
854
0
                        {
855
0
                            WORD32 bs_strd = (ps_sps->i2_pic_wd_in_ctb + 1) * (ctb_size * ctb_size / 8 / 16);
856
857
0
                            UWORD32 *pu4_vert_bs = (UWORD32 *)((UWORD8 *)ps_proc->s_bs_ctxt.pu4_pic_vert_bs +
858
0
                                            ps_proc->i4_ctb_x * (ctb_size * ctb_size / 8 / 16) +
859
0
                                            ps_proc->i4_ctb_y * bs_strd);
860
0
                            UWORD32 *pu4_horz_bs = (UWORD32 *)((UWORD8 *)ps_proc->s_bs_ctxt.pu4_pic_horz_bs +
861
0
                                            ps_proc->i4_ctb_x * (ctb_size * ctb_size / 8 / 16) +
862
0
                                            ps_proc->i4_ctb_y * bs_strd);
863
864
0
                            memset(pu4_vert_bs, 0, (ctb_size / 8) * (ctb_size / 4) / 8 * 2);
865
0
                            memset(pu4_horz_bs, 0, (ctb_size / 8) * (ctb_size / 4) / 8 * 2);
866
0
                        }
867
0
                    }
868
0
                }
869
0
            }
870
871
            /* Per CTB update the following */
872
0
            {
873
0
                WORD32 cur_ctb_idx = ps_proc->i4_ctb_x
874
0
                                + ps_proc->i4_ctb_y * (ps_sps->i2_pic_wd_in_ctb);
875
0
                cur_ctb_idx++;
876
877
0
                ps_proc->pu1_pu_map += nctb * num_minpu_in_ctb;
878
0
                ps_proc->ps_tu += ps_proc->i4_ctb_tu_cnt;
879
0
                if((1 == ps_codec->i4_num_cores) &&
880
0
                                (0 == cur_ctb_idx % RESET_TU_BUF_NCTB))
881
0
                {
882
0
                    ps_proc->ps_tu = ps_proc->ps_pic_tu;
883
0
                }
884
0
                ps_proc->ps_pu += ps_proc->i4_ctb_pu_cnt;
885
0
            }
886
887
            /* Update proc map for recon*/
888
0
            ihevcd_proc_map_update(ps_proc, proc_type, nctb);
889
890
0
            num_ctb_tmp -= nctb;
891
0
            ihevcd_ctb_pos_update(ps_proc, nctb);
892
893
0
        }
894
895
0
        if(cur_slice_idx != ps_proc->i4_cur_slice_idx)
896
0
        {
897
0
            ps_proc->ps_slice_hdr = ps_codec->ps_slice_hdr_base + ((cur_slice_idx)&(MAX_SLICE_HDR_CNT - 1));
898
0
            ps_proc->i4_cur_slice_idx = cur_slice_idx;
899
0
        }
900
        /* Restore the saved variables  */
901
0
        num_ctb_tmp = num_ctb;
902
0
        ps_proc->i4_ctb_x -= num_ctb;
903
0
        ps_proc->i4_ctb_tile_x = cur_ctb_tile_x;
904
0
        ps_proc->i4_ctb_slice_x = cur_ctb_slice_x;
905
0
        ps_proc->i4_ctb_tile_y = cur_ctb_tile_y;
906
0
        ps_proc->i4_ctb_slice_y = cur_ctb_slice_y;
907
0
        ps_proc->pu1_pu_map = pu1_pu_map_cur;
908
0
        ps_proc->ps_tu = ps_tu_cur;
909
0
        proc_type = PROC_RECON;
910
911
0
        while(num_ctb_tmp)
912
0
        {
913
914
            /* Check proc map to ensure dependencies for recon are met */
915
0
            ihevcd_proc_map_check(ps_proc, proc_type, nctb);
916
917
0
            ihevcd_slice_hdr_update(ps_proc);
918
919
0
            {
920
921
0
                ihevcd_ctb_avail_update(ps_proc);
922
923
                /*************************************************/
924
                /**************** IQ IT RECON  *******************/
925
                /*************************************************/
926
927
0
                ihevcd_update_ctb_tu_cnt(ps_proc);
928
929
                /* When scaling matrix is not to be used(scaling_list_enable_flag is zero in SPS),
930
                 * default value of 16 has to be used. Since the value is same for all sizes,
931
                 * same table is used for all cases.
932
                 */
933
0
                if(0 == ps_sps->i1_scaling_list_enable_flag)
934
0
                {
935
0
                    ps_proc->api2_dequant_intra_matrix[0] =
936
0
                                    (WORD16 *)gi2_flat_scale_mat_32x32;
937
0
                    ps_proc->api2_dequant_intra_matrix[1] =
938
0
                                    (WORD16 *)gi2_flat_scale_mat_32x32;
939
0
                    ps_proc->api2_dequant_intra_matrix[2] =
940
0
                                    (WORD16 *)gi2_flat_scale_mat_32x32;
941
0
                    ps_proc->api2_dequant_intra_matrix[3] =
942
0
                                    (WORD16 *)gi2_flat_scale_mat_32x32;
943
944
0
                    ps_proc->api2_dequant_inter_matrix[0] =
945
0
                                    (WORD16 *)gi2_flat_scale_mat_32x32;
946
0
                    ps_proc->api2_dequant_inter_matrix[1] =
947
0
                                    (WORD16 *)gi2_flat_scale_mat_32x32;
948
0
                    ps_proc->api2_dequant_inter_matrix[2] =
949
0
                                    (WORD16 *)gi2_flat_scale_mat_32x32;
950
0
                    ps_proc->api2_dequant_inter_matrix[3] =
951
0
                                    (WORD16 *)gi2_flat_scale_mat_32x32;
952
0
                }
953
0
                else
954
0
                {
955
0
                    if(0 == ps_sps->i1_sps_scaling_list_data_present_flag)
956
0
                    {
957
0
                        ps_proc->api2_dequant_intra_matrix[0] =
958
0
                                        (WORD16 *)gi2_flat_scale_mat_32x32;
959
0
                        ps_proc->api2_dequant_intra_matrix[1] =
960
0
                                        (WORD16 *)gi2_intra_default_scale_mat_8x8;
961
0
                        ps_proc->api2_dequant_intra_matrix[2] =
962
0
                                        (WORD16 *)gi2_intra_default_scale_mat_16x16;
963
0
                        ps_proc->api2_dequant_intra_matrix[3] =
964
0
                                        (WORD16 *)gi2_intra_default_scale_mat_32x32;
965
966
0
                        ps_proc->api2_dequant_inter_matrix[0] =
967
0
                                        (WORD16 *)gi2_flat_scale_mat_32x32;
968
0
                        ps_proc->api2_dequant_inter_matrix[1] =
969
0
                                        (WORD16 *)gi2_inter_default_scale_mat_8x8;
970
0
                        ps_proc->api2_dequant_inter_matrix[2] =
971
0
                                        (WORD16 *)gi2_inter_default_scale_mat_16x16;
972
0
                        ps_proc->api2_dequant_inter_matrix[3] =
973
0
                                        (WORD16 *)gi2_inter_default_scale_mat_32x32;
974
0
                    }
975
                    /*TODO: Add support for custom scaling matrices */
976
0
                }
977
978
                /* CTB Level pointers */
979
0
                ps_proc->pu1_cur_ctb_luma = ps_proc->pu1_cur_pic_luma
980
0
                                + (ps_proc->i4_ctb_x * ctb_size
981
0
                                + ps_proc->i4_ctb_y * ctb_size
982
0
                                * ps_codec->i4_strd);
983
0
                if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc)
984
0
                {
985
0
                    ps_proc->pu1_cur_ctb_chroma = ps_proc->pu1_cur_pic_chroma
986
0
                                    + (ps_proc->i4_ctb_x * ctb_size * chroma_pixel_strd / h_samp_factor)
987
0
                                    + (ps_proc->i4_ctb_y * ctb_size * ps_codec->i4_strd * chroma_pixel_strd / (h_samp_factor * v_samp_factor));
988
0
                }
989
990
0
                ihevcd_iquant_itrans_recon_ctb(ps_proc);
991
0
            }
992
993
            /* Per CTB update the following */
994
0
            {
995
0
                WORD32 cur_ctb_idx = ps_proc->i4_ctb_x
996
0
                                + ps_proc->i4_ctb_y * (ps_sps->i2_pic_wd_in_ctb);
997
0
                cur_ctb_idx++;
998
999
0
                ps_proc->pu1_pu_map += nctb * num_minpu_in_ctb;
1000
0
                ps_proc->ps_tu += ps_proc->i4_ctb_tu_cnt;
1001
0
                if((1 == ps_codec->i4_num_cores) &&
1002
0
                                (0 == cur_ctb_idx % RESET_TU_BUF_NCTB))
1003
0
                {
1004
0
                    ps_proc->ps_tu = ps_proc->ps_pic_tu;
1005
0
                }
1006
0
                ps_proc->ps_pu += ps_proc->i4_ctb_pu_cnt;
1007
0
            }
1008
1009
1010
            /* Update proc map for recon*/
1011
0
            ihevcd_proc_map_update(ps_proc, proc_type, nctb);
1012
1013
0
            num_ctb_tmp -= nctb;
1014
0
            ihevcd_ctb_pos_update(ps_proc, nctb);
1015
0
        }
1016
1017
0
        if(cur_slice_idx != ps_proc->i4_cur_slice_idx)
1018
0
        {
1019
0
            ps_proc->ps_slice_hdr = ps_codec->ps_slice_hdr_base + ((cur_slice_idx)&(MAX_SLICE_HDR_CNT - 1));
1020
0
            ps_proc->i4_cur_slice_idx = cur_slice_idx;
1021
0
        }
1022
        /* Restore the saved variables  */
1023
0
        num_ctb_tmp = num_ctb;
1024
0
        ps_proc->i4_ctb_x -= num_ctb;
1025
0
        ps_proc->i4_ctb_tile_x = cur_ctb_tile_x;
1026
0
        ps_proc->i4_ctb_slice_x = cur_ctb_slice_x;
1027
0
        ps_proc->i4_ctb_tile_y = cur_ctb_tile_y;
1028
0
        ps_proc->i4_ctb_slice_y = cur_ctb_slice_y;
1029
0
        pu1_pu_map_nxt = ps_proc->pu1_pu_map;
1030
0
        ps_tu_nxt = ps_proc->ps_tu;
1031
0
        ps_proc->pu1_pu_map = pu1_pu_map_cur;
1032
0
        ps_proc->ps_tu = ps_tu_cur;
1033
0
        proc_type = PROC_DEBLK;
1034
1035
0
        while(num_ctb_tmp)
1036
0
        {
1037
1038
1039
            /* Check proc map to ensure dependencies for deblk are met */
1040
0
            ihevcd_proc_map_check(ps_proc, proc_type, nctb);
1041
1042
0
            ihevcd_slice_hdr_update(ps_proc);
1043
1044
1045
0
            if(0 == ps_codec->i4_disable_deblk_pic)
1046
0
            {
1047
0
                WORD32 i4_is_last_ctb_x = 0;
1048
0
                WORD32 i4_is_last_ctb_y = 0;
1049
1050
1051
                /* Deblocking is done irrespective of whether it is disabled in the slice or not,
1052
                 * to handle deblocking the slice boundaries */
1053
0
                {
1054
0
                    ps_proc->s_deblk_ctxt.ps_pps = ps_proc->ps_pps;
1055
0
                    ps_proc->s_deblk_ctxt.ps_sps = ps_proc->ps_sps;
1056
0
                    ps_proc->s_deblk_ctxt.ps_codec = ps_proc->ps_codec;
1057
0
                    ps_proc->s_deblk_ctxt.ps_slice_hdr = ps_proc->ps_slice_hdr;
1058
0
                    ps_proc->s_deblk_ctxt.i4_ctb_x = ps_proc->i4_ctb_x;
1059
0
                    ps_proc->s_deblk_ctxt.i4_ctb_y = ps_proc->i4_ctb_y;
1060
0
                    ps_proc->s_deblk_ctxt.pu1_slice_idx = ps_proc->pu1_slice_idx;
1061
0
                    ps_proc->s_deblk_ctxt.is_chroma_yuv420sp_vu = (ps_codec->e_ref_chroma_fmt == IV_YUV_420SP_VU);
1062
1063
                    /* Populating Current CTB's no_loop_filter flags */
1064
0
                    {
1065
0
                        WORD32 row;
1066
0
                        WORD32 log2_ctb_size = ps_sps->i1_log2_ctb_size;
1067
1068
                        /* Loop filter strd in units of num bits */
1069
0
                        WORD32 loop_filter_strd = ((ps_sps->i2_pic_width_in_luma_samples + 63) >> 6) << 3;
1070
                        /* Bit position is the current 8x8 bit offset wrt pic_no_loop_filter
1071
                         * bit_pos has to be a WORD32 so that when it is negative, the downshift still retains it to be a negative value */
1072
0
                        WORD32 bit_pos = ((ps_proc->i4_ctb_y << (log2_ctb_size - 3)) - 1) * loop_filter_strd + (ps_proc->i4_ctb_x << (log2_ctb_size - 3)) - 1;
1073
1074
0
                        for(row = 0; row < (ctb_size >> 3) + 1; row++)
1075
0
                        {
1076
                            /* Go to the corresponding byte - read 32 bits and downshift */
1077
0
                            ps_proc->s_deblk_ctxt.au2_ctb_no_loop_filter_flag[row] = (*(UWORD32 *)(ps_proc->pu1_pic_no_loop_filter_flag + (bit_pos >> 3))) >> (bit_pos & 7);
1078
0
                            bit_pos += loop_filter_strd;
1079
0
                        }
1080
0
                    }
1081
1082
0
                    ihevcd_deblk_ctb(&ps_proc->s_deblk_ctxt, i4_is_last_ctb_x, i4_is_last_ctb_y);
1083
1084
                    /* If the last CTB in the row was a complete CTB then deblocking has to be called for the remaining pixels, since deblocking
1085
                     * is applied on a shifted CTB structure
1086
                     */
1087
0
                    if(ps_proc->i4_ctb_x == ps_sps->i2_pic_wd_in_ctb - 1)
1088
0
                    {
1089
0
                        WORD32 i4_is_last_ctb_x = 1;
1090
0
                        WORD32 i4_is_last_ctb_y = 0;
1091
1092
0
                        WORD32 last_x_pos;
1093
0
                        last_x_pos = (ps_sps->i2_pic_wd_in_ctb << ps_sps->i1_log2_ctb_size);
1094
0
                        if(last_x_pos  ==  ps_sps->i2_pic_width_in_luma_samples)
1095
0
                        {
1096
0
                            ihevcd_deblk_ctb(&ps_proc->s_deblk_ctxt, i4_is_last_ctb_x, i4_is_last_ctb_y);
1097
0
                        }
1098
0
                    }
1099
1100
1101
                    /* If the last CTB in the column was a complete CTB then deblocking has to be called for the remaining pixels, since deblocking
1102
                     * is applied on a shifted CTB structure
1103
                     */
1104
0
                    if(ps_proc->i4_ctb_y == ps_sps->i2_pic_ht_in_ctb - 1)
1105
0
                    {
1106
0
                        WORD32 i4_is_last_ctb_x = 0;
1107
0
                        WORD32 i4_is_last_ctb_y = 1;
1108
0
                        WORD32 last_y_pos;
1109
0
                        last_y_pos = (ps_sps->i2_pic_ht_in_ctb << ps_sps->i1_log2_ctb_size);
1110
0
                        if(last_y_pos == ps_sps->i2_pic_height_in_luma_samples)
1111
0
                        {
1112
0
                            ihevcd_deblk_ctb(&ps_proc->s_deblk_ctxt, i4_is_last_ctb_x, i4_is_last_ctb_y);
1113
0
                        }
1114
0
                    }
1115
0
                }
1116
0
            }
1117
1118
            /* Update proc map for deblk*/
1119
0
            ihevcd_proc_map_update(ps_proc, proc_type, nctb);
1120
1121
0
            num_ctb_tmp -= nctb;
1122
0
            ihevcd_ctb_pos_update(ps_proc, nctb);
1123
0
        }
1124
1125
0
        if(cur_slice_idx != ps_proc->i4_cur_slice_idx)
1126
0
        {
1127
0
            ps_proc->ps_slice_hdr = ps_codec->ps_slice_hdr_base + ((cur_slice_idx)&(MAX_SLICE_HDR_CNT - 1));
1128
0
            ps_proc->i4_cur_slice_idx = cur_slice_idx;
1129
0
        }
1130
        /* Restore the saved variables  */
1131
0
        num_ctb_tmp = num_ctb;
1132
0
        ps_proc->i4_ctb_x -= num_ctb;
1133
0
        ps_proc->i4_ctb_tile_x = cur_ctb_tile_x;
1134
0
        ps_proc->i4_ctb_tile_y = cur_ctb_tile_y;
1135
0
        ps_proc->pu1_pu_map = pu1_pu_map_cur;
1136
0
        ps_proc->ps_tu = ps_tu_cur;
1137
0
        nxt_ctb_slice_y = ps_proc->i4_ctb_slice_y;
1138
0
        nxt_ctb_slice_x = ps_proc->i4_ctb_slice_x;
1139
0
        ps_proc->i4_ctb_slice_y = cur_ctb_slice_y;
1140
0
        ps_proc->i4_ctb_slice_x = cur_ctb_slice_x;
1141
0
        proc_type = PROC_SAO;
1142
1143
0
        while(num_ctb_tmp)
1144
0
        {
1145
1146
1147
            /* Check proc map to ensure dependencies for SAO are met */
1148
0
            ihevcd_proc_map_check(ps_proc, proc_type, nctb);
1149
1150
0
            ihevcd_slice_hdr_update(ps_proc);
1151
1152
1153
1154
0
            {
1155
                /* SAO is done even when it is disabled in the current slice, because
1156
                 * it is performed on a shifted CTB and the neighbor CTBs can belong
1157
                 * to different slices with SAO enabled */
1158
0
                if(0 == ps_codec->i4_disable_sao_pic)
1159
0
                {
1160
0
                    ps_proc->s_sao_ctxt.ps_pps = ps_proc->ps_pps;
1161
0
                    ps_proc->s_sao_ctxt.ps_sps = ps_proc->ps_sps;
1162
0
                    ps_proc->s_sao_ctxt.ps_tile = ps_proc->ps_tile;
1163
0
                    ps_proc->s_sao_ctxt.ps_codec = ps_proc->ps_codec;
1164
0
                    ps_proc->s_sao_ctxt.ps_slice_hdr = ps_proc->ps_slice_hdr;
1165
0
                    ps_proc->s_sao_ctxt.i4_cur_slice_idx = ps_proc->i4_cur_slice_idx;
1166
1167
1168
0
                    ps_proc->s_sao_ctxt.i4_ctb_x = ps_proc->i4_ctb_x;
1169
0
                    ps_proc->s_sao_ctxt.i4_ctb_y = ps_proc->i4_ctb_y;
1170
0
                    ps_proc->s_sao_ctxt.is_chroma_yuv420sp_vu = (ps_codec->e_ref_chroma_fmt == IV_YUV_420SP_VU);
1171
1172
0
                    ihevcd_sao_shift_ctb(&ps_proc->s_sao_ctxt);
1173
0
                }
1174
1175
1176
                /* Call padding if required */
1177
0
                {
1178
0
                    chroma_row_strd = ps_codec->i4_strd * chroma_pixel_strd / h_samp_factor;
1179
1180
1181
0
                    if(0 == ps_proc->i4_ctb_x)
1182
0
                    {
1183
0
                        WORD32 pad_ht_luma;
1184
0
                        WORD32 pad_ht_chroma;
1185
1186
0
                        ps_proc->pu1_cur_ctb_luma = ps_proc->pu1_cur_pic_luma
1187
0
                                        + (ps_proc->i4_ctb_x * ctb_size
1188
0
                                        + ps_proc->i4_ctb_y * ctb_size
1189
0
                                        * ps_codec->i4_strd);
1190
1191
0
                        pad_ht_luma = ctb_size;
1192
0
                        pad_ht_luma += (ps_sps->i2_pic_ht_in_ctb - 1) == ps_proc->i4_ctb_y ? 8 : 0;
1193
                        /* Pad left after 1st CTB is processed */
1194
0
                        ps_codec->s_func_selector.ihevc_pad_left_luma_fptr(ps_proc->pu1_cur_ctb_luma - 8 * ps_codec->i4_strd, ps_codec->i4_strd, pad_ht_luma, PAD_LEFT);
1195
0
                        if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc)
1196
0
                        {
1197
0
                            ps_proc->pu1_cur_ctb_chroma = ps_proc->pu1_cur_pic_chroma
1198
0
                                            + (ps_proc->i4_ctb_x * ctb_size * chroma_pixel_strd / h_samp_factor)
1199
0
                                            + (ps_proc->i4_ctb_y * ctb_size * chroma_row_strd / v_samp_factor);
1200
0
                            pad_ht_chroma = ctb_size / v_samp_factor;
1201
0
                            ps_codec->s_func_selector.ihevc_pad_left_chroma_fptr(
1202
0
                                            ps_proc->pu1_cur_ctb_chroma - (8 * v_samp_factor) * chroma_row_strd,
1203
0
                                            chroma_row_strd, pad_ht_chroma, PAD_LEFT * chroma_pixel_strd / h_samp_factor);
1204
0
                        }
1205
0
                    }
1206
1207
0
                    if((ps_sps->i2_pic_wd_in_ctb - 1) == ps_proc->i4_ctb_x)
1208
0
                    {
1209
0
                        WORD32 pad_ht_luma;
1210
0
                        WORD32 pad_ht_chroma;
1211
0
                        WORD32 cols_remaining = ps_sps->i2_pic_width_in_luma_samples - (ps_proc->i4_ctb_x << ps_sps->i1_log2_ctb_size);
1212
1213
0
                        ps_proc->pu1_cur_ctb_luma = ps_proc->pu1_cur_pic_luma
1214
0
                                        + (ps_proc->i4_ctb_x * ctb_size
1215
0
                                        + ps_proc->i4_ctb_y * ctb_size
1216
0
                                        * ps_codec->i4_strd);
1217
0
                        if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc)
1218
0
                        {
1219
0
                            ps_proc->pu1_cur_ctb_chroma = ps_proc->pu1_cur_pic_chroma
1220
0
                                            + (ps_proc->i4_ctb_x * ctb_size * chroma_pixel_strd / h_samp_factor)
1221
0
                                            + (ps_proc->i4_ctb_y * ctb_size * chroma_row_strd / v_samp_factor);
1222
0
                            pad_ht_chroma = ctb_size / v_samp_factor;
1223
0
                        }
1224
0
                        pad_ht_luma = ctb_size;
1225
0
                        if((ps_sps->i2_pic_ht_in_ctb - 1) == ps_proc->i4_ctb_y)
1226
0
                        {
1227
0
                            pad_ht_luma += 8;
1228
0
                            if (CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc)
1229
0
                            {
1230
0
                                pad_ht_chroma += (8 * v_samp_factor);
1231
0
                                ps_codec->s_func_selector.ihevc_pad_left_chroma_fptr(
1232
0
                                                ps_proc->pu1_cur_pic_chroma + ((ps_sps->i2_pic_height_in_luma_samples / v_samp_factor) - (8 * v_samp_factor)) * chroma_row_strd,
1233
0
                                                chroma_row_strd, (8 * v_samp_factor),
1234
0
                                                PAD_LEFT * chroma_pixel_strd / h_samp_factor);
1235
0
                            }
1236
0
                        }
1237
                        /* Pad right after last CTB in the current row is processed */
1238
0
                        ps_codec->s_func_selector.ihevc_pad_right_luma_fptr(ps_proc->pu1_cur_ctb_luma + cols_remaining - 8 * ps_codec->i4_strd, ps_codec->i4_strd, pad_ht_luma, PAD_RIGHT);
1239
0
                        if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc)
1240
0
                        {
1241
0
                            ps_codec->s_func_selector.ihevc_pad_right_chroma_fptr(
1242
0
                                            ps_proc->pu1_cur_ctb_chroma + (cols_remaining * chroma_pixel_strd / h_samp_factor) - (8 * v_samp_factor) * chroma_row_strd,
1243
0
                                            chroma_row_strd, pad_ht_chroma,
1244
0
                                            PAD_RIGHT * chroma_pixel_strd / h_samp_factor);
1245
0
                        }
1246
1247
0
                        if((ps_sps->i2_pic_ht_in_ctb - 1) == ps_proc->i4_ctb_y)
1248
0
                        {
1249
0
                            UWORD8 *pu1_buf;
1250
                            /* Since SAO is shifted by 8x8, chroma padding can not be done till second row is processed */
1251
                            /* Hence moving top padding to the end of the frame; moving it to the second row also results in problems when there is only one row */
1252
                            /* Pad top after padding left and right for current rows after processing 1st CTB row */
1253
0
                            ihevc_pad_top(ps_proc->pu1_cur_pic_luma - PAD_LEFT, ps_codec->i4_strd, ps_sps->i2_pic_width_in_luma_samples + PAD_WD, PAD_TOP);
1254
0
                            if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc)
1255
0
                            {
1256
0
                                ihevc_pad_top(ps_proc->pu1_cur_pic_chroma - PAD_LEFT * (chroma_pixel_strd / h_samp_factor),
1257
0
                                              chroma_row_strd,
1258
0
                                              (ps_sps->i2_pic_width_in_luma_samples + PAD_WD) * (chroma_pixel_strd / h_samp_factor),
1259
0
                                              PAD_TOP / v_samp_factor);
1260
0
                            }
1261
1262
                            /* Pad bottom after padding left and right for current rows after processing 1st CTB row */
1263
0
                            pu1_buf = ps_proc->pu1_cur_pic_luma + ps_codec->i4_strd * ps_sps->i2_pic_height_in_luma_samples - PAD_LEFT;
1264
0
                            ihevc_pad_bottom(pu1_buf, ps_codec->i4_strd, ps_sps->i2_pic_width_in_luma_samples + PAD_WD, PAD_BOT);
1265
0
                            if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc)
1266
0
                            {
1267
0
                                pu1_buf = ps_proc->pu1_cur_pic_chroma
1268
0
                                                + chroma_row_strd * (ps_sps->i2_pic_height_in_luma_samples / v_samp_factor)
1269
0
                                                - (PAD_LEFT * chroma_pixel_strd / h_samp_factor);
1270
0
                                ihevc_pad_bottom(pu1_buf,
1271
0
                                                 chroma_row_strd,
1272
0
                                                 (ps_sps->i2_pic_width_in_luma_samples + PAD_WD) * (chroma_pixel_strd / h_samp_factor),
1273
0
                                                 PAD_BOT / v_samp_factor);
1274
0
                            }
1275
0
                        }
1276
0
                    }
1277
1278
0
                }
1279
0
            }
1280
1281
1282
            /* Update proc map for SAO*/
1283
0
            ihevcd_proc_map_update(ps_proc, proc_type, nctb);
1284
            /* Update proc map for Completion of CTB*/
1285
0
            ihevcd_proc_map_update(ps_proc, PROC_ALL, nctb);
1286
0
            {
1287
0
                tile_t *ps_tile;
1288
1289
0
                ps_tile = ps_proc->ps_tile;
1290
0
                num_ctb_tmp -= nctb;
1291
1292
0
                ps_proc->i4_ctb_tile_x += nctb;
1293
0
                ps_proc->i4_ctb_x += nctb;
1294
1295
0
                ps_proc->i4_ctb_slice_x += nctb;
1296
1297
1298
                /* Update tile counters */
1299
0
                if(ps_proc->i4_ctb_tile_x >= (ps_tile->u2_wd))
1300
0
                {
1301
                    /*End of tile row*/
1302
0
                    ps_proc->i4_ctb_tile_x = 0;
1303
0
                    ps_proc->i4_ctb_x = ps_tile->u1_pos_x;
1304
1305
0
                    ps_proc->i4_ctb_tile_y++;
1306
0
                    ps_proc->i4_ctb_y++;
1307
0
                    if(ps_proc->i4_ctb_tile_y == ps_tile->u2_ht)
1308
0
                    {
1309
                        /* Reached End of Tile */
1310
0
                        ps_proc->i4_ctb_tile_y = 0;
1311
0
                        ps_proc->i4_ctb_tile_x = 0;
1312
0
                        ps_proc->ps_tile++;
1313
                        //End of picture
1314
0
                        if(!((ps_tile->u2_ht + ps_tile->u1_pos_y  ==  ps_sps->i2_pic_ht_in_ctb) && (ps_tile->u2_wd + ps_tile->u1_pos_x  ==  ps_sps->i2_pic_wd_in_ctb)))
1315
0
                        {
1316
0
                            ps_tile = ps_proc->ps_tile;
1317
0
                            ps_proc->i4_ctb_x = ps_tile->u1_pos_x;
1318
0
                            ps_proc->i4_ctb_y = ps_tile->u1_pos_y;
1319
1320
0
                        }
1321
0
                    }
1322
0
                }
1323
0
            }
1324
0
        }
1325
1326
0
        ps_proc->i4_ctb_cnt -= num_ctb;
1327
0
    }
1328
0
    return ret;
1329
0
}
1330
1331
/**
 *******************************************************************************
 *
 * @brief
 *  Prepares a process context for the CTB run starting at
 *  (ps_proc->i4_ctb_x, ps_proc->i4_ctb_y) of slice ps_proc->i4_cur_slice_idx
 *
 * @par Description:
 *  Resolves the slice header, PPS, SPS and tile for the starting CTB, derives
 *  the CTB position relative to its tile and to its slice, and points the
 *  TU / PU working pointers at the entries belonging to that CTB. Also marks
 *  the context as initialised (i4_init_done = 1).
 *
 * @param[in,out] ps_proc
 *  Process context. i4_ctb_x, i4_ctb_y and i4_cur_slice_idx are read; the
 *  slice/tile counters and buffer pointers are written.
 *
 * @param[in] tu_coeff_data_ofst
 *  Byte offset added to pv_pic_tu_coeff_data to obtain pv_tu_coeff_data
 *
 * @returns
 *  None
 *
 *******************************************************************************
 */
void ihevcd_init_proc_ctxt(process_ctxt_t *ps_proc, WORD32 tu_coeff_data_ofst)
{
    codec_t *ps_codec = ps_proc->ps_codec;
    /* Slice header index is masked by (MAX_SLICE_HDR_CNT - 1) before indexing the header array */
    slice_header_t *ps_slice_hdr = ps_codec->ps_slice_hdr_base
                    + ((ps_proc->i4_cur_slice_idx) & (MAX_SLICE_HDR_CNT - 1));
    pps_t *ps_pps = ps_codec->ps_pps_base + ps_slice_hdr->i1_pps_id;
    sps_t *ps_sps = ps_codec->ps_sps_base + ps_pps->i1_sps_id;
    tile_t *ps_tile;
    WORD32 tile_idx;
    WORD32 ctb_size = 1 << ps_sps->i1_log2_ctb_size;
    WORD32 slice_org_x;
    WORD32 slice_org_y;
    WORD32 ctb_addr;

    ps_proc->ps_slice_hdr = ps_slice_hdr;
    ps_proc->ps_pps = ps_pps;
    ps_proc->ps_sps = ps_sps;
    ps_proc->i4_init_done = 1;

    ps_proc->s_sao_ctxt.pu1_slice_idx = ps_proc->pu1_slice_idx;

    /* Locate the tile holding the starting CTB and the CTB position inside it */
    ihevcd_get_tile_pos(ps_pps, ps_sps, ps_proc->i4_ctb_x, ps_proc->i4_ctb_y,
                        &ps_proc->i4_ctb_tile_x, &ps_proc->i4_ctb_tile_y,
                        &tile_idx);

    ps_tile = ps_pps->ps_tile + tile_idx;
    ps_proc->ps_tile = ps_tile;
    ps_proc->i4_cur_tile_idx = tile_idx;

    slice_org_x = ps_slice_hdr->i2_independent_ctb_x;
    slice_org_y = ps_slice_hdr->i2_independent_ctb_y;

    if(!ps_pps->i1_tiles_enabled_flag)
    {
        /* No tiles: slice-relative position is the raster distance from the
         * independent slice start, wrapped by one picture row when negative */
        ps_proc->i4_ctb_slice_x = ps_proc->i4_ctb_x - slice_org_x;
        ps_proc->i4_ctb_slice_y = ps_proc->i4_ctb_y - slice_org_y;
        if(ps_proc->i4_ctb_slice_x < 0)
        {
            ps_proc->i4_ctb_slice_x += ps_sps->i2_pic_wd_in_ctb;
            ps_proc->i4_ctb_slice_y -= 1;
        }
    }
    else
    {
        /* Tile preceding the current one; the first tile stands in for itself */
        tile_t *ps_tile_prev = tile_idx ? (ps_tile - 1) : ps_tile;
        WORD32 slice_first_ctb = ps_slice_hdr->i2_ctb_x
                        + (ps_slice_hdr->i2_ctb_y * ps_sps->i2_pic_wd_in_ctb);
        WORD32 tile_first_ctb = ps_tile->u1_pos_x
                        + (ps_tile->u1_pos_y * ps_sps->i2_pic_wd_in_ctb);
        /* Set when the tiles of this slice are treated as horizontally continuous */
        WORD32 tiles_contiguous = 0;

        if((ps_slice_hdr->i2_ctb_x == ps_tile->u1_pos_x)
                        && (ps_slice_hdr->i2_ctb_y != ps_tile->u1_pos_y))
        {
            /* Slice starts in this tile's first column but on a different row */
            tiles_contiguous = 1;
        }
        else
        {
            /* Slice start x advanced by the previous tile's width */
            WORD32 advanced_x = ps_slice_hdr->i2_ctb_x + ps_tile_prev->u2_wd;
            WORD32 wraps_onto_tile =
                            (advanced_x >= ps_sps->i2_pic_wd_in_ctb)
                            && (advanced_x % ps_sps->i2_pic_wd_in_ctb == ps_tile->u1_pos_x);

            if(!wraps_onto_tile && (ps_slice_hdr->i2_ctb_x != ps_tile->u1_pos_x))
            {
                tiles_contiguous = 1;
            }
        }

        /* Horizontal position of the CTB within the slice */
        if(ps_proc->i4_ctb_x >= slice_org_x)
        {
            /* Slice begins at or before this column: plain difference */
            ps_proc->i4_ctb_slice_x = ps_proc->i4_ctb_x - slice_org_x;
        }
        else if(tiles_contiguous)
        {
            /* CTB lies left of the slice start: wrap over the picture width
             * when the slice began before this tile, else over the tile width */
            WORD32 wrap_wd;

            if(slice_first_ctb < tile_first_ctb)
            {
                wrap_wd = ps_sps->i2_pic_wd_in_ctb;
            }
            else
            {
                wrap_wd = ps_tile->u2_wd;
            }
            ps_proc->i4_ctb_slice_x = wrap_wd - (slice_org_x - ps_proc->i4_ctb_x);
        }
        else
        {
            ps_proc->i4_ctb_slice_x = ps_proc->i4_ctb_x - ps_tile->u1_pos_x;
        }

        /* Vertical position of the CTB within the slice; adjusted below */
        ps_proc->i4_ctb_slice_y = ps_proc->i4_ctb_y - slice_org_y;

        /* At the first CTB of a tile, re-derive the slice counters */
        if((0 == ps_proc->i4_ctb_tile_x) && (0 == ps_proc->i4_ctb_tile_y))
        {
            if(ps_slice_hdr->i1_dependent_slice_flag)
            {
                ps_proc->i4_ctb_slice_x = 0;
                ps_proc->i4_ctb_slice_y = 0;
            }
            else if(slice_first_ctb < tile_first_ctb)
            {
                /* Slice started before this tile, i.e. it spans several tiles */
                ps_proc->i4_ctb_slice_y = ps_tile->u1_pos_y - slice_org_y;

                /* TODO: In a multiple slice clip, an independent slice spanning
                 * more than 2 tiles in a row is not supported */
                if(tiles_contiguous
                                && (slice_org_y <= ps_tile->u1_pos_y)
                                && (slice_org_x > ps_tile->u1_pos_x))
                {
                    ps_proc->i4_ctb_slice_y -= 1;
                }
            }
        }

        /* Slice starts strictly inside this tile's column range (not at its
         * first column) and the CTB is not on the slice's first row */
        if((slice_org_x > ps_tile->u1_pos_x)
                        && (slice_org_x < ps_tile->u1_pos_x + ps_tile->u2_wd)
                        && (0 != ps_proc->i4_ctb_slice_y))
        {
            ps_proc->i4_ctb_slice_y -= 1;
        }
    }

    /* Raster address of the starting CTB; shared by the TU and PU set-up */
    ctb_addr = ps_proc->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb + ps_proc->i4_ctb_x;

    /* TU related pointers for the current CTB set */
    {
        WORD32 min_tu_per_ctb = ctb_size / MIN_TU_SIZE;

        min_tu_per_ctb *= min_tu_per_ctb;

        ps_proc->pu1_tu_map = ps_proc->pu1_pic_tu_map + min_tu_per_ctb * ctb_addr;

        if(1 != ps_codec->i4_num_cores)
        {
            ps_proc->ps_tu = ps_proc->ps_pic_tu + ps_proc->pu4_pic_tu_idx[ctb_addr];
        }
        else
        {
            /* Single core: the TU index table is addressed modulo RESET_TU_BUF_NCTB */
            ps_proc->ps_tu = ps_proc->ps_pic_tu
                            + ps_proc->pu4_pic_tu_idx[ctb_addr % RESET_TU_BUF_NCTB];
        }

        ps_proc->pv_tu_coeff_data = (UWORD8 *)ps_proc->pv_pic_tu_coeff_data
                        + tu_coeff_data_ofst;
    }

    /* PU related pointers for the current CTB set */
    {
        WORD32 min_pu_per_ctb = (ctb_size / MIN_PU_SIZE) * (ctb_size / MIN_PU_SIZE);
        WORD32 pu_idx = ps_proc->pu4_pic_pu_idx[ctb_addr];

        ps_proc->pu1_pu_map = ps_proc->pu1_pic_pu_map + ctb_addr * min_pu_per_ctb;
        ps_proc->ps_pu = ps_proc->ps_pic_pu + pu_idx;
    }

    /* Number of CTBs handled per iteration of the process loop: never wider than the tile */
    ps_proc->i4_nctb = MIN(ps_codec->u4_nctb, ps_tile->u2_wd);
}
1543
/**
 *******************************************************************************
 *
 * @brief
 *  Worker entry point that drains the process job queue
 *
 * @par Description:
 *  When the codec keeps its threads active, each pass first waits on this
 *  worker's start condition, then dequeues jobs until the queue reports an
 *  error, and finally signals this worker's done condition before waiting
 *  again. When threads are not kept active, the queue is drained once and the
 *  function returns. CMD_PROCESS jobs run ihevcd_init_proc_ctxt() followed by
 *  ihevcd_process(); CMD_FMTCONV jobs run ihevcd_fmt_conv() on one CTB row.
 *
 * @param[in,out] ps_proc
 *  Process context owned by this worker
 *
 * @returns
 *  None
 *
 *******************************************************************************
 */
void ihevcd_process_thread(process_ctxt_t *ps_proc)
{
    IHEVCD_ERROR_T ret = (IHEVCD_ERROR_T)IHEVCD_SUCCESS;

    ithread_set_affinity(ps_proc->i4_id + 1);

    for(;;)
    {
        codec_t *ps_dec = ps_proc->ps_codec;

        if(ps_proc->ps_codec->i4_threads_active)
        {
            DEBUG("In ihevcd_process_thread \n");

            /* Block until the start flag for this worker is raised */
            ret = ithread_mutex_lock(ps_dec->apv_proc_start_mutex[ps_proc->i4_id]);
            if((IHEVCD_ERROR_T)IHEVCD_SUCCESS != ret)
            {
                break;
            }

            while(0 == ps_dec->ai4_process_start[ps_proc->i4_id])
            {
                ithread_cond_wait(ps_dec->apv_proc_start_condition[ps_proc->i4_id],
                                  ps_dec->apv_proc_start_mutex[ps_proc->i4_id]);
            }
            /* Consume the start request */
            ps_dec->ai4_process_start[ps_proc->i4_id] = 0;

            ret = ithread_mutex_unlock(ps_dec->apv_proc_start_mutex[ps_proc->i4_id]);
            if((IHEVCD_ERROR_T)IHEVCD_SUCCESS != ret)
            {
                break;
            }

            DEBUG(" Got control at ihevcd_process_thread \n");

            /* Woken up only to terminate */
            if(1 == ps_dec->i4_break_threads)
            {
                break;
            }
        }

        /* Drain the job queue; a failed dequeue ends this pass */
        for(;;)
        {
            proc_job_t s_job;

            ret = ihevcd_jobq_dequeue((jobq_t *)ps_proc->pv_proc_jobq, &s_job,
                                      sizeof(proc_job_t), 1);
            if((IHEVCD_ERROR_T)IHEVCD_SUCCESS != ret)
            {
                break;
            }

            /* Copy the job's starting position into the context */
            ps_proc->i4_ctb_cnt = s_job.i2_ctb_cnt;
            ps_proc->i4_ctb_x = s_job.i2_ctb_x;
            ps_proc->i4_ctb_y = s_job.i2_ctb_y;
            ps_proc->i4_cur_slice_idx = s_job.i2_slice_idx;

            if(CMD_PROCESS == s_job.i4_cmd)
            {
                ihevcd_init_proc_ctxt(ps_proc, s_job.i4_tu_coeff_data_ofst);
                ihevcd_process(ps_proc);
            }
            else if(CMD_FMTCONV == s_job.i4_cmd)
            {
                sps_t *ps_job_sps;
                codec_t *ps_job_codec;
                ivd_out_bufdesc_t *ps_disp_buf;
                WORD32 first_row;
                WORD32 rows_to_convert;

                /* ps_sps is needed below; initialise the context if no job has done so yet */
                if(0 == ps_proc->i4_init_done)
                {
                    ihevcd_init_proc_ctxt(ps_proc, 0);
                }

                ps_job_sps = ps_proc->ps_sps;
                ps_job_codec = ps_proc->ps_codec;
                ps_disp_buf = ps_proc->ps_out_buffer;

                /* One CTB row worth of lines, clipped to the display height and to zero */
                first_row = s_job.i2_ctb_y << ps_job_sps->i1_log2_ctb_size;
                rows_to_convert = 1 << ps_job_sps->i1_log2_ctb_size;
                rows_to_convert = MIN(rows_to_convert,
                                      (ps_job_codec->i4_disp_ht - first_row));
                if(rows_to_convert < 0)
                {
                    rows_to_convert = 0;
                }

                ihevcd_fmt_conv(ps_job_codec, ps_proc, ps_disp_buf->pu1_bufs[0],
                                ps_disp_buf->pu1_bufs[1], ps_disp_buf->pu1_bufs[2],
                                first_row, rows_to_convert);
            }
        }

        /* Without persistent threads a single pass over the queue is all there is */
        if(!ps_proc->ps_codec->i4_threads_active)
        {
            break;
        }

        /* Report completion of this pass to whoever waits on the done condition */
        ret = ithread_mutex_lock(ps_dec->apv_proc_done_mutex[ps_proc->i4_id]);
        if((IHEVCD_ERROR_T)IHEVCD_SUCCESS != ret)
        {
            break;
        }

        ps_dec->ai4_process_done[ps_proc->i4_id] = 1;
        ithread_cond_signal(ps_dec->apv_proc_done_condition[ps_proc->i4_id]);

        ret = ithread_mutex_unlock(ps_dec->apv_proc_done_mutex[ps_proc->i4_id]);
        if((IHEVCD_ERROR_T)IHEVCD_SUCCESS != ret)
        {
            break;
        }
    }
    //ithread_exit(0);
}
1646