Coverage Report

Created: 2026-04-12 06:57

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libhevc/decoder/ihevcd_iquant_itrans_recon_ctb.c
Line
Count
Source
1
/******************************************************************************
2
*
3
* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4
*
5
* Licensed under the Apache License, Version 2.0 (the "License");
6
* you may not use this file except in compliance with the License.
7
* You may obtain a copy of the License at:
8
*
9
* http://www.apache.org/licenses/LICENSE-2.0
10
*
11
* Unless required by applicable law or agreed to in writing, software
12
* distributed under the License is distributed on an "AS IS" BASIS,
13
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
* See the License for the specific language governing permissions and
15
* limitations under the License.
16
*
17
******************************************************************************/
18
/**
19
 *******************************************************************************
20
 * @file
21
 *  ihevcd_iquant_itrans_recon_ctb.c
22
 *
23
 * @brief
24
 *  Contains functions for inverse quantization, inverse transform and recon
25
 *
26
 * @author
27
 *  Ittiam
28
 *
29
 * @par List of Functions:
30
 * - ihevcd_iquant_itrans_recon_ctb()
31
 *
32
 * @remarks
33
 *  None
34
 *
35
 *******************************************************************************
36
 */
37
/*****************************************************************************/
38
/* File Includes                                                             */
39
/*****************************************************************************/
40
#include <stdio.h>
41
#include <stddef.h>
42
#include <stdlib.h>
43
#include <string.h>
44
45
#include "ihevc_typedefs.h"
46
#include "iv.h"
47
#include "ivd.h"
48
#include "ihevcd_cxa.h"
49
50
#include "ihevc_defs.h"
51
#include "ihevc_debug.h"
52
#include "ihevc_structs.h"
53
#include "ihevc_cabac_tables.h"
54
#include "ihevc_macros.h"
55
#include "ihevc_platform_macros.h"
56
57
#include "ihevcd_defs.h"
58
#include "ihevcd_function_selector.h"
59
#include "ihevcd_structs.h"
60
#include "ihevcd_error.h"
61
#include "ihevcd_bitstream.h"
62
#include "ihevc_common_tables.h"
63
64
/* Intra pred includes */
65
#include "ihevc_intra_pred.h"
66
67
/* Inverse transform common module includes */
68
#include "ihevc_trans_tables.h"
69
#include "ihevc_trans_macros.h"
70
#include "ihevc_itrans_recon.h"
71
#include "ihevc_recon.h"
72
#include "ihevc_chroma_itrans_recon.h"
73
#include "ihevc_chroma_recon.h"
74
75
/* Decoder includes */
76
#include "ihevcd_common_tables.h"
77
#include "ihevcd_iquant_itrans_recon_ctb.h"
78
#include "ihevcd_debug.h"
79
#include "ihevcd_profile.h"
80
#include "ihevcd_statistics.h"
81
#include "ihevcd_itrans_recon_dc.h"
82
83
84
/* Globals */
85
static const WORD32 g_i4_ip_funcs[MAX_NUM_IP_MODES] =
86
  { IP_FUNC_MODE_0, /* Mode 0 */
87
    IP_FUNC_MODE_1, /* Mode 1 */
88
    IP_FUNC_MODE_2, /* Mode 2 */
89
    IP_FUNC_MODE_3TO9, /* Mode 3 */
90
    IP_FUNC_MODE_3TO9, /* Mode 4 */
91
    IP_FUNC_MODE_3TO9, /* Mode 5 */
92
    IP_FUNC_MODE_3TO9, /* Mode 6 */
93
    IP_FUNC_MODE_3TO9, /* Mode 7 */
94
    IP_FUNC_MODE_3TO9, /* Mode 8 */
95
    IP_FUNC_MODE_3TO9, /* Mode 9 */
96
    IP_FUNC_MODE_10, /* Mode 10 */
97
    IP_FUNC_MODE_11TO17, /* Mode 11 */
98
    IP_FUNC_MODE_11TO17, /* Mode 12 */
99
    IP_FUNC_MODE_11TO17, /* Mode 13 */
100
    IP_FUNC_MODE_11TO17, /* Mode 14 */
101
    IP_FUNC_MODE_11TO17, /* Mode 15 */
102
    IP_FUNC_MODE_11TO17, /* Mode 16 */
103
    IP_FUNC_MODE_11TO17, /* Mode 17 */
104
    IP_FUNC_MODE_18_34, /* Mode 18 */
105
    IP_FUNC_MODE_19TO25, /* Mode 19 */
106
    IP_FUNC_MODE_19TO25, /* Mode 20 */
107
    IP_FUNC_MODE_19TO25, /* Mode 21 */
108
    IP_FUNC_MODE_19TO25, /* Mode 22 */
109
    IP_FUNC_MODE_19TO25, /* Mode 23 */
110
    IP_FUNC_MODE_19TO25, /* Mode 24 */
111
    IP_FUNC_MODE_19TO25, /* Mode 25 */
112
    IP_FUNC_MODE_26, /* Mode 26 */
113
    IP_FUNC_MODE_27TO33, /* Mode 27 */
114
    IP_FUNC_MODE_27TO33, /* Mode 26 */
115
    IP_FUNC_MODE_27TO33, /* Mode 29 */
116
    IP_FUNC_MODE_27TO33, /* Mode 30 */
117
    IP_FUNC_MODE_27TO33, /* Mode 31 */
118
    IP_FUNC_MODE_27TO33, /* Mode 32 */
119
    IP_FUNC_MODE_27TO33, /* Mode 33 */
120
    IP_FUNC_MODE_18_34, /* Mode 34 */
121
};
122
123
124
const WORD16 *g_ai2_ihevc_trans_tables[] =
125
  { &g_ai2_ihevc_trans_dst_4[0][0],
126
    &g_ai2_ihevc_trans_4[0][0],
127
    &g_ai2_ihevc_trans_8[0][0],
128
    &g_ai2_ihevc_trans_16[0][0],
129
    &g_ai2_ihevc_trans_32[0][0]
130
};
131
132
133
/*****************************************************************************/
134
/* Structures                                                                */
135
/*****************************************************************************/
136
/**
137
 * Structure to hold fields required for iq it recon construction process
138
 */
139
typedef struct
140
{
141
    /*
142
     * parsed transform coeffs
143
     */
144
    WORD16 *pi2_tu_coeff;
145
146
    /**
147
     * pred buffer
148
     */
149
    UWORD8 *pu1_pred;
150
151
    /**
152
     * recon buffer
153
     */
154
    UWORD8 *pu1_dst;
155
156
    /**
157
     * transform coeffs buffer stride
158
     */
159
    WORD32 tu_coeff_stride;
160
161
    /**
162
     * pred buffer stride
163
     */
164
    WORD32 pred_strd;
165
166
    /**
167
     * recon buffer stride
168
     */
169
    WORD32 dst_strd;
170
171
    /**
172
     * zero cols, zero rows for optimizing itrans process
173
     */
174
    UWORD32 zero_cols;
175
    UWORD32 zero_rows;
176
177
    /**
178
     * dc only? for optimizing itrans process
179
     */
180
    UWORD32 coeff_type;
181
    WORD16 coeff_value;
182
183
    /**
184
     * cbf
185
     */
186
    UWORD8 cbf;
187
188
    /**
189
     * is transform skip
190
     */
191
    UWORD8 transform_skip_flag;
192
193
#ifdef ENABLE_MAIN_REXT_PROFILE
194
    /**
195
     * is explicit rdpcm enabled
196
     */
197
    UWORD8 explicit_rdpcm_flag;
198
199
    /**
200
     * explicit rdpcm dir
201
     */
202
    UWORD8 explicit_rdpcm_dir;
203
#endif
204
205
} tu_plane_iq_it_recon_ctxt_t;
206
207
208
/*****************************************************************************/
209
/* Function Prototypes                                                       */
210
/*****************************************************************************/
211
typedef void (*PF_IQITRECON_PLANE)(process_ctxt_t *ps_proc,
212
                                   tu_t *ps_tu,
213
                                   tu_plane_iq_it_recon_ctxt_t *ps_pl_tu_ctxt,
214
                                   WORD32 func_idx,
215
                                   WORD32 log2_trans_size,
216
                                   CHROMA_PLANE_ID_T chroma_plane,
217
                                   WORD8 intra_flag,
218
                                   WORD8 intra_pred_mode);
219
220
/* Returns number of ai2_level read from ps_sblk_coeff */
221
UWORD8* ihevcd_unpack_coeffs(WORD16 *pi2_tu_coeff,
222
                             WORD32 log2_trans_size,
223
                             UWORD8 *pu1_tu_coeff_data,
224
                             WORD16 *pi2_dequant_matrix,
225
                             WORD32 qp_rem,
226
                             WORD32 qp_div,
227
                             TRANSFORM_TYPE e_trans_type,
228
                             WORD32 trans_quant_bypass,
229
                             UWORD32 *pu4_zero_cols,
230
                             UWORD32 *pu4_zero_rows,
231
                             UWORD32 *pu4_coeff_type,
232
                             WORD16 *pi2_coeff_value)
233
2.46M
{
234
    /* Generating coeffs from coeff-map */
235
2.46M
    WORD32 i;
236
2.46M
    WORD16 *pi2_sblk_ptr;
237
2.46M
    WORD32 subblk_pos_x, subblk_pos_y;
238
2.46M
    WORD32 sblk_scan_idx, coeff_raster_idx;
239
2.46M
    WORD32 sblk_non_zero_coeff_idx;
240
2.46M
    tu_sblk_coeff_data_t *ps_tu_sblk_coeff_data;
241
2.46M
    UWORD8 u1_num_coded_sblks, u1_scan_type;
242
2.46M
    UWORD8 *pu1_new_tu_coeff_data;
243
2.46M
    WORD32 trans_size;
244
2.46M
    WORD32 xs, ys;
245
2.46M
    WORD32 trans_skip;
246
2.46M
    WORD16 iquant_out;
247
2.46M
    WORD32 shift_iq;
248
2.46M
    {
249
2.46M
        WORD32 bit_depth;
250
251
2.46M
        bit_depth = 8 + 0;
252
2.46M
        shift_iq = bit_depth + log2_trans_size - 5;
253
2.46M
    }
254
2.46M
    trans_size = (1 << log2_trans_size);
255
256
    /* First byte points to number of coded blocks */
257
2.46M
    u1_num_coded_sblks = *pu1_tu_coeff_data++;
258
259
    /* Next byte points to scan type */
260
2.46M
    u1_scan_type = *pu1_tu_coeff_data++;
261
    /* 0th bit has trans_skip */
262
2.46M
    trans_skip = u1_scan_type & 1;
263
2.46M
#ifdef ENABLE_MAIN_REXT_PROFILE
264
2.46M
    u1_scan_type = (u1_scan_type & 0xF) >> 1;
265
#else
266
    u1_scan_type >>= 1;
267
#endif
268
269
2.46M
    pi2_sblk_ptr = pi2_tu_coeff;
270
271
    /* Initially all columns are assumed to be zero */
272
2.46M
    *pu4_zero_cols = 0xFFFFFFFF;
273
    /* Initially all rows are assumed to be zero */
274
2.46M
    *pu4_zero_rows = 0xFFFFFFFF;
275
276
2.46M
    ps_tu_sblk_coeff_data = (tu_sblk_coeff_data_t *)(pu1_tu_coeff_data);
277
278
2.46M
    if(trans_skip)
279
373k
        memset(pi2_tu_coeff, 0, trans_size * trans_size * sizeof(WORD16));
280
281
2.46M
    STATS_INIT_SBLK_AND_COEFF_POS();
282
283
    /* DC only case */
284
2.46M
    if((e_trans_type != DST_4x4) && (1 == u1_num_coded_sblks)
285
1.34M
                    && (0 == ps_tu_sblk_coeff_data->u2_subblk_pos)
286
1.27M
                    && (1 == ps_tu_sblk_coeff_data->u2_sig_coeff_map))
287
490k
    {
288
490k
        *pu4_coeff_type = 1;
289
290
490k
        if(!trans_quant_bypass)
291
475k
        {
292
475k
            if(4 == trans_size)
293
198k
            {
294
198k
                IQUANT_4x4(iquant_out,
295
198k
                           ps_tu_sblk_coeff_data->ai2_level[0],
296
198k
                           pi2_dequant_matrix[0]
297
198k
                                           * g_ihevc_iquant_scales[qp_rem],
298
198k
                           shift_iq, qp_div);
299
198k
            }
300
276k
            else
301
276k
            {
302
276k
                IQUANT(iquant_out, ps_tu_sblk_coeff_data->ai2_level[0],
303
276k
                       pi2_dequant_matrix[0] * g_ihevc_iquant_scales[qp_rem],
304
276k
                       shift_iq, qp_div);
305
276k
            }
306
475k
            if(trans_skip)
307
53.3k
                iquant_out = (iquant_out + 16) >> 5;
308
475k
        }
309
15.1k
        else
310
15.1k
        {
311
            /* setting the column to zero */
312
174k
            for(i = 0; i < trans_size; i++)
313
159k
                *(pi2_tu_coeff + i * trans_size) = 0;
314
315
15.1k
            iquant_out = ps_tu_sblk_coeff_data->ai2_level[0];
316
15.1k
        }
317
490k
        *pi2_coeff_value = iquant_out;
318
490k
        *pi2_tu_coeff = iquant_out;
319
490k
        *pu4_zero_cols &= ~0x1;
320
490k
        *pu4_zero_rows &= ~0x1;
321
490k
        ps_tu_sblk_coeff_data =
322
490k
                        (void *)&ps_tu_sblk_coeff_data->ai2_level[1];
323
324
490k
        STATS_UPDATE_COEFF_COUNT();
325
490k
        STATS_LAST_SBLK_POS_UPDATE(e_trans_type, (trans_skip || trans_quant_bypass),  0, 0);
326
490k
        STATS_UPDATE_SBLK_AND_COEFF_HISTOGRAM(e_trans_type, (trans_quant_bypass || trans_skip));
327
490k
        return ((UWORD8 *)ps_tu_sblk_coeff_data);
328
490k
    }
329
1.97M
    else
330
1.97M
    {
331
1.97M
        *pu4_coeff_type = 0;
332
        /* In case of trans skip, memset has already happened */
333
1.97M
        if(!trans_skip)
334
1.65M
            memset(pi2_tu_coeff, 0, trans_size * trans_size * sizeof(WORD16));
335
1.97M
    }
336
337
4.58M
    for(i = 0; i < u1_num_coded_sblks; i++)
338
2.60M
    {
339
2.60M
        UWORD32 u4_sig_coeff_map;
340
2.60M
        subblk_pos_x = ps_tu_sblk_coeff_data->u2_subblk_pos & 0x00FF;
341
2.60M
        subblk_pos_y = (ps_tu_sblk_coeff_data->u2_subblk_pos & 0xFF00) >> 8;
342
343
2.60M
        STATS_LAST_SBLK_POS_UPDATE(e_trans_type, (trans_skip || trans_quant_bypass), subblk_pos_x, subblk_pos_y);
344
345
2.60M
        subblk_pos_x = subblk_pos_x * MIN_TU_SIZE;
346
2.60M
        subblk_pos_y = subblk_pos_y * MIN_TU_SIZE;
347
348
2.60M
        pi2_sblk_ptr = pi2_tu_coeff + subblk_pos_y * trans_size
349
2.60M
                        + subblk_pos_x;
350
351
        //*pu4_zero_cols &= ~(0xF << subblk_pos_x);
352
353
2.60M
        sblk_non_zero_coeff_idx = 0;
354
2.60M
        u4_sig_coeff_map = ps_tu_sblk_coeff_data->u2_sig_coeff_map;
355
        //for(sblk_scan_idx = (31 - CLZ(u4_sig_coeff_map)); sblk_scan_idx >= 0; sblk_scan_idx--)
356
2.60M
        sblk_scan_idx = 31;
357
2.60M
        do
358
10.5M
        {
359
10.5M
            WORD32 clz = CLZ(u4_sig_coeff_map);
360
361
10.5M
            sblk_scan_idx -= clz;
362
            /* when clz is 31, u4_sig_coeff_map << (clz+1) might result in unknown behaviour in some cases */
363
            /* Hence either use SHL which takes care of handling these issues based on platform or shift in two stages */
364
10.5M
            u4_sig_coeff_map = u4_sig_coeff_map << clz;
365
            /* Copying coeffs and storing in reverse order */
366
10.5M
            {
367
10.5M
                STATS_UPDATE_COEFF_COUNT();
368
10.5M
                coeff_raster_idx =
369
10.5M
                                gau1_ihevc_invscan4x4[u1_scan_type][sblk_scan_idx];
370
371
10.5M
                xs = coeff_raster_idx & 0x3;
372
10.5M
                ys = coeff_raster_idx >> 2;
373
374
10.5M
                if(!trans_quant_bypass)
375
10.3M
                {
376
10.3M
                    if(4 == trans_size)
377
4.74M
                    {
378
4.74M
                        IQUANT_4x4(iquant_out,
379
4.74M
                                   ps_tu_sblk_coeff_data->ai2_level[sblk_non_zero_coeff_idx],
380
4.74M
                                   pi2_dequant_matrix[(subblk_pos_x + xs)
381
4.74M
                                                   + (subblk_pos_y + ys)
382
4.74M
                                                   * trans_size]
383
4.74M
                                   * g_ihevc_iquant_scales[qp_rem],
384
4.74M
                                   shift_iq, qp_div);
385
4.74M
                        sblk_non_zero_coeff_idx++;
386
4.74M
                    }
387
5.63M
                    else
388
5.63M
                    {
389
5.63M
                        IQUANT(iquant_out,
390
5.63M
                               ps_tu_sblk_coeff_data->ai2_level[sblk_non_zero_coeff_idx],
391
5.63M
                               pi2_dequant_matrix[(subblk_pos_x + xs)
392
5.63M
                                               + (subblk_pos_y + ys)
393
5.63M
                                               * trans_size]
394
5.63M
                               * g_ihevc_iquant_scales[qp_rem],
395
5.63M
                               shift_iq, qp_div);
396
5.63M
                        sblk_non_zero_coeff_idx++;
397
5.63M
                    }
398
399
10.3M
                    if(trans_skip)
400
1.12M
                        iquant_out = (iquant_out + 16) >> 5;
401
10.3M
                }
402
150k
                else
403
150k
                {
404
150k
                    iquant_out = ps_tu_sblk_coeff_data->ai2_level[sblk_non_zero_coeff_idx++];
405
150k
                }
406
10.5M
                *pu4_zero_cols &= ~(0x1 << (subblk_pos_x + xs));
407
10.5M
                *pu4_zero_rows &= ~(0x1 << (subblk_pos_y + ys));
408
10.5M
                *(pi2_sblk_ptr + xs + ys * trans_size) = iquant_out;
409
10.5M
            }
410
10.5M
            sblk_scan_idx--;
411
10.5M
            u4_sig_coeff_map <<= 1;
412
413
10.5M
        }while(u4_sig_coeff_map);
414
        /* Updating the sblk pointer */
415
2.60M
        ps_tu_sblk_coeff_data =
416
2.60M
                        (void *)&ps_tu_sblk_coeff_data->ai2_level[sblk_non_zero_coeff_idx];
417
2.60M
    }
418
419
1.97M
    STATS_UPDATE_SBLK_AND_COEFF_HISTOGRAM(e_trans_type, (trans_quant_bypass || trans_skip));
420
421
1.97M
    pu1_new_tu_coeff_data = (UWORD8 *)ps_tu_sblk_coeff_data;
422
423
1.97M
    return pu1_new_tu_coeff_data;
424
2.46M
}
425
426
WORD32 ihevcd_get_intra_nbr_flag(process_ctxt_t *ps_proc,
427
                                 tu_t *ps_tu,
428
                                 UWORD32 *pu4_intra_nbr_avail,
429
                                 WORD16 i2_pic_width_in_luma_samples,
430
                                 UWORD8 i1_constrained_intra_pred_flag,
431
                                 WORD32 trans_size,
432
                                 WORD32 ctb_size)
433
2.15M
{
434
2.15M
    sps_t *ps_sps;
435
2.15M
    UWORD8 u1_bot_lt_avail, u1_left_avail, u1_top_avail, u1_top_rt_avail,
436
2.15M
                    u1_top_lt_avail;
437
2.15M
    WORD32 x_cur, y_cur, x_nbr, y_nbr;
438
2.15M
    UWORD8 *pu1_nbr_intra_flag;
439
2.15M
    UWORD8 *pu1_pic_intra_flag;
440
2.15M
    UWORD8 top_right, top, top_left, left, bot_left;
441
2.15M
    WORD32 intra_pos;
442
2.15M
    WORD32 num_8_blks, num_8_blks_in_bits;
443
2.15M
    WORD32 numbytes_row = (i2_pic_width_in_luma_samples + 63) / 64;
444
2.15M
    WORD32 cur_x, cur_y;
445
2.15M
    WORD32 i;
446
2.15M
    WORD32 nbr_flags;
447
448
2.15M
    ps_sps = ps_proc->ps_sps;
449
2.15M
    cur_x = ps_tu->b4_pos_x;
450
2.15M
    cur_y = ps_tu->b4_pos_y;
451
452
2.15M
    u1_bot_lt_avail = (pu4_intra_nbr_avail[1 + cur_y + trans_size / MIN_TU_SIZE]
453
2.15M
                    >> (31 - (1 + cur_x - 1))) & 1;
454
2.15M
    u1_left_avail = (pu4_intra_nbr_avail[1 + cur_y] >> (31 - (1 + cur_x - 1)))
455
2.15M
                    & 1;
456
2.15M
    u1_top_avail = (pu4_intra_nbr_avail[1 + cur_y - 1] >> (31 - (1 + cur_x)))
457
2.15M
                    & 1;
458
2.15M
    u1_top_rt_avail = (pu4_intra_nbr_avail[1 + cur_y - 1]
459
2.15M
                    >> (31 - (1 + cur_x + trans_size / MIN_TU_SIZE))) & 1;
460
2.15M
    u1_top_lt_avail = (pu4_intra_nbr_avail[1 + cur_y - 1]
461
2.15M
                    >> (31 - (1 + cur_x - 1))) & 1;
462
463
2.15M
    x_cur = ps_proc->i4_ctb_x * ctb_size + cur_x * MIN_TU_SIZE;
464
2.15M
    y_cur = ps_proc->i4_ctb_y * ctb_size + cur_y * MIN_TU_SIZE;
465
466
2.15M
    pu1_pic_intra_flag = ps_proc->pu1_pic_intra_flag;
467
468
    /* WORD32 nbr_flags as below  MSB --> LSB */
469
    /*    Top-Left | Top-Right | Top | Left | Bottom-Left
470
     *       1         4         4     4         4
471
     */
472
2.15M
    bot_left = 0;
473
2.15M
    left = 0;
474
2.15M
    top_right = 0;
475
2.15M
    top = 0;
476
2.15M
    top_left = 0;
477
478
2.15M
    num_8_blks = trans_size > 4 ? trans_size / 8 : 1;
479
2.15M
    num_8_blks_in_bits = ((1 << num_8_blks) - 1);
480
481
2.15M
    if(i1_constrained_intra_pred_flag)
482
420k
    {
483
        /* TODO: constrained intra pred not tested */
484
420k
        if(u1_bot_lt_avail)
485
117k
        {
486
117k
            x_nbr = x_cur - 1;
487
117k
            y_nbr = y_cur + trans_size;
488
489
117k
            pu1_nbr_intra_flag = pu1_pic_intra_flag + y_nbr / 8 * numbytes_row
490
117k
                            + x_nbr / 64;
491
117k
            intra_pos = ((x_nbr / 8) % 8);
492
403k
            for(i = 0; i < num_8_blks; i++)
493
285k
            {
494
285k
                bot_left |= ((*(pu1_nbr_intra_flag + i * numbytes_row)
495
285k
                                >> intra_pos) & 1) << i;
496
285k
            }
497
117k
            bot_left &= num_8_blks_in_bits;
498
117k
        }
499
420k
        if(u1_left_avail)
500
413k
        {
501
413k
            x_nbr = x_cur - 1;
502
413k
            y_nbr = y_cur;
503
504
413k
            pu1_nbr_intra_flag = pu1_pic_intra_flag + y_nbr / 8 * numbytes_row
505
413k
                            + x_nbr / 64;
506
413k
            intra_pos = ((x_nbr / 8) % 8);
507
508
1.49M
            for(i = 0; i < num_8_blks; i++)
509
1.07M
            {
510
1.07M
                left |= ((*(pu1_nbr_intra_flag + i * numbytes_row) >> intra_pos)
511
1.07M
                                & 1) << i;
512
1.07M
            }
513
413k
            left &= num_8_blks_in_bits;
514
413k
        }
515
420k
        if(u1_top_avail)
516
383k
        {
517
383k
            x_nbr = x_cur;
518
383k
            y_nbr = y_cur - 1;
519
520
383k
            pu1_nbr_intra_flag = pu1_pic_intra_flag + y_nbr / 8 * numbytes_row
521
383k
                            + x_nbr / 64;
522
383k
            intra_pos = ((x_nbr / 8) % 8);
523
524
383k
            top = (*pu1_nbr_intra_flag >> intra_pos);
525
383k
            top &= num_8_blks_in_bits;
526
            /*
527
             for(i=0;i<num_8_blks;i++)
528
             {
529
             top |= ( (*pu1_nbr_intra_flag >> (intra_pos+i)) & 1) << i;
530
             }
531
             */
532
383k
        }
533
420k
        if(u1_top_rt_avail)
534
259k
        {
535
259k
            x_nbr = x_cur + trans_size;
536
259k
            y_nbr = y_cur - 1;
537
538
259k
            pu1_nbr_intra_flag = pu1_pic_intra_flag + y_nbr / 8 * numbytes_row
539
259k
                            + x_nbr / 64;
540
259k
            intra_pos = ((x_nbr / 8) % 8);
541
542
259k
            top_right = (*pu1_nbr_intra_flag >> intra_pos);
543
259k
            top_right &= num_8_blks_in_bits;
544
            /*
545
             for(i=0;i<num_8_blks;i++)
546
             {
547
             top_right |= ( (*pu1_nbr_intra_flag >> (intra_pos+i)) & 1) << i;
548
             }
549
             */
550
259k
        }
551
420k
        if(u1_top_lt_avail)
552
376k
        {
553
376k
            x_nbr = x_cur - 1;
554
376k
            y_nbr = y_cur - 1;
555
556
376k
            pu1_nbr_intra_flag = pu1_pic_intra_flag + y_nbr / 8 * numbytes_row
557
376k
                            + x_nbr / 64;
558
376k
            intra_pos = ((x_nbr / 8) % 8);
559
560
376k
            top_left = (*pu1_nbr_intra_flag >> intra_pos) & 1;
561
376k
        }
562
420k
    }
563
1.73M
    else
564
1.73M
    {
565
1.73M
        if(u1_top_avail)
566
1.66M
            top = 0xF;
567
1.73M
        if(u1_top_rt_avail)
568
1.09M
            top_right = 0xF;
569
1.73M
        if(u1_bot_lt_avail)
570
544k
            bot_left = 0xF;
571
1.73M
        if(u1_left_avail)
572
1.70M
            left = 0xF;
573
1.73M
        if(u1_top_lt_avail)
574
1.64M
            top_left = 0x1;
575
1.73M
    }
576
577
    /* Handling incomplete CTBs */
578
2.15M
    {
579
2.15M
        WORD32 pu_size_limit = MIN(trans_size, 8);
580
2.15M
        WORD32 cols_remaining = ps_sps->i2_pic_width_in_luma_samples
581
2.15M
                        - (ps_proc->i4_ctb_x << ps_sps->i1_log2_ctb_size)
582
2.15M
                        - (ps_tu->b4_pos_x * MIN_TU_SIZE)
583
2.15M
                        - (1 << (ps_tu->b3_size + 2));
584
        /* ctb_size_top gives number of valid pixels remaining in the current row */
585
2.15M
        WORD32 ctb_size_top = MIN(ctb_size, cols_remaining);
586
2.15M
        WORD32 ctb_size_top_bits = (1 << (ctb_size_top / pu_size_limit)) - 1;
587
588
2.15M
        WORD32 rows_remaining = ps_sps->i2_pic_height_in_luma_samples
589
2.15M
                        - (ps_proc->i4_ctb_y << ps_sps->i1_log2_ctb_size)
590
2.15M
                        - (ps_tu->b4_pos_y * MIN_TU_SIZE)
591
2.15M
                        - (1 << (ps_tu->b3_size + 2));
592
        /* ctb_size_bot gives number of valid pixels remaining in the current column */
593
2.15M
        WORD32 ctb_size_bot = MIN(ctb_size, rows_remaining);
594
2.15M
        WORD32 ctb_size_bot_bits = (1 << (ctb_size_bot / pu_size_limit)) - 1;
595
596
2.15M
        top_right &= ctb_size_top_bits;
597
2.15M
        bot_left &= ctb_size_bot_bits;
598
2.15M
    }
599
600
    /*    Top-Left | Top-Right | Top | Left | Bottom-Left
601
     *      1         4         4     4         4
602
     */
603
604
    /*
605
     nbr_flags = (top_left << 16) | (gau4_ihevcd_4_bit_reverse[top_right] << 12) | (gau4_ihevcd_4_bit_reverse[top] << 8) | (gau4_ihevcd_4_bit_reverse[left] << 4)
606
     | gau4_ihevcd_4_bit_reverse[bot_left];
607
     */
608
2.15M
    nbr_flags = (top_left << 16) | (top_right << 12) | (top << 8) | (gau4_ihevcd_4_bit_reverse[left] << 4)
609
2.15M
                    | gau4_ihevcd_4_bit_reverse[bot_left];
610
611
612
2.15M
    return nbr_flags;
613
614
2.15M
}
615
616
static void ihevcd_iquant_itrans_recon_tu_plane(process_ctxt_t *ps_proc,
617
                                                tu_t *ps_tu,
618
                                                tu_plane_iq_it_recon_ctxt_t *ps_pl_tu_ctxt,
619
                                                WORD32 func_idx,
620
                                                WORD32 log2_trans_size,
621
                                                CHROMA_PLANE_ID_T chroma_plane,
622
                                                WORD8 intra_flag,
623
                                                WORD8 intra_pred_mode)
624
46.6M
{
625
46.6M
    sps_t *ps_sps = ps_proc->ps_sps;
626
46.6M
    pps_t *ps_pps = ps_proc->ps_pps;
627
46.6M
    codec_t *ps_codec = ps_proc->ps_codec;
628
629
46.6M
    if(1 == ps_pl_tu_ctxt->cbf)
630
2.46M
    {
631
2.46M
        if(ps_tu->b1_transquant_bypass || ps_pl_tu_ctxt->transform_skip_flag)
632
414k
        {
633
            /* Recon */
634
414k
            ps_codec->apf_recon[func_idx](ps_pl_tu_ctxt->pi2_tu_coeff, ps_pl_tu_ctxt->pu1_pred,
635
414k
                                          ps_pl_tu_ctxt->pu1_dst, ps_pl_tu_ctxt->tu_coeff_stride,
636
414k
                                          ps_pl_tu_ctxt->pred_strd, ps_pl_tu_ctxt->dst_strd,
637
414k
                                          ps_pl_tu_ctxt->zero_cols);
638
414k
        }
639
2.04M
        else
640
2.04M
        {
641
            /* iQuant , iTrans and Recon */
642
2.04M
            if((0 == ps_pl_tu_ctxt->coeff_type))
643
1.62M
            {
644
1.62M
                ps_codec->apf_itrans_recon[func_idx](ps_pl_tu_ctxt->pi2_tu_coeff,
645
1.62M
                                                     ps_proc->pi2_itrans_intrmd_buf,
646
1.62M
                                                     ps_pl_tu_ctxt->pu1_pred,
647
1.62M
                                                     ps_pl_tu_ctxt->pu1_dst,
648
1.62M
                                                     ps_pl_tu_ctxt->tu_coeff_stride,
649
1.62M
                                                     ps_pl_tu_ctxt->pred_strd,
650
1.62M
                                                     ps_pl_tu_ctxt->dst_strd,
651
1.62M
                                                     ps_pl_tu_ctxt->zero_cols,
652
1.62M
                                                     ps_pl_tu_ctxt->zero_rows);
653
1.62M
            }
654
420k
            else /* DC only */
655
420k
            {
656
420k
                ps_codec->apf_itrans_recon_dc[chroma_plane != NULL_PLANE](
657
420k
                                ps_pl_tu_ctxt->pu1_pred, ps_pl_tu_ctxt->pu1_dst,
658
420k
                                ps_pl_tu_ctxt->pred_strd, ps_pl_tu_ctxt->dst_strd, log2_trans_size,
659
420k
                                ps_pl_tu_ctxt->coeff_value);
660
420k
            }
661
2.04M
        }
662
2.46M
    }
663
46.6M
}
664
665
#ifdef ENABLE_MAIN_REXT_PROFILE
666
static void ihevcd_iquant_itrans_resi_recon_tu_plane(process_ctxt_t *ps_proc,
667
                                                     tu_t *ps_tu,
668
                                                     tu_plane_iq_it_recon_ctxt_t *ps_pl_tu_ctxt,
669
                                                     WORD32 func_idx,
670
                                                     WORD32 log2_trans_size,
671
                                                     CHROMA_PLANE_ID_T chroma_plane,
672
                                                     WORD8 intra_flag,
673
                                                     WORD8 intra_pred_mode)
674
0
{
675
0
    sps_t *ps_sps = ps_proc->ps_sps;
676
0
    pps_t *ps_pps = ps_proc->ps_pps;
677
0
    codec_t *ps_codec = ps_proc->ps_codec;
678
0
    WORD8 trans_size = 1 << log2_trans_size;
679
0
    WORD16 *pi2_res = ps_proc->pi2_res_luma_buf;
680
0
    WORD16 *pi2_res_uv = ps_proc->pi2_res_chroma_buf;
681
0
    WORD32 alpha = 0;
682
0
    WORD16 *residue_out_base = chroma_plane == NULL_PLANE ? pi2_res : pi2_res_uv;
683
0
    WORD16 *residue_out = residue_out_base;
684
    // if both rdpcm and rotate are to be applied, share the output residue buffer between the
685
    // two transforms
686
0
    WORD16 *residue_out_intrmdt = residue_out_base + (TRANS_SIZE_4 * TRANS_SIZE_4);
687
688
0
    if(chroma_plane == U_PLANE && ps_tu->b3_cb_log2_res_scale_abs_plus1 != 0)
689
0
    {
690
0
        alpha = (1 << (ps_tu->b3_cb_log2_res_scale_abs_plus1 - 1))
691
0
                        * (1 - 2 * ps_tu->b1_cb_log2_res_sign);
692
0
    }
693
0
    else if(chroma_plane == V_PLANE && ps_tu->b3_cr_log2_res_scale_abs_plus1 != 0)
694
0
    {
695
0
        alpha = (1 << (ps_tu->b3_cr_log2_res_scale_abs_plus1 - 1))
696
0
                        * (1 - 2 * ps_tu->b1_cr_log2_res_sign);
697
0
    }
698
0
    if(1 == ps_pl_tu_ctxt->cbf)
699
0
    {
700
0
        if(ps_tu->b1_transquant_bypass || ps_pl_tu_ctxt->transform_skip_flag)
701
0
        {
702
0
            WORD8 rotate = ps_sps->i1_transform_skip_rotation_enabled_flag && trans_size == 4
703
0
                            && intra_flag;
704
0
            WORD8 rdpcm = (ps_sps->i1_implicit_rdpcm_enabled_flag && intra_flag
705
0
                            && (intra_pred_mode == 10 || intra_pred_mode == 26))
706
0
                            || ps_pl_tu_ctxt->explicit_rdpcm_flag;
707
0
            WORD16 *src_residue = ps_pl_tu_ctxt->pi2_tu_coeff;
708
0
            WORD16 src_residue_strd = ps_pl_tu_ctxt->tu_coeff_stride;
709
710
0
            if(rotate)
711
0
            {
712
0
                ihevc_res_4x4_rotate(src_residue, rdpcm ? residue_out_intrmdt : residue_out,
713
0
                                     src_residue_strd, trans_size, ps_pl_tu_ctxt->zero_cols);
714
0
                ps_pl_tu_ctxt->zero_cols =
715
0
                                gau4_ihevcd_4_bit_reverse[ps_pl_tu_ctxt->zero_cols & 0xF];
716
0
                src_residue = residue_out_intrmdt;
717
0
                src_residue_strd = trans_size;
718
0
            }
719
720
0
            if(rdpcm)
721
0
            {
722
0
                WORD8 rdpcm_dir =
723
0
                                ps_pl_tu_ctxt->explicit_rdpcm_flag ?
724
0
                                                ps_pl_tu_ctxt->explicit_rdpcm_dir :
725
0
                                                intra_pred_mode != 10;
726
0
                if(rdpcm_dir == 0)
727
0
                {
728
0
                    ihevc_res_nxn_rdpcm_horz(src_residue, residue_out, src_residue_strd, trans_size,
729
0
                                             trans_size, ps_pl_tu_ctxt->zero_cols);
730
0
                    ps_pl_tu_ctxt->zero_cols = (1 << CTZ(~ps_pl_tu_ctxt->zero_cols)) - 1;
731
0
                }
732
0
                else
733
0
                {
734
0
                    ihevc_res_nxn_rdpcm_vert(src_residue, residue_out, src_residue_strd, trans_size,
735
0
                                             trans_size, ps_pl_tu_ctxt->zero_cols);
736
0
                }
737
0
            }
738
739
0
            if(!rdpcm && !rotate)
740
0
            {
741
0
                ihevc_res_nxn_copy(src_residue, residue_out, src_residue_strd, trans_size,
742
0
                                   trans_size, ps_pl_tu_ctxt->zero_cols);
743
0
            }
744
0
        }
745
0
        else
746
0
        {
747
            /* iQuant, iTrans */
748
0
            if(0 == ps_pl_tu_ctxt->coeff_type)
749
0
            {
750
0
                WORD32 func_tmp_idx = chroma_plane != NULL_PLANE ? func_idx - 4 : func_idx;
751
0
                ps_codec->apf_itrans_res[func_tmp_idx](ps_pl_tu_ctxt->pi2_tu_coeff,
752
0
                                                       ps_proc->pi2_itrans_intrmd_buf, residue_out,
753
0
                                                       ps_pl_tu_ctxt->tu_coeff_stride, trans_size,
754
0
                                                       ps_pl_tu_ctxt->zero_cols,
755
0
                                                       ps_pl_tu_ctxt->zero_rows);
756
0
            }
757
0
            else /* DC only */
758
0
            {
759
0
                ps_codec->apf_itrans_res_dc(residue_out, trans_size, log2_trans_size,
760
0
                                            ps_pl_tu_ctxt->coeff_value);
761
0
            }
762
0
            ps_pl_tu_ctxt->zero_cols = 0;
763
0
        }
764
0
        if(!alpha)
765
0
        {
766
0
            ps_codec->apf_recon[func_idx](residue_out, ps_pl_tu_ctxt->pu1_pred,
767
0
                                          ps_pl_tu_ctxt->pu1_dst, trans_size,
768
0
                                          ps_pl_tu_ctxt->pred_strd, ps_pl_tu_ctxt->dst_strd,
769
0
                                          ps_pl_tu_ctxt->zero_cols);
770
0
        }
771
0
    }
772
0
    if(alpha)
773
0
    {
774
0
        if(0 == ps_pl_tu_ctxt->cbf)
775
0
        {
776
0
            memset(residue_out, 0, trans_size * trans_size * sizeof(WORD16));
777
0
        }
778
0
        ihevc_chroma_recon_nxn_ccp(pi2_res, pi2_res_uv, ps_pl_tu_ctxt->pu1_pred,
779
0
                                   ps_pl_tu_ctxt->pu1_dst, alpha, trans_size, trans_size,
780
0
                                   trans_size, ps_pl_tu_ctxt->pred_strd, ps_pl_tu_ctxt->dst_strd);
781
0
    }
782
0
}
783
784
PF_IQITRECON_PLANE get_iqitrec_func(process_ctxt_t *ps_proc,
785
                                    tu_t *ps_tu,
786
                                    tu_plane_iq_it_recon_ctxt_t *ps_pl_tu_ctxt,
787
                                    WORD32 log2_trans_size,
788
                                    CHROMA_PLANE_ID_T chroma_plane,
789
                                    WORD8 intra_flag,
790
                                    WORD8 intra_pred_mode)
791
46.6M
{
792
46.6M
    sps_t *ps_sps = ps_proc->ps_sps;
793
46.6M
    pps_t *ps_pps = ps_proc->ps_pps;
794
46.6M
    WORD8 trans_size = 1 << log2_trans_size;
795
796
46.6M
    if(1 == ps_pl_tu_ctxt->cbf
797
2.46M
                    && (ps_tu->b1_transquant_bypass || ps_pl_tu_ctxt->transform_skip_flag))
798
414k
    {
799
414k
        if(ps_sps->i1_transform_skip_rotation_enabled_flag && trans_size == 4 && intra_flag)
800
0
            return ihevcd_iquant_itrans_resi_recon_tu_plane;
801
414k
        if(ps_sps->i1_implicit_rdpcm_enabled_flag && intra_flag
802
0
                        && (intra_pred_mode == 10 || intra_pred_mode == 26))
803
0
            return ihevcd_iquant_itrans_resi_recon_tu_plane;
804
414k
        if(ps_pl_tu_ctxt->explicit_rdpcm_flag)
805
0
            return ihevcd_iquant_itrans_resi_recon_tu_plane;
806
414k
    }
807
46.6M
    if(ps_pps->i1_cross_component_prediction_enabled_flag)
808
0
    {
809
0
        if((chroma_plane == NULL_PLANE
810
0
                        && (ps_tu->b3_cb_log2_res_scale_abs_plus1 != 0
811
0
                                        || ps_tu->b3_cr_log2_res_scale_abs_plus1 != 0))
812
0
                        || (chroma_plane == V_PLANE && ps_tu->b3_cr_log2_res_scale_abs_plus1 != 0)
813
0
                        || (chroma_plane == U_PLANE && ps_tu->b3_cb_log2_res_scale_abs_plus1 != 0))
814
0
            return ihevcd_iquant_itrans_resi_recon_tu_plane;
815
0
    }
816
46.6M
    return ihevcd_iquant_itrans_recon_tu_plane;
817
46.6M
}
818
#endif
819
820
WORD32 ihevcd_iquant_itrans_recon_ctb(process_ctxt_t *ps_proc)
821
2.61M
{
822
2.61M
    WORD16 *pi2_scaling_mat;
823
2.61M
    UWORD8 *pu1_y_dst_ctb;
824
2.61M
    UWORD8 *pu1_uv_dst_ctb;
825
2.61M
    WORD32 ctb_size;
826
2.61M
    codec_t *ps_codec;
827
2.61M
    slice_header_t *ps_slice_hdr;
828
2.61M
    tu_t *ps_tu;
829
2.61M
    WORD16 *pi2_ctb_coeff;
830
2.61M
    WORD32 tu_cnt;
831
2.61M
    WORD16 *pi2_tu_coeff;
832
2.61M
    WORD32 pic_strd;
833
2.61M
    WORD32 luma_nbr_flags;
834
2.61M
    WORD32 luma_nbr_flags_4x4[4] = { 0 };
835
2.61M
    WORD32 chroma_nbr_flags = 0;
836
2.61M
    WORD32 chroma_nbr_flags_subtu = 0;
837
2.61M
#ifdef ENABLE_MAIN_REXT_PROFILE
838
2.61M
    WORD32 disable_boundary_filter = 0;
839
2.61M
#endif
840
2.61M
    UWORD8 u1_luma_pred_mode_first_tu = 0;
841
    /* Pointers for generating 2d coeffs from coeff-map */
842
2.61M
    UWORD8 *pu1_tu_coeff_data;
843
    /* nbr avail map for CTB */
844
    /* 1st bit points to neighbor (left/top_left/bot_left) */
845
    /* 1Tb starts at 2nd bit from msb of 2nd value in array, followed by number of min_tu's in that ctb */
846
2.61M
    UWORD32 au4_intra_nbr_avail[MAX_CTB_SIZE / MIN_TU_SIZE + 2 /* Top nbr + bot nbr */];
847
2.61M
    UWORD32 top_avail_bits;
848
2.61M
    sps_t *ps_sps;
849
2.61M
    pps_t *ps_pps;
850
2.61M
    WORD32 intra_flag;
851
2.61M
    UWORD8 *pu1_pic_intra_flag;
852
2.61M
    WORD32 h_samp_factor, v_samp_factor;
853
2.61M
    WORD32 chroma_pixel_strd = 2;
854
2.61M
    PF_IQITRECON_PLANE iqitrecon_fptr = ihevcd_iquant_itrans_recon_tu_plane;
855
856
    /*************************************************************************/
857
    /* Contains scaling matrix offset in the following order in a 1D buffer  */
858
    /* Entries that are listed as UNUSED are invalid combinations where      */
859
    /* scaling matrix is not used. eg: 64x64 SKIP CU, 64x64 PCM CU           */
860
    /* Intra 4 x 4 Y, 4 x 4 U, 4 x 4 V                                       */
861
    /* Inter 4 x 4 Y, 4 x 4 U, 4 x 4 V                                       */
862
    /* Intra 8 x 8 Y, 8 x 8 U, 8 x 8 V                                       */
863
    /* Inter 8 x 8 Y, 8 x 8 U, 8 x 8 V                                       */
864
    /* Intra 16x16 Y, 16x16 U, 16x16 V                                       */
865
    /* Inter 16x16 Y, 16x16 U, 16x16 V                                       */
866
    /* Intra 32x32 Y, 32x32 U, 32x32 V                                       */
867
    /* Inter 32x32 Y, 32x32 U, 32x32 V                                       */
868
    /* UNUSED,        UNUSED,  UNUSED                                        */
869
    /* UNUSED,        UNUSED,  UNUSED                                        */
870
    /*************************************************************************/
871
2.61M
    static const WORD32 scaling_mat_offset[] =
872
2.61M
      { 0, 16, 32, 48, 64, 80, 96, 160, 224, 288, 352, 416, 480, 736, 992,
873
2.61M
        1248, 1504, 1760, 2016, 3040, 4064, 5088, 6112, 7136, 0, 0, 0, 0, 0, 0};
874
875
2.61M
    PROFILE_DISABLE_IQ_IT_RECON_INTRA_PRED();
876
877
2.61M
    ps_sps = ps_proc->ps_sps;
878
2.61M
    ps_pps = ps_proc->ps_pps;
879
2.61M
    ps_slice_hdr = ps_proc->ps_slice_hdr;
880
2.61M
    ps_codec = ps_proc->ps_codec;
881
882
2.61M
    pu1_y_dst_ctb = ps_proc->pu1_cur_ctb_luma;
883
2.61M
    pu1_uv_dst_ctb = ps_proc->pu1_cur_ctb_chroma;
884
885
2.61M
    pi2_ctb_coeff = ps_proc->pi2_invscan_out;
886
887
2.61M
    ctb_size = (1 << ps_sps->i1_log2_ctb_size);
888
2.61M
    pu1_tu_coeff_data = (UWORD8 *)ps_proc->pv_tu_coeff_data;
889
890
2.61M
    pic_strd = ps_codec->i4_strd;
891
892
2.61M
    pi2_tu_coeff = pi2_ctb_coeff;
893
894
2.61M
    ps_tu = ps_proc->ps_tu;
895
896
2.61M
    if((1 == ps_sps->i1_scaling_list_enable_flag) && (1 == ps_pps->i1_pps_scaling_list_data_present_flag))
897
24.4k
    {
898
24.4k
        pi2_scaling_mat = ps_pps->pi2_scaling_mat;
899
24.4k
    }
900
2.58M
    else
901
2.58M
    {
902
2.58M
        pi2_scaling_mat = ps_sps->pi2_scaling_mat;
903
2.58M
    }
904
905
2.61M
    {
906
        /* Updating the initial availability map */
907
2.61M
        WORD32 i;
908
2.61M
        UWORD8 u1_left_ctb_avail, u1_top_lt_ctb_avail, u1_top_rt_ctb_avail,
909
2.61M
                        u1_top_ctb_avail;
910
911
2.61M
        u1_left_ctb_avail = ps_proc->u1_left_ctb_avail;
912
2.61M
        u1_top_lt_ctb_avail = ps_proc->u1_top_lt_ctb_avail;
913
2.61M
        u1_top_ctb_avail = ps_proc->u1_top_ctb_avail;
914
2.61M
        u1_top_rt_ctb_avail = ps_proc->u1_top_rt_ctb_avail;
915
916
        /* Initializing the availability array */
917
2.61M
        memset(au4_intra_nbr_avail, 0,
918
2.61M
               (MAX_CTB_SIZE / MIN_TU_SIZE + 2) * sizeof(UWORD32));
919
        /* Initializing the availability array with CTB level availability flags */
920
2.61M
        {
921
2.61M
            WORD32 rows_remaining = ps_sps->i2_pic_height_in_luma_samples - (ps_proc->i4_ctb_y << ps_sps->i1_log2_ctb_size);
922
2.61M
            WORD32 ctb_size_left = MIN(ctb_size, rows_remaining);
923
24.6M
            for(i = 0; i < ctb_size_left / MIN_TU_SIZE; i++)
924
22.0M
            {
925
22.0M
                au4_intra_nbr_avail[i + 1] = ((UWORD32)u1_left_ctb_avail << 31);
926
22.0M
            }
927
2.61M
        }
928
2.61M
        au4_intra_nbr_avail[0] |= (((UWORD32)u1_top_rt_ctb_avail << 31)
929
2.61M
                        >> (1 + ctb_size / MIN_TU_SIZE)); /* 1+ctb_size/4 position bit pos from msb */
930
931
2.61M
        au4_intra_nbr_avail[0] |= ((UWORD32)u1_top_lt_ctb_avail << 31);
932
933
2.61M
        {
934
2.61M
            WORD32 cols_remaining = ps_sps->i2_pic_width_in_luma_samples - (ps_proc->i4_ctb_x << ps_sps->i1_log2_ctb_size);
935
2.61M
            WORD32 ctb_size_top = MIN(ctb_size, cols_remaining);
936
2.61M
            WORD32 shift = (31 - (ctb_size / MIN_TU_SIZE));
937
938
            /* ctb_size_top gives number of valid pixels remaining in the current row */
939
            /* Since we need pattern of 1's starting from the MSB, an additional shift */
940
            /* is needed */
941
2.61M
            shift += ((ctb_size - ctb_size_top) / MIN_TU_SIZE);
942
943
2.61M
            top_avail_bits = ((1 << (ctb_size_top / MIN_TU_SIZE)) - 1)
944
2.61M
                            << shift;
945
2.61M
        }
946
2.61M
        au4_intra_nbr_avail[0] |= (
947
2.61M
                        (u1_top_ctb_avail == 1) ? top_avail_bits : 0x0);
948
        /* Starting from msb 2nd bit to (1+ctb_size/4) bit, set 1 if top avail,or 0 */
949
950
2.61M
    }
951
952
2.61M
    h_samp_factor = (CHROMA_FMT_IDC_YUV444 == ps_sps->i1_chroma_format_idc) ? 1 : 2;
953
2.61M
    v_samp_factor = (CHROMA_FMT_IDC_YUV420 == ps_sps->i1_chroma_format_idc) ? 2 : 1;
954
955
    /* Applying Inverse transform on all the TU's in CTB */
956
45.6M
    for(tu_cnt = 0; tu_cnt < ps_proc->i4_ctb_tu_cnt; tu_cnt++, ps_tu++)
957
43.0M
    {
958
43.0M
        tu_plane_iq_it_recon_ctxt_t y_cb_tu = { 0 };
959
43.0M
        tu_plane_iq_it_recon_ctxt_t cr_tu = { 0 };
960
43.0M
        tu_plane_iq_it_recon_ctxt_t *ps_cb_tu = &y_cb_tu;
961
43.0M
        tu_plane_iq_it_recon_ctxt_t *ps_cr_tu = &cr_tu;
962
43.0M
#ifdef ENABLE_MAIN_REXT_PROFILE
963
43.0M
        tu_plane_iq_it_recon_ctxt_t cb_sub_tu = { 0 };
964
43.0M
        tu_plane_iq_it_recon_ctxt_t cr_sub_tu = { 0 };
965
43.0M
#endif
966
967
43.0M
        WORD32 num_comp, c_idx, func_idx;
968
969
43.0M
        WORD32 qp_div = 0, qp_rem = 0;
970
43.0M
        WORD32 qp_div_v = 0, qp_rem_v = 0;
971
43.0M
        WORD32 chroma_qp_idx;
972
43.0M
        WORD8 i1_chroma_pic_qp_offset, i1_chroma_slice_qp_offset;
973
43.0M
        WORD16 *pi2_dequant_matrix = NULL, *pi2_dequant_matrix_v = NULL;
974
975
43.0M
        WORD32 trans_size = 0;
976
43.0M
        TRANSFORM_TYPE e_trans_type;
977
43.0M
        WORD32 log2_y_trans_size_minus_2, log2_uv_trans_size_minus_2;
978
43.0M
        WORD32 log2_trans_size;
979
980
43.0M
        WORD32 tu_x, tu_y;
981
43.0M
        WORD32 tu_y_offset, tu_uv_offset;
982
43.0M
        UWORD8 u1_luma_pred_mode, u1_chroma_pred_mode;
983
43.0M
        WORD32 offset;
984
43.0M
        WORD32 pcm_flag;
985
43.0M
        WORD32 chroma_yuv420sp_vu = (ps_codec->e_ref_chroma_fmt == IV_YUV_420SP_VU);
986
        /* If 420SP_VU is chroma format, pred and dst pointer   */
987
        /* will be added +1 to point to U                       */
988
43.0M
        WORD32 chroma_yuv420sp_vu_u_offset = 1 * chroma_yuv420sp_vu;
989
        /* If 420SP_VU is chroma format, pred and dst pointer   */
990
        /* will be added U offset of +1 and subtracted 2        */
991
        /* to point to V                                        */
992
43.0M
        WORD32 chroma_yuv420sp_vu_v_offset = -2 * chroma_yuv420sp_vu;
993
994
43.0M
        tu_x = ps_tu->b4_pos_x * 4; /* Converting minTU unit to pixel unit */
995
43.0M
        tu_y = ps_tu->b4_pos_y * 4; /* Converting minTU unit to pixel unit */
996
43.0M
        {
997
43.0M
            WORD32 tu_abs_x = (ps_proc->i4_ctb_x << ps_sps->i1_log2_ctb_size) + (tu_x);
998
43.0M
            WORD32 tu_abs_y = (ps_proc->i4_ctb_y << ps_sps->i1_log2_ctb_size) + (tu_y);
999
1000
43.0M
            WORD32 numbytes_row =  (ps_sps->i2_pic_width_in_luma_samples + 63) / 64;
1001
1002
43.0M
            pu1_pic_intra_flag = ps_proc->pu1_pic_intra_flag;
1003
43.0M
            pu1_pic_intra_flag += (tu_abs_y >> 3) * numbytes_row;
1004
43.0M
            pu1_pic_intra_flag += (tu_abs_x >> 6);
1005
1006
43.0M
            intra_flag = *pu1_pic_intra_flag;
1007
43.0M
            intra_flag &= (1 << ((tu_abs_x >> 3) % 8));
1008
43.0M
        }
1009
1010
43.0M
        u1_luma_pred_mode = ps_tu->b6_luma_intra_mode;
1011
43.0M
        u1_chroma_pred_mode = ps_tu->b3_chroma_intra_mode_idx;
1012
1013
43.0M
        if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc && u1_chroma_pred_mode != 7)
1014
1.81M
            num_comp = 2; /* Y and UV */
1015
41.2M
        else
1016
41.2M
            num_comp = 1; /* Y */
1017
1018
43.0M
        pcm_flag = 0;
1019
1020
43.0M
        if((intra_flag) && (u1_luma_pred_mode == INTRA_PRED_NONE))
1021
86
        {
1022
86
            UWORD8 *pu1_buf;
1023
86
            UWORD8 *pu1_y_dst = pu1_y_dst_ctb;
1024
86
            UWORD8 *pu1_uv_dst = pu1_uv_dst_ctb;
1025
86
            WORD32 i, j;
1026
86
            tu_sblk_coeff_data_t *ps_tu_sblk_coeff_data;
1027
86
            WORD32 cb_size = 1 << (ps_tu->b3_size + 2);
1028
1029
            /* trans_size is used to update availability after reconstruction */
1030
86
            trans_size = cb_size;
1031
1032
86
            pcm_flag = 1;
1033
1034
86
            tu_y_offset = tu_x + tu_y * pic_strd;
1035
86
            pu1_y_dst += tu_x + tu_y * pic_strd;
1036
1037
            /* First byte points to number of coded blocks */
1038
86
            pu1_tu_coeff_data++;
1039
1040
            /* Next byte points to scan type */
1041
86
            pu1_tu_coeff_data++;
1042
1043
86
            ps_tu_sblk_coeff_data = (tu_sblk_coeff_data_t *)pu1_tu_coeff_data;
1044
1045
86
            pu1_buf = (UWORD8 *)&ps_tu_sblk_coeff_data->ai2_level[0];
1046
86
            {
1047
1048
966
                for(i = 0; i < cb_size; i++)
1049
880
                {
1050
                    //pu1_y_dst[i * pic_strd + j] = *pu1_buf++;
1051
880
                    memcpy(&pu1_y_dst[i * pic_strd], pu1_buf, cb_size);
1052
880
                    pu1_buf += cb_size;
1053
880
                }
1054
1055
86
                if(ps_sps->i1_chroma_format_idc != CHROMA_FMT_IDC_MONOCHROME)
1056
86
                {
1057
86
                    WORD32 chroma_strd = (pic_strd * chroma_pixel_strd) / h_samp_factor;
1058
1059
86
                    pu1_uv_dst += (tu_x * chroma_pixel_strd / h_samp_factor)
1060
86
                                    + (tu_y * chroma_pixel_strd * pic_strd / (h_samp_factor * v_samp_factor));
1061
86
                    pu1_uv_dst = pu1_uv_dst + chroma_yuv420sp_vu_u_offset;
1062
1063
                    /* U */
1064
526
                    for(i = 0; i < cb_size / v_samp_factor; i++)
1065
440
                    {
1066
3.16k
                        for(j = 0; j < cb_size / h_samp_factor; j++)
1067
2.72k
                        {
1068
2.72k
                            pu1_uv_dst[i * chroma_strd + chroma_pixel_strd * j] = *pu1_buf++;
1069
2.72k
                        }
1070
440
                    }
1071
1072
86
                    pu1_uv_dst = pu1_uv_dst + 1 + chroma_yuv420sp_vu_v_offset;
1073
1074
                    /* V */
1075
526
                    for(i = 0; i < cb_size / v_samp_factor; i++)
1076
440
                    {
1077
3.16k
                        for(j = 0; j < cb_size / h_samp_factor; j++)
1078
2.72k
                        {
1079
2.72k
                            pu1_uv_dst[i * chroma_strd + chroma_pixel_strd * j] = *pu1_buf++;
1080
2.72k
                        }
1081
440
                    }
1082
86
                }
1083
86
            }
1084
1085
86
            pu1_tu_coeff_data = pu1_buf;
1086
1087
86
        }
1088
1089
1090
1091
1092
1093
87.7M
        for(c_idx = 0; c_idx < num_comp; c_idx++)
1094
44.7M
        {
1095
44.7M
            if(0 == pcm_flag)
1096
44.7M
            {
1097
1098
44.7M
                if(c_idx == 0) /* Y */
1099
42.9M
                {
1100
                    /* Initializing variables */
1101
1102
42.9M
                    log2_y_trans_size_minus_2 = ps_tu->b3_size;
1103
42.9M
                    trans_size = 1 << (log2_y_trans_size_minus_2 + 2);
1104
42.9M
                    log2_trans_size = log2_y_trans_size_minus_2 + 2;
1105
1106
42.9M
                    tu_y_offset = tu_x + tu_y * pic_strd;
1107
1108
                    /* Calculating scaling matrix offset */
1109
42.9M
                    offset = log2_y_trans_size_minus_2 * 6 + (!intra_flag) * 3 + c_idx;
1110
42.9M
                    pi2_dequant_matrix = pi2_scaling_mat + scaling_mat_offset[offset];
1111
1112
                    /* 4x4 transform Luma in INTRA mode is DST */
1113
42.9M
                    if(log2_y_trans_size_minus_2 == 0 && intra_flag)
1114
1.39M
                    {
1115
1.39M
                        func_idx = log2_y_trans_size_minus_2;
1116
1.39M
                        e_trans_type = DST_4x4;
1117
1.39M
                    }
1118
41.5M
                    else
1119
41.5M
                    {
1120
41.5M
                        func_idx = log2_y_trans_size_minus_2 + 1;
1121
41.5M
                        e_trans_type = (TRANSFORM_TYPE)(log2_y_trans_size_minus_2 + 1);
1122
41.5M
                    }
1123
1124
42.9M
                    qp_div = ps_tu->b7_qp / 6;
1125
42.9M
                    qp_rem = ps_tu->b7_qp % 6;
1126
1127
42.9M
                    y_cb_tu.pi2_tu_coeff = pi2_tu_coeff;
1128
42.9M
                    y_cb_tu.pu1_pred = pu1_y_dst_ctb + tu_y_offset;
1129
42.9M
                    y_cb_tu.pu1_dst = pu1_y_dst_ctb + tu_y_offset;
1130
42.9M
                    y_cb_tu.tu_coeff_stride = trans_size;
1131
42.9M
                    y_cb_tu.pred_strd = pic_strd;
1132
42.9M
                    y_cb_tu.dst_strd = pic_strd;
1133
42.9M
                    y_cb_tu.cbf = ps_tu->b1_y_cbf;
1134
42.9M
                    y_cb_tu.transform_skip_flag = pu1_tu_coeff_data[1] & 1;
1135
42.9M
#ifdef ENABLE_MAIN_REXT_PROFILE
1136
42.9M
                    y_cb_tu.explicit_rdpcm_flag = (pu1_tu_coeff_data[1] >> 4) & 1;
1137
42.9M
                    y_cb_tu.explicit_rdpcm_dir = (pu1_tu_coeff_data[1] >> 5) & 1;
1138
42.9M
#endif
1139
                    /* Unpacking coeffs */
1140
42.9M
                    if(1 == y_cb_tu.cbf)
1141
1.92M
                    {
1142
1.92M
                        pu1_tu_coeff_data = ihevcd_unpack_coeffs(
1143
1.92M
                                        y_cb_tu.pi2_tu_coeff, log2_y_trans_size_minus_2 + 2,
1144
1.92M
                                        pu1_tu_coeff_data, pi2_dequant_matrix,
1145
1.92M
                                        qp_rem, qp_div, e_trans_type,
1146
1.92M
                                        ps_tu->b1_transquant_bypass, &y_cb_tu.zero_cols,
1147
1.92M
                                        &y_cb_tu.zero_rows, &y_cb_tu.coeff_type,
1148
1.92M
                                        &y_cb_tu.coeff_value);
1149
1.92M
                    }
1150
42.9M
                }
1151
1.81M
                else /* UV interleaved */
1152
1.81M
                {
1153
                    /* Initializing variables */
1154
1.81M
                    const WORD16 *pi2_ihevcd_chroma_qp =
1155
1.81M
                                    CHROMA_FMT_IDC_YUV420 != ps_sps->i1_chroma_format_idc ?
1156
0
                                                    gai2_ihevcd_chroma_qp_clip :
1157
1.81M
                                                    gai2_ihevcd_chroma_qp_420;
1158
1159
                    /* Chroma :If Transform size is 4x4, keep 4x4 else do transform on (trans_size/2 x trans_size/2) */
1160
1.81M
                    if(ps_tu->b3_size == 0)
1161
516k
                    {
1162
516k
                        log2_uv_trans_size_minus_2 = ps_tu->b3_size;
1163
516k
                        if(CHROMA_FMT_IDC_YUV444 == ps_sps->i1_chroma_format_idc)
1164
0
                        {
1165
0
                            tu_uv_offset = (tu_x * chroma_pixel_strd)
1166
0
                                            + (tu_y * chroma_pixel_strd * pic_strd);
1167
0
                        }
1168
516k
                        else
1169
516k
                        {
1170
                            /* Chroma 4x4 is present with 4th luma 4x4 block. For this case chroma position has to be (luma pos x - 4, luma pos y - 4) */
1171
516k
                            tu_uv_offset = (tu_x - 4) + ((tu_y - 4) / v_samp_factor) * pic_strd;
1172
516k
                        }
1173
516k
                    }
1174
1.29M
                    else
1175
1.29M
                    {
1176
1.29M
                        if(CHROMA_FMT_IDC_YUV444 == ps_sps->i1_chroma_format_idc)
1177
0
                        {
1178
0
                            log2_uv_trans_size_minus_2 = ps_tu->b3_size;
1179
0
                        }
1180
1.29M
                        else
1181
1.29M
                        {
1182
1.29M
                            log2_uv_trans_size_minus_2 = ps_tu->b3_size - 1;
1183
1.29M
                        }
1184
1.29M
                        tu_uv_offset = (tu_x * chroma_pixel_strd / h_samp_factor)
1185
1.29M
                                        + (tu_y * chroma_pixel_strd * pic_strd / (h_samp_factor * v_samp_factor));
1186
1.29M
                    }
1187
1.81M
                    trans_size = 1 << (log2_uv_trans_size_minus_2 + 2);
1188
1.81M
                    log2_trans_size = log2_uv_trans_size_minus_2 + 2;
1189
1190
                    /*TODO: Add support for choosing different tables for U and V,
1191
                     * change this to a single array to handle flat/default/custom, intra/inter, luma/chroma and various sizes
1192
                     */
1193
                    /* Calculating scaling matrix offset */
1194
                    /* ((log2_uv_trans_size_minus_2 == 3) ? 1:3) condition check is not needed, since
1195
                     * max uv trans size is 16x16
1196
                     */
1197
1.81M
                    offset = log2_uv_trans_size_minus_2 * 6 + (!intra_flag) * 3 + c_idx;
1198
1.81M
                    pi2_dequant_matrix = pi2_scaling_mat + scaling_mat_offset[offset];
1199
1.81M
                    pi2_dequant_matrix_v = pi2_scaling_mat + scaling_mat_offset[offset + 1];
1200
1201
1.81M
                    func_idx = 1 + 4 + log2_uv_trans_size_minus_2; /* DST func + Y funcs + cur func index*/
1202
1203
                    /* Handle error cases where 64x64 TU is signalled which results in 32x32 chroma.
1204
                     * Limit func_idx based on allowed max chroma tu size */
1205
1.81M
                    func_idx = MIN(func_idx, (CHROMA_FMT_IDC_YUV444 == ps_sps->i1_chroma_format_idc) ? 8 : 7);
1206
1207
1.81M
                    e_trans_type = (TRANSFORM_TYPE)(log2_uv_trans_size_minus_2 + 1);
1208
                    /* QP for U */
1209
1.81M
                    i1_chroma_pic_qp_offset = ps_pps->i1_pic_cb_qp_offset;
1210
1.81M
                    i1_chroma_slice_qp_offset = ps_slice_hdr->i1_slice_cb_qp_offset;
1211
1212
1.81M
                    chroma_qp_idx = ps_tu->b7_qp + i1_chroma_pic_qp_offset + i1_chroma_slice_qp_offset;
1213
1.81M
                    chroma_qp_idx = CLIP3(chroma_qp_idx, 0, 57);
1214
1.81M
                    qp_div = pi2_ihevcd_chroma_qp[chroma_qp_idx] / 6;
1215
1.81M
                    qp_rem = pi2_ihevcd_chroma_qp[chroma_qp_idx] % 6;
1216
1217
                    /* QP for V */
1218
1.81M
                    i1_chroma_pic_qp_offset = ps_pps->i1_pic_cr_qp_offset;
1219
1.81M
                    i1_chroma_slice_qp_offset = ps_slice_hdr->i1_slice_cr_qp_offset;
1220
1221
1.81M
                    chroma_qp_idx = ps_tu->b7_qp + i1_chroma_pic_qp_offset + i1_chroma_slice_qp_offset;
1222
1.81M
                    chroma_qp_idx = CLIP3(chroma_qp_idx, 0, 57);
1223
1.81M
                    qp_div_v = pi2_ihevcd_chroma_qp[chroma_qp_idx] / 6;
1224
1.81M
                    qp_rem_v = pi2_ihevcd_chroma_qp[chroma_qp_idx] % 6;
1225
1226
1.81M
                    y_cb_tu.pi2_tu_coeff = pi2_tu_coeff;
1227
1.81M
                    y_cb_tu.pu1_pred = pu1_uv_dst_ctb + tu_uv_offset + chroma_yuv420sp_vu_u_offset; /* Pointing to start byte of U*/
1228
1.81M
                    y_cb_tu.pu1_dst = pu1_uv_dst_ctb + tu_uv_offset + chroma_yuv420sp_vu_u_offset; /* Pointing to start byte of U*/
1229
1.81M
                    y_cb_tu.tu_coeff_stride = trans_size;
1230
1.81M
                    y_cb_tu.pred_strd = pic_strd * chroma_pixel_strd / h_samp_factor;
1231
1.81M
                    y_cb_tu.dst_strd = pic_strd * chroma_pixel_strd / h_samp_factor;
1232
1.81M
                    y_cb_tu.cbf = ps_tu->b1_cb_cbf;
1233
1234
1.81M
                    cr_tu.pi2_tu_coeff = pi2_tu_coeff + trans_size * trans_size;
1235
1.81M
                    cr_tu.pu1_pred = y_cb_tu.pu1_pred + 1 + chroma_yuv420sp_vu_v_offset; /* Pointing to start byte of V*/
1236
1.81M
                    cr_tu.pu1_dst = y_cb_tu.pu1_dst + 1 + chroma_yuv420sp_vu_v_offset; /* Pointing to start byte of V*/
1237
1.81M
                    cr_tu.tu_coeff_stride = trans_size;
1238
1.81M
                    cr_tu.pred_strd = pic_strd * chroma_pixel_strd / h_samp_factor;
1239
1.81M
                    cr_tu.dst_strd = pic_strd * chroma_pixel_strd / h_samp_factor;
1240
1.81M
                    cr_tu.cbf = ps_tu->b1_cr_cbf;
1241
1242
                    /* Unpacking coeffs */
1243
1.81M
                    y_cb_tu.transform_skip_flag = pu1_tu_coeff_data[1] & 1;
1244
1.81M
#ifdef ENABLE_MAIN_REXT_PROFILE
1245
1.81M
                    y_cb_tu.explicit_rdpcm_flag = (pu1_tu_coeff_data[1] >> 4) & 1;
1246
1.81M
                    y_cb_tu.explicit_rdpcm_dir = (pu1_tu_coeff_data[1] >> 5) & 1;
1247
1.81M
#endif
1248
1.81M
                    if(1 == y_cb_tu.cbf)
1249
273k
                    {
1250
273k
                        pu1_tu_coeff_data = ihevcd_unpack_coeffs(
1251
273k
                                        y_cb_tu.pi2_tu_coeff, log2_uv_trans_size_minus_2 + 2,
1252
273k
                                        pu1_tu_coeff_data, pi2_dequant_matrix,
1253
273k
                                        qp_rem, qp_div, e_trans_type,
1254
273k
                                        ps_tu->b1_transquant_bypass, &y_cb_tu.zero_cols,
1255
273k
                                        &y_cb_tu.zero_rows, &y_cb_tu.coeff_type,
1256
273k
                                        &y_cb_tu.coeff_value);
1257
273k
                    }
1258
1.81M
#ifdef ENABLE_MAIN_REXT_PROFILE
1259
1.81M
                    if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV422)
1260
0
                    {
1261
0
                        cb_sub_tu.pi2_tu_coeff = ps_proc->pi2_invscan_out_subtu;
1262
0
                        cb_sub_tu.pu1_pred = y_cb_tu.pu1_pred + trans_size * y_cb_tu.pred_strd;
1263
0
                        cb_sub_tu.pu1_dst = y_cb_tu.pu1_dst + trans_size * y_cb_tu.dst_strd;
1264
0
                        cb_sub_tu.tu_coeff_stride = trans_size;
1265
0
                        cb_sub_tu.pred_strd = pic_strd * chroma_pixel_strd / h_samp_factor;
1266
0
                        cb_sub_tu.dst_strd = pic_strd * chroma_pixel_strd / h_samp_factor;
1267
0
                        cb_sub_tu.cbf = ps_tu->b1_cb_cbf_subtu1;
1268
0
                        cb_sub_tu.transform_skip_flag = pu1_tu_coeff_data[1] & 1;
1269
0
                        cb_sub_tu.explicit_rdpcm_flag = (pu1_tu_coeff_data[1] >> 4) & 1;
1270
0
                        cb_sub_tu.explicit_rdpcm_dir = (pu1_tu_coeff_data[1] >> 5) & 1;
1271
0
                        if(1 == cb_sub_tu.cbf)
1272
0
                        {
1273
0
                            pu1_tu_coeff_data = ihevcd_unpack_coeffs(
1274
0
                                            cb_sub_tu.pi2_tu_coeff, log2_uv_trans_size_minus_2 + 2,
1275
0
                                            pu1_tu_coeff_data, pi2_dequant_matrix,
1276
0
                                            qp_rem, qp_div, e_trans_type,
1277
0
                                            ps_tu->b1_transquant_bypass, &cb_sub_tu.zero_cols,
1278
0
                                            &cb_sub_tu.zero_rows, &cb_sub_tu.coeff_type,
1279
0
                                            &cb_sub_tu.coeff_value);
1280
0
                        }
1281
0
                    }
1282
1.81M
#endif
1283
1284
1.81M
                    cr_tu.transform_skip_flag = pu1_tu_coeff_data[1] & 1;
1285
1.81M
#ifdef ENABLE_MAIN_REXT_PROFILE
1286
1.81M
                    cr_tu.explicit_rdpcm_flag = (pu1_tu_coeff_data[1] >> 4) & 1;
1287
1.81M
                    cr_tu.explicit_rdpcm_dir = (pu1_tu_coeff_data[1] >> 5) & 1;
1288
1.81M
#endif
1289
1.81M
                    if(1 == cr_tu.cbf)
1290
269k
                    {
1291
269k
                        pu1_tu_coeff_data = ihevcd_unpack_coeffs(
1292
269k
                                        cr_tu.pi2_tu_coeff, log2_uv_trans_size_minus_2 + 2,
1293
269k
                                        pu1_tu_coeff_data, pi2_dequant_matrix_v,
1294
269k
                                        qp_rem_v, qp_div_v, e_trans_type,
1295
269k
                                        ps_tu->b1_transquant_bypass, &cr_tu.zero_cols,
1296
269k
                                        &cr_tu.zero_rows, &cr_tu.coeff_type, &cr_tu.coeff_value);
1297
269k
                    }
1298
1.81M
#ifdef ENABLE_MAIN_REXT_PROFILE
1299
1.81M
                    if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV422)
1300
0
                    {
1301
0
                        cr_sub_tu.pi2_tu_coeff = ps_proc->pi2_invscan_out_subtu + trans_size * trans_size;
1302
0
                        cr_sub_tu.pu1_pred = cr_tu.pu1_pred + trans_size * cr_tu.pred_strd;
1303
0
                        cr_sub_tu.pu1_dst = cr_tu.pu1_dst + trans_size * cr_tu.dst_strd;
1304
0
                        cr_sub_tu.tu_coeff_stride = trans_size;
1305
0
                        cr_sub_tu.pred_strd = pic_strd * chroma_pixel_strd / h_samp_factor;
1306
0
                        cr_sub_tu.dst_strd = pic_strd * chroma_pixel_strd / h_samp_factor;
1307
0
                        cr_sub_tu.cbf = ps_tu->b1_cr_cbf_subtu1;
1308
0
                        cr_sub_tu.transform_skip_flag = pu1_tu_coeff_data[1] & 1;
1309
0
                        cr_sub_tu.explicit_rdpcm_flag = (pu1_tu_coeff_data[1] >> 4) & 1;
1310
0
                        cr_sub_tu.explicit_rdpcm_dir = (pu1_tu_coeff_data[1] >> 5) & 1;
1311
0
                        if(1 == cr_sub_tu.cbf)
1312
0
                        {
1313
0
                            pu1_tu_coeff_data = ihevcd_unpack_coeffs(
1314
0
                                            cr_sub_tu.pi2_tu_coeff, log2_uv_trans_size_minus_2 + 2,
1315
0
                                            pu1_tu_coeff_data, pi2_dequant_matrix_v,
1316
0
                                            qp_rem_v, qp_div_v, e_trans_type,
1317
0
                                            ps_tu->b1_transquant_bypass, &cr_sub_tu.zero_cols,
1318
0
                                            &cr_sub_tu.zero_rows, &cr_sub_tu.coeff_type,
1319
0
                                            &cr_sub_tu.coeff_value);
1320
0
                        }
1321
0
                    }
1322
1.81M
#endif
1323
1.81M
                }
1324
44.7M
                WORD8 subtu_idx = 0;
1325
44.7M
                do
1326
44.7M
                {
1327
                /***************************************************************/
1328
                /******************  Intra Prediction **************************/
1329
                /***************************************************************/
1330
44.7M
                if(intra_flag) /* Intra */
1331
3.26M
                {
1332
                    /* While (MAX_TU_SIZE * 2 * 2) + 1 is the actual size needed,
1333
                       au1_ref_sub_out size is kept as multiple of 8,
1334
                       so that SIMD functions can load 64 bits. Also some SIMD
1335
                       modules read few bytes before the start of the array, so
1336
                       allocate 16 extra bytes at the start */
1337
3.26M
                    UWORD8 au1_ref_sub_out[16 + (MAX_TU_SIZE * 2 * 2 * 2) + 8] = {0};
1338
3.26M
                    UWORD8 *pu1_ref_sub_out = &au1_ref_sub_out[16];
1339
3.26M
                    UWORD8 *pu1_top_left, *pu1_top, *pu1_left;
1340
3.26M
                    WORD32 luma_pred_func_idx, chroma_pred_func_idx;
1341
1342
                    /* Get the neighbour availability flags */
1343
                    /* Done for only Y */
1344
3.26M
                    if(c_idx == 0)
1345
2.15M
                    {
1346
                        /* Get neighbor availability for Y only */
1347
2.15M
                        luma_nbr_flags = ihevcd_get_intra_nbr_flag(ps_proc,
1348
2.15M
                                                                   ps_tu,
1349
2.15M
                                                                   au4_intra_nbr_avail,
1350
2.15M
                                                                   ps_sps->i2_pic_width_in_luma_samples,
1351
2.15M
                                                                   ps_pps->i1_constrained_intra_pred_flag,
1352
2.15M
                                                                   trans_size,
1353
2.15M
                                                                   ctb_size);
1354
1355
2.15M
                        if(trans_size == 4)
1356
1.39M
                            luma_nbr_flags_4x4[(ps_tu->b4_pos_x % 2) + (ps_tu->b4_pos_y % 2) * 2] = luma_nbr_flags;
1357
1358
2.15M
                        if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV444)
1359
0
                        {
1360
0
                            chroma_nbr_flags = luma_nbr_flags;
1361
0
                        }
1362
2.15M
                        else if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV422)
1363
0
                        {
1364
0
                            WORD32 bot_left, left, top, tp_right, tp_left;
1365
0
                            tp_left = (luma_nbr_flags & 0x10000);
1366
0
                            tp_right = (luma_nbr_flags & 0x0f000);
1367
0
                            top = (luma_nbr_flags & 0x00f00);
1368
0
                            left = (luma_nbr_flags & 0x000f0);
1369
0
                            bot_left = (luma_nbr_flags & 0x0000f);
1370
0
                            chroma_nbr_flags = tp_left | tp_right | top | left | (left >> 4);
1371
0
                            chroma_nbr_flags_subtu = ((left != 0 ? 1 : 0) << 16) | (0xf << 8)
1372
0
                                            | left | bot_left;
1373
0
                        }
1374
2.15M
                        else if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV420)
1375
2.15M
                        {
1376
2.15M
                            if(((ps_tu->b4_pos_x % 2 == 0) && (ps_tu->b4_pos_y % 2 == 0)))
1377
1.11M
                                chroma_nbr_flags = luma_nbr_flags;
1378
2.15M
                        }
1379
1380
                        /* Initializing nbr pointers */
1381
2.15M
                        pu1_top = y_cb_tu.pu1_pred - pic_strd;
1382
2.15M
                        pu1_left = y_cb_tu.pu1_pred - 1;
1383
2.15M
                        pu1_top_left = y_cb_tu.pu1_pred - pic_strd - 1;
1384
1385
                        /* call reference array substitution */
1386
2.15M
                        if(luma_nbr_flags == 0x1ffff)
1387
518k
                            ps_codec->s_func_selector.ihevc_intra_pred_luma_ref_subst_all_avlble_fptr(
1388
518k
                                            pu1_top_left,
1389
518k
                                            pu1_top, pu1_left, y_cb_tu.pred_strd, trans_size, luma_nbr_flags, pu1_ref_sub_out, 1);
1390
1.63M
                        else
1391
1.63M
                            ps_codec->s_func_selector.ihevc_intra_pred_luma_ref_substitution_fptr(
1392
1.63M
                                            pu1_top_left,
1393
1.63M
                                            pu1_top, pu1_left, y_cb_tu.pred_strd, trans_size, luma_nbr_flags, pu1_ref_sub_out, 1);
1394
1395
                        /* call reference filtering */
1396
2.15M
                        ps_codec->s_func_selector.ihevc_intra_pred_ref_filtering_fptr(
1397
2.15M
                                        pu1_ref_sub_out,
1398
2.15M
                                        trans_size,
1399
2.15M
                                        pu1_ref_sub_out,
1400
2.15M
                                        u1_luma_pred_mode,
1401
2.15M
#ifdef ENABLE_MAIN_REXT_PROFILE
1402
2.15M
                                        (ps_sps->i1_intra_smoothing_disabled_flag << 3
1403
2.15M
                                                        | ps_sps->i1_strong_intra_smoothing_enable_flag)
1404
#else
1405
                                        ps_sps->i1_strong_intra_smoothing_enable_flag
1406
#endif
1407
2.15M
                                        );
1408
1409
                        /* use the look up to get the function idx */
1410
2.15M
                        luma_pred_func_idx = g_i4_ip_funcs[u1_luma_pred_mode];
1411
1412
2.15M
#ifdef ENABLE_MAIN_REXT_PROFILE
1413
2.15M
                        if(ps_sps->i1_implicit_rdpcm_enabled_flag && ps_tu->b1_transquant_bypass
1414
0
                                        && (u1_luma_pred_mode == 10 || u1_luma_pred_mode == 26))
1415
0
                            disable_boundary_filter = 1;
1416
2.15M
#endif
1417
                        /* call the intra prediction function */
1418
2.15M
                        ps_codec->apf_intra_pred_luma[luma_pred_func_idx](
1419
2.15M
                                        pu1_ref_sub_out, 1,
1420
2.15M
                                        y_cb_tu.pu1_pred,
1421
2.15M
                                        y_cb_tu.pred_strd,
1422
2.15M
                                        trans_size,
1423
2.15M
#ifdef ENABLE_MAIN_REXT_PROFILE
1424
2.15M
                                        (u1_luma_pred_mode == 10 || u1_luma_pred_mode == 26) ?
1425
263k
                                                        disable_boundary_filter :
1426
2.15M
                                                        u1_luma_pred_mode
1427
#else
1428
                                        u1_luma_pred_mode
1429
#endif
1430
2.15M
                                        );
1431
2.15M
                    }
1432
1.11M
                    else
1433
1.11M
                    {
1434
1435
1.11M
#ifdef ENABLE_MAIN_REXT_PROFILE
1436
1.11M
                        if(subtu_idx != 0)
1437
0
                        {
1438
0
                            ps_cb_tu = &cb_sub_tu;
1439
0
                            ps_cr_tu = &cr_sub_tu;
1440
0
                            chroma_nbr_flags = chroma_nbr_flags_subtu;
1441
0
                        }
1442
1.11M
#endif
1443
1444
                        /* In case of yuv420sp_vu, prediction happens as usual.         */
1445
                        /* So point the pu1_pred pointer to original prediction pointer */
1446
1.11M
                        UWORD8 *pu1_pred_orig = ps_cb_tu->pu1_pred - chroma_yuv420sp_vu_u_offset;
1447
1448
                        /*    Top-Left | Top-Right | Top | Left | Bottom-Left
1449
                         *      1         4         4     4         4
1450
                         *
1451
                         * Generating chroma_nbr_flags depending upon the transform size */
1452
1.11M
                        if(ps_tu->b3_size == 0)
1453
348k
                        {
1454
348k
                            if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV420)
1455
348k
                            {
1456
                                /* Take TL,T,L flags of First luma 4x4 block */
1457
348k
                                chroma_nbr_flags = (luma_nbr_flags_4x4[0] & 0x10FF0);
1458
                                /* Take TR flags of Second luma 4x4 block */
1459
348k
                                chroma_nbr_flags |= (luma_nbr_flags_4x4[1] & 0x0F000);
1460
                                /* Take BL flags of Third luma 4x4 block */
1461
348k
                                chroma_nbr_flags |= (luma_nbr_flags_4x4[2] & 0x0000F);
1462
348k
                            }
1463
8
                            else if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV422)
1464
0
                            {
1465
0
                                if(subtu_idx == 0)
1466
0
                                {
1467
                                    /* Take TL,T,L flags of First luma 4x4 block */
1468
0
                                    chroma_nbr_flags = (luma_nbr_flags_4x4[0] & 0x10FF0);
1469
                                    /* Take TR flags of Second luma 4x4 block */
1470
0
                                    chroma_nbr_flags |= (luma_nbr_flags_4x4[1] & 0x0F000);
1471
                                    /* Take BL flags of first luma 4x4 block */
1472
0
                                    chroma_nbr_flags |= (luma_nbr_flags_4x4[0] & 0x0000F);
1473
0
                                }
1474
0
                                else
1475
0
                                {
1476
                                    /* Take TL,T,L flags of Third luma 4x4 block */
1477
0
                                    chroma_nbr_flags = (luma_nbr_flags_4x4[2] & 0x10FF0);
1478
                                    /* Take BL flags of Third luma 4x4 block */
1479
0
                                    chroma_nbr_flags |= (luma_nbr_flags_4x4[2] & 0x0000F);
1480
0
                                }
1481
0
                            }
1482
348k
                        }
1483
1484
                        /* Initializing nbr pointers */
1485
1.11M
                        pu1_top = pu1_pred_orig - (pic_strd * chroma_pixel_strd / h_samp_factor);
1486
1.11M
                        pu1_left = pu1_pred_orig - 2;
1487
1.11M
                        pu1_top_left = pu1_pred_orig - (pic_strd * chroma_pixel_strd / h_samp_factor) - 2;
1488
1489
1.11M
                        if(subtu_idx == 0)
1490
1.11M
                        {
1491
                        /* Chroma pred  mode derivation from luma pred mode */
1492
1.11M
                        {
1493
1.11M
                            tu_t *ps_tu_tmp = ps_tu;
1494
1.11M
                            if(ps_sps->i1_chroma_format_idc != CHROMA_FMT_IDC_YUV444)
1495
1.11M
                            {
1496
3.25M
                                while(!ps_tu_tmp->b1_first_tu_in_cu)
1497
2.13M
                                {
1498
2.13M
                                    ps_tu_tmp--;
1499
2.13M
                                }
1500
1.11M
                            }
1501
1.11M
                            u1_luma_pred_mode_first_tu = ps_tu_tmp->b6_luma_intra_mode;
1502
1.11M
                        }
1503
1.11M
                        if(4 == u1_chroma_pred_mode)
1504
845k
                            u1_chroma_pred_mode = u1_luma_pred_mode_first_tu;
1505
266k
                        else
1506
266k
                        {
1507
266k
                            u1_chroma_pred_mode = gau1_intra_pred_chroma_modes[u1_chroma_pred_mode];
1508
1509
266k
                            if(u1_chroma_pred_mode == u1_luma_pred_mode_first_tu)
1510
29.0k
                            {
1511
29.0k
                                u1_chroma_pred_mode = INTRA_ANGULAR(34);
1512
29.0k
                            }
1513
266k
                        }
1514
1.11M
                        if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV422)
1515
0
                        {
1516
0
                            u1_chroma_pred_mode = gau1_intra_pred_chroma_modes_422[u1_chroma_pred_mode];
1517
0
                        }
1518
1.11M
                        }
1519
1520
                        /* call the chroma reference array substitution */
1521
1.11M
                        ps_codec->s_func_selector.ihevc_intra_pred_chroma_ref_substitution_fptr(
1522
1.11M
                                        pu1_top_left,
1523
1.11M
                                        pu1_top, pu1_left,
1524
1.11M
                                        ps_cb_tu->pred_strd,
1525
1.11M
                                        trans_size, chroma_nbr_flags, pu1_ref_sub_out, 1,
1526
1.11M
                                        ps_sps->i1_chroma_format_idc);
1527
1528
1.11M
#ifdef ENABLE_MAIN_REXT_PROFILE
1529
                        /* call reference filtering */
1530
1.11M
                        if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV444)
1531
0
                        {
1532
0
                            ps_codec->s_func_selector.ihevc_intra_pred_chroma_ref_filtering_fptr(
1533
0
                                            pu1_ref_sub_out,
1534
0
                                            trans_size,
1535
0
                                            pu1_ref_sub_out,
1536
0
                                            u1_chroma_pred_mode,
1537
0
                                            (ps_sps->i1_intra_smoothing_disabled_flag << 3
1538
0
                                                            | ps_sps->i1_strong_intra_smoothing_enable_flag));
1539
0
                        }
1540
1.11M
#endif
1541
1542
                        /* use the look up to get the function idx */
1543
1.11M
                        chroma_pred_func_idx = g_i4_ip_funcs[u1_chroma_pred_mode];
1544
1545
                        /* call the intra prediction function */
1546
1.11M
                        ps_codec->apf_intra_pred_chroma[chroma_pred_func_idx](pu1_ref_sub_out, 1, pu1_pred_orig, ps_cb_tu->pred_strd, trans_size, u1_chroma_pred_mode);
1547
1.11M
                    }
1548
3.26M
                }
1549
1550
                /* Updating number of transform types */
1551
44.7M
                STATS_UPDATE_ALL_TRANS(e_trans_type, c_idx);
1552
1553
44.7M
#ifdef ENABLE_MAIN_REXT_PROFILE
1554
44.7M
                iqitrecon_fptr = get_iqitrec_func(
1555
44.7M
                                ps_proc, ps_tu, ps_cb_tu, log2_trans_size,
1556
44.7M
                                c_idx != 0 ? U_PLANE : NULL_PLANE, intra_flag,
1557
44.7M
                                c_idx == 0 ? u1_luma_pred_mode : u1_chroma_pred_mode);
1558
44.7M
#endif
1559
                /* IQ, IT and Recon for Y if c_idx == 0, and U if c_idx !=0 */
1560
44.7M
                iqitrecon_fptr(ps_proc, ps_tu, ps_cb_tu, func_idx, log2_trans_size,
1561
44.7M
                               c_idx != 0 ? U_PLANE : NULL_PLANE, intra_flag,
1562
44.7M
                               c_idx == 0 ? u1_luma_pred_mode : u1_chroma_pred_mode);
1563
                /* IQ, IT and Recon for V */
1564
44.7M
                if(c_idx != 0)
1565
1.81M
                {
1566
1.81M
#ifdef ENABLE_MAIN_REXT_PROFILE
1567
1.81M
                    iqitrecon_fptr = get_iqitrec_func(ps_proc, ps_tu, ps_cr_tu, log2_trans_size,
1568
1.81M
                                                      V_PLANE, intra_flag, u1_chroma_pred_mode);
1569
1.81M
#endif
1570
1.81M
                    iqitrecon_fptr(ps_proc, ps_tu, ps_cr_tu, func_idx, log2_trans_size, V_PLANE,
1571
1.81M
                                   intra_flag, u1_chroma_pred_mode);
1572
1.81M
                }
1573
44.7M
                }
1574
44.7M
                while(c_idx != 0 && ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV422
1575
0
                                && ++subtu_idx < 2);
1576
44.7M
            }
1577
1578
            /* Neighbor availability inside CTB */
1579
            /* 1 bit per 4x4. Indicates whether that 4x4 block has been reconstructed (available) */
1580
            /* Used for neighbor availability in intra pred */
1581
44.7M
            if(c_idx == 0)
1582
43.0M
            {
1583
43.0M
                WORD32 i;
1584
43.0M
                WORD32 trans_in_min_tu;
1585
43.0M
                UWORD32 cur_tu_in_bits;
1586
43.0M
                UWORD32 cur_tu_avail_flag;
1587
1588
43.0M
                trans_in_min_tu = trans_size / MIN_TU_SIZE;
1589
43.0M
                cur_tu_in_bits = (1 << trans_in_min_tu) - 1;
1590
43.0M
                cur_tu_in_bits = cur_tu_in_bits << (32 - trans_in_min_tu);
1591
1592
43.0M
                cur_tu_avail_flag = cur_tu_in_bits >> (ps_tu->b4_pos_x + 1);
1593
1594
135M
                for(i = 0; i < trans_in_min_tu; i++)
1595
92.1M
                    au4_intra_nbr_avail[1 + ps_tu->b4_pos_y + i] |=
1596
92.1M
                                    cur_tu_avail_flag;
1597
43.0M
            }
1598
44.7M
        }
1599
43.0M
    }
1600
2.61M
    ps_proc->pv_tu_coeff_data = pu1_tu_coeff_data;
1601
1602
2.61M
    return ps_proc->i4_ctb_tu_cnt;
1603
2.61M
}
1604